Deep Learning
=============

Analysing a small corpus of Trump's public speaking using Word2Vec, skip-gram and T-SNE.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
%matplotlib inline
from __future__ import print_function
import collections
import string
import math
import numpy as np
import os
import random
import tensorflow as tf
import zipfile
from matplotlib import pylab
from six.moves import range
from six.moves.urllib.request import urlretrieve
from sklearn.manifold import TSNE
from time import time

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
%matplotlib inline
from __future__ import print_function
import collections
import string
import math
import numpy as np
import os
import random
import tensorflow as tf
import zipfile
from matplotlib import pylab
from six.moves import range
from six.moves.urllib.request import urlretrieve
from sklearn.manifold import TSNE
from time import time

In [2]:
speaking_solo = [
    "aipac032116.txt",
    "blackhist020117.txt",
    "detroit0808.txt",
    "inaguration.txt",
    "law_order081616.txt",
    "miamispeech021617.txt",
    "national_security061316.txt",
    "national_security090716.txt",
    "onHilary.txt",
    "victory_speach_e.txt"]
speaking_not_solo = [
    "abc_interview012617_multi.txt",
    "clintonVtrump11016_multi.txt",
    "cpac022417_multi.txt",
    "gop_debate_all.txt",
    "immigration110116_multi.txt",
    "press_con021617_multi.txt",
    "townhall032916_multi.txt",
    "wp_interview032116_multi.txt"]
    


Helper functions for extracting words from the above files

In [3]:
punctn = string.punctuation + "…" + "—" + "’" + "“"
def getSoloWords(filename):
    with open("./solo/"+filename, 'rb') as f:
        words = f.read().split()
    return [w.translate(string.maketrans("","") , punctn).lower() for w in words]
def getNonSolo(filename):
    with open("./non_solo/"+filename, 'rb') as f:
        words = f.read().split()
        
    trump_speaking = False
    trump_words = []
    for word in words:
        # Colon indicates speaker will speak
        if word[-1] == ':':
            if "TRUMP" in word: # If Trump is speaker
                trump_speaking = True
            else:
                trump_speaking = False
            continue
            
        # take just spoken words, ignore transcipt descriptions (which are in parenthesis)
        if trump_speaking and word[0] != '(' and word[-1] != ')':
            # Strip punctuation
            word = word.translate(string.maketrans("","") , punctn).lower()
            if word != "":
                trump_words.append(word)
    return trump_words


Extract all words spoken

In [4]:
word_corpus = []
for doc in speaking_solo:
    word_corpus += getSoloWords(doc)
for doc in speaking_not_solo:
    word_corpus += getNonSolo(doc)

Build the dictionary

In [5]:
def build_dataset(words):
  count = []
  count.extend(collections.Counter(words).most_common())
  dictionary = dict()
  for word, _ in count:
    dictionary[word] = len(dictionary)
  data = list()
  for word in words:
    if word in dictionary:
      index = dictionary[word]
    data.append(index)
  reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 
  return data, count, dictionary, reverse_dictionary
s = time()
data, count, dictionary, reverse_dictionary = build_dataset(word_corpus)
print('Most common words', count[:5])
print('Sample data', data[:10])
print(time()-s)

Most common words [('the', 4335), ('and', 3520), ('i', 3166), ('to', 3153), ('a', 2385)]
Sample data [92, 899, 119, 6, 23, 88, 2, 579, 3, 6]
0.113315105438


Function to generate a training batch for the skip-gram model.

In [6]:
data_index = 0

def generate_batch(batch_size, num_skips, skip_window):
  global data_index
  assert batch_size % num_skips == 0
  assert num_skips <= 2 * skip_window
  batch = np.ndarray(shape=(batch_size), dtype=np.int32)
  labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
  span = 2 * skip_window + 1 # [ skip_window target skip_window ]
  buffer = collections.deque(maxlen=span)
  for _ in range(span):
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  for i in range(batch_size // num_skips):
    target = skip_window  # target label at the center of the buffer
    targets_to_avoid = [ skip_window ]
    for j in range(num_skips):
      while target in targets_to_avoid:
        target = random.randint(0, span - 1)
      targets_to_avoid.append(target)
      batch[i * num_skips + j] = buffer[skip_window]
      labels[i * num_skips + j, 0] = buffer[target]
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  return batch, labels

print('data:', [reverse_dictionary[di] for di in data[:8]])

for num_skips, skip_window in [(2, 1), (4, 2)]:
    data_index = 0
    batch, labels = generate_batch(batch_size=8, num_skips=num_skips, skip_window=skip_window)
    print('\nwith num_skips = %d and skip_window = %d:' % (num_skips, skip_window))
    print('    batch:', [reverse_dictionary[bi] for bi in batch])
    print('    labels:', [reverse_dictionary[li] for li in labels.reshape(8)])

data: ['good', 'evening', 'thank', 'you', 'very', 'much', 'i', 'speak']

with num_skips = 2 and skip_window = 1:
    batch: ['evening', 'evening', 'thank', 'thank', 'you', 'you', 'very', 'very']
    labels: ['thank', 'good', 'evening', 'you', 'very', 'thank', 'you', 'much']

with num_skips = 4 and skip_window = 2:
    batch: ['thank', 'thank', 'thank', 'thank', 'you', 'you', 'you', 'you']
    labels: ['very', 'good', 'you', 'evening', 'much', 'thank', 'very', 'evening']


Build graph and train a skip-gram model.

In [7]:
vocabulary_size = len(dictionary) # Using all words given small amount of text
batch_size = 48
embedding_size = 48  # Dimension of the embedding vector.
skip_window = 6       # How many words to consider left and right.
num_skips = 12         # How many times to reuse an input to generate a label.

# We pick a random validation set to sample nearest neighbors. Here we limit the
# validation samples to the words that have a low numeric ID, which by
# construction are also the most frequent.
valid_size = 16     # Random set of words to evaluate similarity on.
valid_window = 100  # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
####
vexs = ["hillary", "america", "great", "terrorism", "press",  "china", "build", "president"]
valid_size = len(vexs)
valid_examples = [dictionary[w] for w in vexs]
num_sampled = 60    # Number of negative examples to sample.

graph = tf.Graph()

with graph.as_default():

  # Input data.
  train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
  train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
  valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

  # Ops and variables pinned to the CPU because of missing GPU implementation
  with tf.device('/cpu:0'):
    # Look up embeddings for inputs.
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    embed = tf.nn.embedding_lookup(embeddings, train_inputs)

    # Construct the variables for the NCE loss
    nce_weights = tf.Variable(
        tf.truncated_normal([vocabulary_size, embedding_size],
                            stddev=1.0 / math.sqrt(embedding_size)))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

  # Compute the average NCE loss for the batch.
  # tf.nce_loss automatically draws a new sample of the negative labels each
  # time we evaluate the loss.
  loss = tf.reduce_mean(
      tf.nn.nce_loss(weights=nce_weights,
                     biases=nce_biases,
                     labels=train_labels,
                     inputs=embed,
                     num_sampled=num_sampled,
                     num_classes=vocabulary_size))

  # Construct the SGD optimizer using a learning rate of 1.0.
  optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

  # Compute the cosine similarity between minibatch examples and all embeddings.
  norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
  normalized_embeddings = embeddings / norm
  valid_embeddings = tf.nn.embedding_lookup(
      normalized_embeddings, valid_dataset)
  similarity = tf.matmul(
      valid_embeddings, normalized_embeddings, transpose_b=True)

  # Add variable initializer.
  init = tf.global_variables_initializer()

In [8]:
# Step 5: Begin training.
num_steps = 500001

with tf.Session(graph=graph) as session:
  # We must initialize all variables before we use them.
  init.run()
  print("Initialized")

  average_loss = 0
  for step in xrange(num_steps):
    batch_inputs, batch_labels = generate_batch(
        batch_size, num_skips, skip_window)
    feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}

    # We perform one update step by evaluating the optimizer op (including it
    # in the list of returned values for session.run()
    _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict)
    average_loss += loss_val

    if step % 5000 == 0:
      if step > 0:
        average_loss /= 5000
      # The average loss is an estimate of the loss over the last 5000 batches.
      print("Average loss at step ", step, ": ", average_loss)
      average_loss = 0

    # Note that this is expensive (~20% slowdown if computed every 500 steps)
    if step % 10000 == 0:
      sim = similarity.eval()
      for i in xrange(valid_size):
        valid_word = reverse_dictionary[valid_examples[i]]
        top_k = 8 # number of nearest neighbors
        nearest = (-sim[i, :]).argsort()[1:top_k + 1]
        log_str = "Nearest to %s:" % valid_word
        for k in xrange(top_k):
          close_word = reverse_dictionary[nearest[k]]
          log_str = "%s %s," % (log_str, close_word)
        print(log_str)
  final_embeddings = normalized_embeddings.eval()

Average loss at step  500000 :  4.81634301996
Nearest to hillary: clinton, clintons, plastic, beat, leadership, failed, her, reset,
Nearest to america: again, make, rich, god, wealthy, great, deals, winning,
Nearest to great: fantastic, again, deals, patriots, america, company, everybody, strong,
Nearest to terrorism: islamic, radical, defeat, defeating, targeted, several, networks, sponsor,
Nearest to press: fake, charges, public, word, news, thats, putting, information,
Nearest to china: mexico, devaluation, moving, lose, currency, jobs, devalue, trades,
Nearest to build: wall, privately, afford, works, bridges, massive, lifted, southern,
Nearest to president: obama, elected, final, being, gave, wants, community, thoughtful,


Average loss at step  495000 :  4.83880538993


Average loss at step  490000 :  4.85903345695
Nearest to hillary: clinton, clintons, she, promises, pledged, her, failed, radical,
Nearest to america: again, wealthy, together, make, rich, american, god, safe,
Nearest to great: thank, a, incredible, country, again, patriots, relationships, have,
Nearest to terrorism: islamic, radical, defeat, defeating, targeted, networks, several, shores,
Nearest to press: fake, charges, word, news, ashamed, trouble, does, bottom,
Nearest to china: japan, mexico, trade, north, south, stealing, korea, lose,
Nearest to build: wall, bridges, massive, built, afford, southern, highways, rebuild,
Nearest to president: elected, obama, being, gave, thoughtful, negotiator, leader, incompetent,


Average loss at step  485000 :  4.89571798024


Average loss at step  480000 :  4.94524160624
Nearest to hillary: clinton, clintons, beat, plastic, ted, pledged, iran, president�,
Nearest to america: again, forgotten, make, wealthy, great, god, american, together,
Nearest to great: again, wow, cheerleader, make, strong, america, fantastic, together,
Nearest to terrorism: islamic, defeat, defeating, several, radical, form, targeted, shores,
Nearest to press: ashamed, fake, charges, news, information, is, word, because,
Nearest to china: mexico, japan, lose, imbalance, trade, korea, north, deficit,
Nearest to build: wall, building, privately, highways, massive, rebuild, factories, preexisting,
Nearest to president: elected, obama, number, terminate, executive, this, but, leader,


Average loss at step  475000 :  4.81747062321


Average loss at step  470000 :  4.83068548136
Nearest to hillary: clinton, clintons, her, beat, plastic, actions, leadership, policies,
Nearest to america: make, again, safe, wealthy, together, inner, god, jobs,
Nearest to great: strong, respect, fantastic, management, very, respected, daughter, conservative,
Nearest to terrorism: islamic, several, radical, defeating, energies, charge, isis, defeat,
Nearest to press: fake, word, ashamed, bottom, public, putting, news, trouble,
Nearest to china: mexico, devaluation, japan, toward, lose, korea, goods, places,
Nearest to build: wall, building, privately, form, built, massive, finished, southern,
Nearest to president: obama, elected, wants, administration, hillary, incompetent, only, thoughtful,


Average loss at step  465000 :  4.83513227386


Average loss at step  460000 :  4.89364269495
Nearest to hillary: clinton, barbarians, book, wants, also, president�, reset, beat,
Nearest to america: again, god, thank, wealthy, bless, rich, forgotten, safe,
Nearest to great: addition, thank, theme, again, substantial, become, conclude, fantastic,
Nearest to terrorism: islamic, radical, defeating, defeat, west, several, nightclub, targeted,
Nearest to press: ashamed, word, bottom, charges, public, honest, fake, news,
Nearest to china: japan, mexico, korea, south, lose, north, devaluation, deal,
Nearest to build: wall, massive, built, rebuild, southern, privately, form, soon,
Nearest to president: obama, elected, leader, has, negotiator, being, incompetent, angry,


Average loss at step  455000 :  4.99948220797


Average loss at step  450000 :  4.89835834141
Nearest to hillary: clinton, wants, clintons, pledged, plastic, she, disastrous, bill,
Nearest to america: again, wealthy, are, god, make, safe, time, forgotten,
Nearest to great: fantastic, before, general, engineer, parents, deals, mattis, utah,
Nearest to terrorism: islamic, several, defeat, defeating, radical, targeted, isis, west,
Nearest to press: ashamed, honest, information, bottom, way, charges, fake, bit,
Nearest to china: japan, mexico, trade, devalue, imbalance, places, terms, lose,
Nearest to build: wall, southern, building, factories, will, bridges, afford, stay,
Nearest to president: obama, elected, executive, terminate, gave, angry, barack, obamas,


Average loss at step  445000 :  4.83536784487


Average loss at step  440000 :  4.86152876396
Nearest to hillary: clinton, clintons, pledged, supports, leadership, beat, obama, promises,
Nearest to america: again, make, rich, wealthy, god, grow, dynamic, together,
Nearest to great: again, make, respect, once, management, company, deals, fantastic,
Nearest to terrorism: islamic, radical, several, defeating, defeat, targeted, worlds, isis,
Nearest to press: fake, charges, news, ashamed, fact, word, public, confidential,
Nearest to china: mexico, japan, lose, devaluation, north, stealing, billion, south,
Nearest to build: wall, southern, form, works, need, be, individual, massive,
Nearest to president: obama, elected, administration, incompetent, being, executive, final, under,


Average loss at step  435000 :  4.87555367265


Average loss at step  430000 :  4.96551367674
Nearest to hillary: clinton, clintons, pledged, department, brutalized, plastic, state, homeland,
Nearest to america: make, again, wealthy, great, god, deals, rich, strong,
Nearest to great: again, make, strong, deals, america, once, thank, wealthy,
Nearest to terrorism: islamic, radical, several, defeating, threat, defeat, now, targeted,
Nearest to press: because, but, ashamed, charges, word, fake, bit, news,
Nearest to china: mexico, japan, trade, south, korea, deficit, lose, imbalance,
Nearest to build: wall, southern, creation, form, will, future, technologies, massive,
Nearest to president: obama, barack, an, abandonment, elected, executive, thoughtful, final,


Average loss at step  425000 :  4.9475421201


Average loss at step  420000 :  4.9138031599
Nearest to hillary: clinton, beat, clintons, her, actions, however, policies, plastic,
Nearest to america: again, make, wealthy, inner, american, rich, forgotten, grow,
Nearest to great: again, make, job, strong, deals, fantastic, everybody, conservative,
Nearest to terrorism: islamic, radical, several, defeat, defeating, west, threat, decisive,
Nearest to press: fake, charges, word, fact, does, confidential, news, putting,
Nearest to china: mexico, japan, overseas, lose, imbalance, moving, pay, bring,
Nearest to build: wall, form, building, pay, wanna, gonna, stay, american,
Nearest to president: obama, elected, incompetent, executive, there, this, administration, terminate,


Average loss at step  415000 :  4.86966386423


Average loss at step  410000 :  4.86961940928
Nearest to hillary: clinton, clintons, her, is, policies, the, by, supports,
Nearest to america: again, make, will, we, rich, wealthy, safe, god,
Nearest to great: again, make, fantastic, god, proud, conservative, deals, america,
Nearest to terrorism: islamic, radical, several, is, threat, on, violence, defeating,
Nearest to press: charges, word, fake, news, fact, confidential, does, national,
Nearest to china: japan, devaluation, companies, conditioning, mexico, moving, lose, trade,
Nearest to build: wall, massive, form, consensus, afford, southern, built, dying,
Nearest to president: obama, elected, final, administration, incompetent, destabilized, be, leader,


Average loss at step  405000 :  4.9536867878


Average loss at step  400000 :  5.00828252172
Nearest to hillary: clinton, beat, wants, barack, race, pledged, polls, clintons,
Nearest to america: again, great, strong, safe, wealthy, rich, together, god,
Nearest to great: again, america, strong, deals, once, wealthy, make, job,
Nearest to terrorism: islamic, west, several, violence, worlds, defeating, threat, decisive,
Nearest to press: charges, information, fake, does, confidential, ahead, word, bit,
Nearest to china: mexico, japan, lose, trade, world, deal, both, devalue,
Nearest to build: wall, ships, will, massive, southern, future, position, need,
Nearest to president: obama, barack, incompetent, angry, elected, he, standards, being,


Average loss at step  395000 :  4.94707392464


Average loss at step  390000 :  4.90829331837
Nearest to hillary: clinton, policies, her, talks, emails, clintons, state, bill,
Nearest to america: again, rich, wealthy, god, safe, bless, great, inner,
Nearest to great: again, make, fantastic, proud, rich, once, strong, america,
Nearest to terrorism: islamic, radical, several, form, defeating, in, worlds, decisive,
Nearest to press: charges, media, confidential, supposed, fake, woman, classified, fact,
Nearest to china: japan, mexico, lose, devalue, stealing, places, terms, overseas,
Nearest to build: afford, massive, wall, bridges, will, we, built, dying,
Nearest to president: administration, obama, obamas, barack, executive, deadly, constitution, orders,


Average loss at step  385000 :  4.91698777876


Average loss at step  380000 :  4.93084705243
Nearest to hillary: clinton, has, promoter, cue, which, clintons, pataki, oppress,
Nearest to america: make, again, together, wealthy, jobs, rich, american, safe,
Nearest to great: again, make, deals, well, once, employ, job, relationships,
Nearest to terrorism: islamic, radical, form, defeating, targeted, west, violence, defeat,
Nearest to press: charges, news, fake, does, media, confidential, word, ashamed,
Nearest to china: japan, mexico, lose, moving, devaluation, power, from, korea,
Nearest to build: wall, make, will, to, be, we, afford, southern,
Nearest to president: obama, elected, as, incompetent, this, leader, done, angry,


Average loss at step  375000 :  4.99983572216


Average loss at step  370000 :  5.03919619861
Nearest to hillary: clinton, clintons, pledged, disastrous, she, bill, beat, emails,
Nearest to america: again, we, make, wealthy, god, thank, will, great,
Nearest to great: make, again, america, conservative, we, truly, deals, wealthy,
Nearest to terrorism: islamic, radical, defeat, west, several, form, either, fight,
Nearest to press: charges, information, fake, news, confidential, ashamed, word, does,
Nearest to china: japan, mexico, lose, trade, devalue, imbalance, bought, deficit,
Nearest to build: wall, will, building, stay, safe, use, be, afford,
Nearest to president: obama, executive, final, ok, roberts, terminate, obamas, but,


Average loss at step  365000 :  4.99690244927


Average loss at step  360000 :  4.91720406823
Nearest to hillary: clinton, her, beat, president, actions, against, administration, supported,
Nearest to america: again, together, rich, make, american, safe, great, wealthy,
Nearest to great: again, proud, strong, make, america, deals, victory, become,
Nearest to terrorism: islamic, radical, west, isis, several, defeating, form, her,
Nearest to press: fake, media, charges, word, fact, news, public, enemy,
Nearest to china: mexico, devaluation, japan, jobs, devalue, their, overseas, trade,
Nearest to build: wall, afford, gonna, dying, be, again, soon, cant,
Nearest to president: obama, incompetent, elected, strategy, hillary, clinton, barack, putin,


Average loss at step  355000 :  4.92998470559


Average loss at step  350000 :  4.97030875478
Nearest to hillary: clinton, clintons, pledged, which, she, beat, promises, uranium,
Nearest to america: again, wealthy, make, rich, god, great, forgotten, deals,
Nearest to great: thank, again, fantastic, make, god, strong, america, proud,
Nearest to terrorism: islamic, radical, west, violence, fight, form, terror, several,
Nearest to press: charges, confidential, does, fake, news, despite, ahead, members,
Nearest to china: japan, devaluation, mexico, trade, south, lose, bought, korea,
Nearest to build: wall, afford, massive, southern, privately, begin, brooklyn, rebuild,
Nearest to president: obama, elected, incompetent, being, final, barack, neil, absolutely,


Average loss at step  345000 :  5.11660697351


Average loss at step  340000 :  5.11722018604
Nearest to hillary: clinton, clintons, plastic, wants, pledged, race, secretary, obamaclinton,
Nearest to america: again, wealthy, forgotten, make, together, great, deals, strong,
Nearest to great: again, conservative, together, make, fantastic, strong, america, will,
Nearest to terrorism: islamic, radical, west, iran, several, form, supports, defeating,
Nearest to press: media, word, information, charges, does, confidential, news, fake,
Nearest to china: mexico, with, lose, pay, terms, now, they, company,
Nearest to build: wall, will, border, southern, massive, building, strong, be,
Nearest to president: stringent, discouraging, weather�, obama, interrupting, shultz, incentive, hitting,


Average loss at step  335000 :  4.91242650805


Average loss at step  330000 :  4.94870121512
Nearest to hillary: clinton, clintons, only, her, under, promises, administration, supports,
Nearest to america: again, make, rich, safe, will, strong, together, wealthy,
Nearest to great: fantastic, make, strong, again, job, proud, black, conservative,
Nearest to terrorism: islamic, radical, nato, that, decisions, changed, several, form,
Nearest to press: media, fact, word, fake, news, does, loving, loved,
Nearest to china: mexico, lose, moving, countries, japan, devaluation, pay, places,
Nearest to build: wall, rebuild, southern, afford, bridges, roads, built, massive,
Nearest to president: obama, elected, strategy, administration, unfortunately, final, eight, deadly,


Average loss at step  325000 :  4.99638153901


Average loss at step  320000 :  5.04060495706
Nearest to hillary: clinton, clintons, bill, islam, she, pledged, supports, her,
Nearest to america: again, make, deals, great, wealthy, god, rich, will,
Nearest to great: again, america, deals, job, make, substantial, all, same,
Nearest to terrorism: islamic, libya, radical, policy, iran, mission, month, form,
Nearest to press: media, doesnt, does, didnt, charges, word, information, but,
Nearest to china: japan, mexico, devaluation, trade, south, lose, korea, north,
Nearest to build: wall, afford, will, southern, rebuild, massive, technologies, begin,
Nearest to president: obama, barack, elected, this, incompetent, angry, being, an,


Average loss at step  315000 :  5.19778379087


Average loss at step  310000 :  5.00459073086
Nearest to hillary: clinton, clintons, beat, gave, obama, policies, policy, recent,
Nearest to america: again, forgotten, american, make, wealthy, allow, inner, protected,
Nearest to great: proud, fantastic, conservative, good, strong, actually, again, lot,
Nearest to terrorism: islamic, radical, defeat, war, several, libya, inaccurately, september,
Nearest to press: information, does, charges, media, way, public, reason, word,
Nearest to china: mexico, japan, lose, trade, pay, terms, overseas, places,
Nearest to build: pay, afford, wall, massive, building, safe, rebuild, weapon,
Nearest to president: obama, elected, eight, terminate, gave, executive, clinton, incompetent,


Average loss at step  305000 :  5.00825257115


Average loss at step  300000 :  4.99999206147
Nearest to hillary: clinton, clintons, policies, leadership, supports, policy, under, power,
Nearest to america: again, make, forgotten, rich, will, our, together, deals,
Nearest to great: proud, respect, again, strong, men, love, fantastic, nation,
Nearest to terrorism: islamic, radical, decisions, islam, either, west, defeating, judgment,
Nearest to press: fake, media, charges, news, information, me, but, fact,
Nearest to china: lose, companies, dont, devaluation, jobs, to, have, our,
Nearest to build: wall, rebuild, afford, need, massive, building, will, safe,
Nearest to president: obama, elected, was, clinton, hillary, he, incompetent, gave,


Average loss at step  295000 :  5.06113676414


Average loss at step  290000 :  5.14570791707
Nearest to hillary: clinton, beat, wants, clintons, reset, amnesty, race, pledged,
Nearest to america: again, great, rich, forgotten, deals, strong, god, wealthy,
Nearest to great: america, again, proud, wealthy, deals, truly, fantastic, victory,
Nearest to terrorism: islamic, against, syria, radical, immigration, decisions, iran, libya,
Nearest to press: because, media, information, charges, fake, news, doesnt, does,
Nearest to china: korea, japan, south, mexico, devaluation, north, company, country,
Nearest to build: wall, massive, will, afford, creation, rebuild, military, southern,
Nearest to president: obama, being, elected, incompetent, angry, an, when, ben,


Average loss at step  285000 :  5.14126453836


Average loss at step  280000 :  5.08983540244
Nearest to hillary: clinton, policies, today, reset, clintons, she, russia, wants,
Nearest to america: again, forgotten, americans, will, country, american, wealthy, party,
Nearest to great: again, strong, fantastic, make, truly, will, matt, conservative,
Nearest to terrorism: islamic, radical, iran, defeat, form, either, several, west,
Nearest to press: media, charges, word, news, information, fake, classified, ok,
Nearest to china: mexico, lose, both, japan, terms, korea, imbalance, losing,
Nearest to build: safe, afford, be, weapon, privately, want, start, wall,
Nearest to president: elected, obama, will, wont, system, incompetent, putin, im,


Average loss at step  275000 :  5.03439874763


Average loss at step  270000 :  5.03934946432
Nearest to hillary: clinton, supports, cue, solution, pataki, scandals, islam, has,
Nearest to america: again, together, deals, make, will, forgotten, wealthy, join,
Nearest to great: make, fantastic, job, again, parents, conservative, can, deals,
Nearest to terrorism: islamic, radical, form, targeted, defeat, several, clintons, iran,
Nearest to press: word, news, media, fake, that, charges, fine, thats,
Nearest to china: devaluation, terms, mexico, world, japan, companies, currency, building,
Nearest to build: wall, will, afford, privately, built, creation, america, pay,
Nearest to president: obama, elected, cannot, putin, long, who, incompetent, leader,


Average loss at step  265000 :  5.18715218892


Average loss at step  260000 :  5.19582224655
Nearest to hillary: clinton, state, bill, clintons, secretary, has, supported, department,
Nearest to america: again, make, wealthy, strong, forgotten, great, all, to,
Nearest to great: again, will, america, once, day, job, strong, fantastic,
Nearest to terrorism: islamic, war, iran, terror, radical, libya, form, early,
Nearest to press: charges, word, news, information, reason, confidential, fake, fine,
Nearest to china: mexico, japan, both, lose, moving, trade, pay, with,
Nearest to build: wall, we, will, afford, building, military, weapon, privately,
Nearest to president: obama, 10, being, year, angry, temperament, was, next,


Average loss at step  255000 :  5.32734915085


Average loss at step  250000 :  5.05391109934
Nearest to hillary: clinton, beat, supports, clintons, has, secretary, policies, bill,
Nearest to america: again, make, american, rich, wealthy, great, god, together,
Nearest to great: again, strong, everybody, make, proud, fantastic, america, once,
Nearest to terrorism: islamic, charge, terror, changed, iran, september, taking, since,
Nearest to press: charges, information, news, fake, media, word, does, confidential,
Nearest to china: mexico, pay, japan, companies, lose, moving, overseas, losing,
Nearest to build: afford, wall, better, pay, cant, privately, make, stay,
Nearest to president: obama, elected, last, wants, businessman, gave, successful, putin,


Average loss at step  245000 :  5.11990003576


Average loss at step  240000 :  5.16534264393
Nearest to hillary: clinton, �, foreign, clintons, supports, under, her, policies,
Nearest to america: again, make, together, our, american, safe, rich, country,
Nearest to great: again, fantastic, well, proud, become, big, strong, my,
Nearest to terrorism: islamic, radical, form, defeat, clinton, state, terror, hillary,
Nearest to press: fake, news, that, because, honestly, media, enemy, should,
Nearest to china: japan, mexico, trade, lose, pay, planes, devaluation, currency,
Nearest to build: wall, afford, begin, massive, we, new, create, rebuilding,
Nearest to president: obama, it, has, elected, being, us, run, when,


Average loss at step  235000 :  5.27808467832


Average loss at step  230000 :  5.53521980715
Nearest to hillary: clinton, wants, against, beat, obama, �, clintons, also,
Nearest to america: again, make, great, together, safe, wealthy, strong, rich,
Nearest to great: again, fantastic, america, make, conservative, matt, men, strong,
Nearest to terrorism: islamic, radical, iran, against, libya, fight, words, also,
Nearest to press: information, but, does, should, media, because, get, news,
Nearest to china: whoa�, demanding, mexico, argument, trades, id, argentina, indexes,
Nearest to build: wall, part, massive, society, will, pay, create, weapon,
Nearest to president: obama, barack, being, respect, could, chance, run, knows,


Average loss at step  225000 :  5.13656441307


Average loss at step  220000 :  5.19313899865
Nearest to hillary: clinton, by, wants, supports, obama, et, her, only,
Nearest to america: again, make, safe, american, rich, great, also, start,
Nearest to great: again, make, strong, safe, america, once, become, will,
Nearest to terrorism: islamic, radical, islam, defeat, words, fight, her, form,
Nearest to press: media, word, reason, thats, news, out, enemy, honest,
Nearest to china: mexico, japan, lose, losing, overseas, pay, countries, places,
Nearest to build: wall, massive, afford, need, pay, building, will, built,
Nearest to president: obama, elected, barack, he, executive, trump, pushed, end,


Average loss at step  215000 :  5.20994779444


Average loss at step  210000 :  5.23053492746
Nearest to hillary: clinton, reset, beat, bill, her, barack, wants, has,
Nearest to america: again, deals, great, make, together, rich, god, our,
Nearest to great: america, deals, love, men, strong, thank, again, border,
Nearest to terrorism: islamic, radical, war, clinton, islam, fight, immigration, form,
Nearest to press: �, so, should, cant, doesnt, word, for, news,
Nearest to china: japan, mexico, korea, countries, trade, south, lose, making,
Nearest to build: wall, massive, built, afford, begin, building, nation, privately,
Nearest to president: been, has, obama, leader, than, run, say, totally,


Average loss at step  205000 :  5.48156604905


Average loss at step  200000 :  5.40010472379
Nearest to hillary: clinton, clintons, state, who, wants, her, by, has,
Nearest to america: again, make, great, safe, strong, together, are, country,
Nearest to great: strong, conservative, america, again, colonels, once, on, matt,
Nearest to terrorism: islamic, radical, war, form, against, libya, her, syria,
Nearest to press: word, honest, it, fine, think, a, important, does,
Nearest to china: mexico, terms, moving, pay, lose, places, losing, us,
Nearest to build: massive, wall, pay, afford, building, need, wanna, create,
Nearest to president: obama, elected, community, by, does, running, barack, then,


Average loss at step  195000 :  5.27853415542


Average loss at step  190000 :  5.21016178832
Nearest to hillary: clinton, her, beat, generals, amnesty, under, supports, today,
Nearest to america: again, make, rich, together, deals, want, bigger, worker,
Nearest to great: make, again, proud, company, unbelievable, built, deals, fair,
Nearest to terrorism: islamic, radical, even, lots, made, of, history, nato,
Nearest to press: word, honest, fake, thats, news, fine, why, does,
Nearest to china: mexico, pay, trade, losing, moving, both, lose, making,
Nearest to build: wall, built, massive, afford, building, make, work, privately,
Nearest to president: obama, elected, barack, who, community, wants, also, has,


Average loss at step  185000 :  5.27174312224


Average loss at step  180000 :  5.48803457808
Nearest to hillary: clinton, clintons, amnesty, has, her, also, power, she,
Nearest to america: rich, make, again, going, start, protect, together, better,
Nearest to great: again, make, strong, safe, well, thank, everybody, much,
Nearest to terrorism: islamic, radical, entire, one, �, refugees, secretary, form,
Nearest to press: it, but, news, fake, because, word, right, fine,
Nearest to china: mexico, places, building, world, lose, which, trade, from,
Nearest to build: wall, built, massive, afford, building, form, will, begin,
Nearest to president: obama, an, respect, when, and, of, ok, barack,


Average loss at step  175000 :  5.53553254266


Average loss at step  170000 :  5.55099706368
Nearest to hillary: clinton, beat, against, reset, clintons, polls, wants, she,
Nearest to america: again, make, country, rich, strong, americans, deals, economy,
Nearest to great: strong, make, up, also, a, thought, proud, and,
Nearest to terrorism: islamic, radical, fight, war, even, against, were, taking,
Nearest to press: word, fine, media, and, know, news, information, fake,
Nearest to china: mexico, countries, lose, japan, from, pay, moving, them,
Nearest to build: wall, building, massive, pay, built, be, will, up,
Nearest to president: elected, chance, also, putin, has, angry, change, temperament,


Average loss at step  165000 :  5.26370629225


Average loss at step  160000 :  5.42828168836
Nearest to hillary: clinton, beat, amnesty, under, which, clintons, policy, her,
Nearest to america: make, again, deals, will, american, together, win, anymore,
Nearest to great: unbelievable, job, decision, again, proud, conservative, fantastic, way,
Nearest to terrorism: radical, islamic, against, her, be, islam, decision, fight,
Nearest to press: word, why, honest, fake, fact, think, news, the,
Nearest to china: overseas, countries, advantage, spending, weve, into, places, lose,
Nearest to build: wall, built, we, will, building, afford, have, begin,
Nearest to president: obama, respect, under, also, elected, another, point, only,


Average loss at step  155000 :  5.45292040296


Average loss at step  150000 :  5.74600939331
Nearest to hillary: clinton, clintons, biggest, amnesty, cost, record, has, she,
Nearest to america: again, make, strong, great, together, become, the, start,
Nearest to great: again, america, strong, make, deals, our, will, become,
Nearest to terrorism: radical, islamic, state, syria, fight, of, terms, entire,
Nearest to press: should, know, news, nice, fake, against, now, part,
Nearest to china: mexico, japan, both, , deal, every, better, pay,
Nearest to build: wall, be, we, pay, military, massive, building, ,
Nearest to president: obama, ran, thought, job, decision, temperament, conservative, hes,


Average loss at step  145000 :  5.72039928408


Average loss at step  140000 :  5.63649839811
Nearest to hillary: clinton, while, radical, russia, clintons, even, power, amnesty,
Nearest to america: of, we, american, again, make, together, our, rich,
Nearest to great: again, make, strong, im, and, parents, everybody, good,
Nearest to terrorism: islamic, radical, fight, one, day, terms, who, islam,
Nearest to press: media, honestly, word, means, real, does, much, fine,
Nearest to china: but, places, where, both, them, mexico, japan, and,
Nearest to build: will, afford, pay, wall, built, massive, building, and,
Nearest to president: be, decision, totally, also, executive, this, elected, wont,


Average loss at step  135000 :  5.6876985847


Average loss at step  130000 :  5.66183268695
Nearest to hillary: clinton, clintons, her, beat, supported, amnesty, only, foreign,
Nearest to america: make, again, deals, will, to, together, be, we,
Nearest to great: making, make, fair, deals, again, how, pay, leave,
Nearest to terrorism: islamic, radical, state, terror, many, terms, fight, of,
Nearest to press: honestly, media, does, fake, word, news, fact, on,
Nearest to china: mexico, trade, pay, its, japan, countries, building, both,
Nearest to build: wall, massive, built, afford, pay, building, be, great,
Nearest to president: obama, this, respect, hes, barack, being, thought, elected,


Average loss at step  125000 :  5.61033539748


Average loss at step  120000 :  6.41131247201
Nearest to hillary: amnesty, clintons, change, clinton, state, even, she, politicians,
Nearest to america: make, again, strong, we, great, so, rich, thank,
Nearest to great: make, again, america, strong, we, so, chance, safe,
Nearest to terrorism: islamic, terror, �, nation, there, have, is, the,
Nearest to press: fake, word, news, does, fact, why, media, doesnt,
Nearest to china: mexico, trade, other, spent, with, from, japan, everything,
Nearest to build: wall, use, need, make, built, afford, wanna, building,
Nearest to president: thought, next, obama, hes, number, chance, but, could,


Average loss at step  115000 :  6.06758433471


Average loss at step  110000 :  5.68116466265
Nearest to hillary: clinton, beat, even, has, words, already, clintons, through,
Nearest to america: again, strong, great, make, safe, start, rich, americans,
Nearest to great: america, strong, again, life, everybody, make, this, thank,
Nearest to terrorism: islamic, radical, plan, iran, fight, her, record, one,
Nearest to press: fact, media, through, fake, honestly, news, public, honest,
Nearest to china: mexico, countries, pay, from, jobs, their, places, make,
Nearest to build: going, wall, be, pay, afford, start, military, gonna,
Nearest to president: obama, even, absolutely, which, done, well, got, change,


Average loss at step  105000 :  5.72811724987


Average loss at step  100000 :  5.83446622276
Nearest to hillary: clinton, power, she, beat, through, that, clintons, foreign,
Nearest to america: again, rich, first, deals, will, safe, nation, we,
Nearest to great: probably, but, today, question, true, good, tremendous, fair,
Nearest to terrorism: islamic, radical, plan, the, deal, for, under, since,
Nearest to press: honestly, because, media, public, process, fake, even, honest,
Nearest to china: trade, countries, deal, advantage, giving, places, having, that,
Nearest to build: built, wall, pay, start, building, taking, we, nation,
Nearest to president: today, obama, his, being, totally, probably, maybe, executive,


Average loss at step  95000 :  6.21603485341


Average loss at step  90000 :  6.57239083223
Nearest to hillary: wants, clinton, beat, even, power, also, made, i,
Nearest to america: again, make, great, rich, protect, american, americans, our,
Nearest to great: again, strong, make, america, nation, will, big, proud,
Nearest to terrorism: islamic, done, change, �, plan, deal, clintons, since,
Nearest to press: does, even, had, that, fine, media, ok, how,
Nearest to china: mexico, them, even, deal, theyre, cant, agree, communities,
Nearest to build: wall, will, wanna, now, start, pay, business, nation,
Nearest to president: was, for, chance, being, obama, running, number, show,


Average loss at step  85000 :  6.14108995485


Average loss at step  80000 :  6.11994754281
Nearest to hillary: clinton, has, that, heard, beat, wants, against, polls,
Nearest to america: make, again, great, rich, start, we, shouldnt, to,
Nearest to great: make, deals, again, to, tremendous, im, its, rich,
Nearest to terrorism: islamic, radical, on, policy, all, life, then, plan,
Nearest to press: fake, news, word, does, how, media, fact, honest,
Nearest to china: mexico, their, jobs, them, day, up, countries, on,
Nearest to build: wall, will, gonna, health, pay, built, a, for,
Nearest to president: year, obama, room, my, end, national, wont, maybe,


Average loss at step  75000 :  6.29876050978


Average loss at step  70000 :  6.30412472086
Nearest to hillary: clinton, clintons, policy, her, number, and, has, beat,
Nearest to america: make, again, safe, deals, great, protect, build, work,
Nearest to great: deals, again, strong, make, care, were, but, country,
Nearest to terrorism: islamic, radical, first, on, wants, against, policy, has,
Nearest to press: media, through, cant, out, does, absolutely, fair, fake,
Nearest to china: mexico, deal, these, were, cant, them, with, because,
Nearest to build: wall, were, start, health, be, going, whole, need,
Nearest to president: obama, that, he, has, an, this, nobody, and,


Average loss at step  65000 :  6.88574551382


Average loss at step  60000 :  6.78583353181
Nearest to hillary: clinton, policy, beat, heard, against, she, nobody, frankly,
Nearest to america: we, again, will, to, make, were, protect, too,
Nearest to great: actually, well, and, people, we, here, good, very,
Nearest to terrorism: islamic, wants, against, needs, war, america, put, in,
Nearest to press: public, system, know, media, fair, believe, is, that,
Nearest to china: us, countries, mexico, know, we, trade, israel, them,
Nearest to build: wall, gonna, its, big, start, now, for, work,
Nearest to president: obama, thought, started, then, did, different, even, said,


Average loss at step  55000 :  7.04289330006


Average loss at step  50000 :  6.85895805559
Nearest to hillary: clinton, has, on, another, had, that, beat, national,
Nearest to america: again, make, deals, rich, great, start, military, country,
Nearest to great: not, do, my, something, deals, to, were, go,
Nearest to terrorism: next, doing, companies, im, then, national, ok, something,
Nearest to press: media, public, does, would, news, absolutely, tremendous, assad,
Nearest to china: countries, mexico, pay, in, up, �, them, because,
Nearest to build: wall, start, will, or, need, tremendous, making, all,
Nearest to president: his, obama, went, him, she, being, anything, he,


Average loss at step  45000 :  7.04824896131


Average loss at step  40000 :  7.59911329737
Nearest to hillary: clinton, beat, she, today, said, went, nothing, not,
Nearest to america: experiencing, sailors, hunker, battered, vigorously, capacity, months�, engineer,
Nearest to great: deals, our, again, were, deal, country, border, system,
Nearest to terrorism: renegotiated, category, magnitude, whoa�, change, iss, klu, onemonth,
Nearest to press: wouldnt, protect, doesnt, immigration, frankly, happened, want, news,
Nearest to china: politicians, be, city, are, where, happened, whats, should,
Nearest to build: wall, as, cant, pay, but, better, doesnt, nuclear,
Nearest to president: obama, its, came, has, most, fact, city, his,


Average loss at step  35000 :  8.43609101262


Average loss at step  30000 :  9.00504323177
Nearest to hillary: two, and, made, or, russia, talk, clinton, american,
Nearest to america: make, again, great, already, then, american, stop, out,
Nearest to great: job, well, your, am, make, very, better, america,
Nearest to terrorism: plan, and, when, something, with, even, radical, countries,
Nearest to press: devaluing, horribly, assad, happened, nervous, rip, institutions, c�,
Nearest to china: made, us, trade, number, with, or, mexico, frankly,
Nearest to build: tremendous, nuclear, so, start, doesnt, problem, far, if,
Nearest to president: really, a, this, hes, mean, doesnt, little, doing,


Average loss at step  25000 :  9.4270180737


Average loss at step  20000 :  9.86896980348
Nearest to hillary: out, her, just, that, has, �, by, clinton,
Nearest to america: so, again, if, great, they, deal, american, than,
Nearest to great: very, from, most, more, america, big, so, deal,
Nearest to terrorism: bring, first, florida, work, so, everybody, could, how,
Nearest to press: action, established, suffering, disgraceful, bond, tall, backwards, forming,
Nearest to china: put, now, almost, they, into, have, through, their,
Nearest to build: your, think, let, been, dont, but, not, very,
Nearest to president: people, didnt, could, even, i, doing, up, deal,


Average loss at step  15000 :  11.7614529788


Average loss at step  10000 :  13.9649899966
Nearest to hillary: clinton, cant, also, most, not, said, just, never,
Nearest to america: make, who, other, great, are, have, into, so,
Nearest to great: will, just, lot, bad, who, one, well, many,
Nearest to terrorism: sat, next, that�, caterpillar, extremely, opinion, crime, instantly,
Nearest to press: youth, forming, enforcement, debating, con, funneled, banking, haiti,
Nearest to china: need, our, into, not, those, will, clinton, were,
Nearest to build: b52, following, cuban, hire, mitt, saving, cares, tougher,
Nearest to president: their, at, im, by, have, them, would, make,


Average loss at step  5000 :  29.719666335


Initialized
Average loss at step  0 :  212.514663696
Nearest to hillary: unemployed, nominated, fair, wave, abused, pretense, returning, border,
Nearest to america: addition, condone, boldly, robocalls, wins, conditions, nabisco, jobs�,
Nearest to great: disappear, rates, write�, saves, providing, wisconsin, else�, concern,
Nearest to terrorism: stages, motor, trapped, lifelong, model, drawer, fairness, indonesia,
Nearest to press: vigorously, crossings, enforcement, thrilled, 6, outside, nervous, carefully,
Nearest to china: integration, covers, 31, heights, protecting, onemonth, molestation, you,
Nearest to build: interruptions, lowering, sarel, resulted, martyrs, ashamed, screening, framework,
Nearest to president: facilitator, weather, theyll, pigs�, ruling, assad, head, couldve,


In [9]:
num_points = 1000
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
two_d_embeddings = tsne.fit_transform(final_embeddings[1:num_points+1, :])

In [0]:
def plot(embeddings, labels):
  assert embeddings.shape[0] >= len(labels), 'More labels than embeddings'
  pylab.figure(figsize=(15,15))  # in inches
  for i, label in enumerate(labels):
    x, y = embeddings[i,:]
    pylab.scatter(x, y)
    pylab.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points',
                   ha='right', va='bottom')
  pylab.show()

words = [reverse_dictionary[i] for i in range(1, num_points+1)]
plot(two_d_embeddings, words)