In [1]:
import json
import os
from tempfile import gettempdir

import numpy as np
import tensorflow as tf

In [None]:
def get_dicts():
    """Load the vocabulary lookups, word counts and embeddings saved as JSON.

    Reads ``vocab_dictionary.json``, ``reversed_dictionary.json``,
    ``wordcount.json`` and ``embeddings.json`` relative to the current
    working directory.

    JSON object keys are always strings, so a mapping that was keyed by
    integer indices comes back keyed by ``"0"``, ``"1"``, ... and integer
    lookups against it miss. Keys of the reversed dictionary that parse as
    integers are therefore converted back to ``int``; any other key is left
    untouched. The embeddings are returned as a NumPy array because JSON can
    only hold them as nested lists, which support neither element-wise
    arithmetic nor 2-D slicing.

    Returns:
        tuple: ``(vocab_dictionary, reversed_dictionary, word_count,
        final_embeddings)`` in that order, where ``final_embeddings`` is a
        ``numpy.ndarray``.

    Raises:
        IOError / OSError: if one of the files cannot be opened.
        ValueError: if one of the files does not contain valid JSON.
    """
    def _load(path):
        # Read and decode a single JSON file, closing it afterwards.
        with open(path, 'r') as f:
            return json.load(f)

    def _int_key(key):
        # Restore an integer key that JSON serialised as a string.
        try:
            return int(key)
        except (TypeError, ValueError):
            return key

    vocab_dictionary = _load('vocab_dictionary.json')
    reversed_dictionary = _load('reversed_dictionary.json')
    word_count = _load('wordcount.json')
    final_embeddings = _load('embeddings.json')

    if isinstance(reversed_dictionary, dict):
        reversed_dictionary = {
            _int_key(key): value for key, value in reversed_dictionary.items()
        }

    final_embeddings = np.asarray(final_embeddings)

    return vocab_dictionary, reversed_dictionary, word_count, final_embeddings

In [None]:
# Load the saved vocabulary lookups, word counts and embeddings into the notebook namespace.
(
    vocab_dictionary,
    reversed_dictionary,
    word_count,
    final_embeddings,
) = get_dicts()

## Computes cosine similarity between two vectors

In [2]:
def cosine_similarity(v1, v2):
    """Return the cosine similarity of ``v1`` and ``v2``.

    The result is the dot product of the two vectors divided by the product
    of their norms as computed by ``np.linalg.norm``.

    Args:
        v1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        v2: Second vector, same length as ``v1``.

    Returns:
        The quotient ``dot(v1, v2) / (norm(v1) * norm(v2))``.
    """
    dot_product = np.dot(v1, v2)
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    return dot_product / norm_product

## Finds the word embeddings closest to a given word, prints the matching words, and returns their indices

In [3]:
def find_closest(embeddings, word, index_to_word, word_to_index, n_words, count):
    """Print and return the ``n_words`` embeddings nearest to ``word``.

    Nearness is the squared Euclidean distance between embedding rows, so the
    query word itself (distance 0) is part of the result.

    Args:
        embeddings: 2-D collection of embedding rows. It is coerced with
            ``np.asarray`` so the nested lists produced by ``json.load`` work
            as well as an ndarray.
        word: Query word; must be a key of ``word_to_index``.
        index_to_word: Mapping from row index to word. Keys may be ``int`` or
            the string form of the index (which is what a JSON round trip
            produces); both are tried before falling back to ``"UNK"``.
        word_to_index: Mapping from word to row index.
        n_words: Number of nearest rows to report.
        count: Sequence indexed by row index whose entries hold the
            occurrence count at position 1.

    Returns:
        numpy.ndarray: Row indices of the nearest embeddings, closest first.

    Raises:
        AssertionError: If ``word`` is not in ``word_to_index``.
    """
    assert word in word_to_index, 'Unknown word'

    word_index = word_to_index[word]
    print("Word occurs: " + str(count[word_index][1]) + " times.\n")

    # Plain nested lists do not support broadcasting subtraction.
    embeddings = np.asarray(embeddings)
    word_embedding = embeddings[word_index]

    distances = np.sum((embeddings - word_embedding) ** 2, axis=1)

    indices = np.argsort(distances)[:n_words]
    for i in indices:
        key = int(i)
        if key in index_to_word:
            label = index_to_word[key]
        else:
            # JSON-loaded mappings are keyed by str(index), not by int.
            label = index_to_word.get(str(key), "UNK")
        print(label)

    return indices

In [None]:
# Show the 20 embeddings nearest to "cowboy"; the returned indices are the cell output.
find_closest(
    embeddings=final_embeddings,
    word="cowboy",
    index_to_word=reversed_dictionary,
    word_to_index=vocab_dictionary,
    n_words=20,
    count=word_count,
)

## Plots a t-SNE projection of the most common words

In [None]:
def plot_with_labels(low_dim_embs, labels, filename):
    """Scatter-plot 2-D points, annotate each with its label and save the figure.

    Relies on ``plt`` (``matplotlib.pyplot``) being available in the notebook
    namespace when the function is called.

    Args:
        low_dim_embs: Array with one row of ``(x, y)`` coordinates per point;
            must have at least ``len(labels)`` rows.
        labels: Sequence of annotation texts; ``labels[i]`` is drawn next to
            row ``i`` of ``low_dim_embs``.
        filename: Path the figure is written to. A falsy value (``None`` or
            ``""``) skips saving and only draws the figure.

    Raises:
        AssertionError: If there are more labels than embedding rows.
    """
    assert low_dim_embs.shape[0] >= len(labels), 'More labels than embeddings'
    plt.figure(figsize=(18, 18))  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')

    # The filename argument was previously accepted but never used, so the
    # figure was never written to disk.
    if filename:
        plt.savefig(filename)

In [None]:
# Project the first embeddings to 2-D with t-SNE and plot them with their words.
# sklearn and matplotlib are optional; if either is missing the cell prints a
# hint instead of failing.
try:
    # pylint: disable=g-import-not-at-top
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt

    # NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter`;
    # switch the keyword if TSNE rejects this argument on your version.
    tsne = TSNE(
        perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact')
    # Never ask for more rows (or labels) than there are embeddings.
    plot_only = min(500, len(final_embeddings))
    # json.load yields nested lists, which cannot be sliced in two dimensions.
    low_dim_embs = tsne.fit_transform(np.asarray(final_embeddings)[:plot_only, :])
    # A JSON round trip turns integer keys into strings, so accept either form.
    # `range` replaces the Python-2-only `xrange`.
    labels = [
        reversed_dictionary[i] if i in reversed_dictionary else reversed_dictionary[str(i)]
        for i in range(plot_only)
    ]
    plot_with_labels(low_dim_embs, labels, os.path.join(gettempdir(),
                                                        'tsne.png'))

except ImportError as ex:
    print('Please install sklearn, matplotlib, and scipy to show embeddings.')
    print(ex)