[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jaidevd/ts-dataviz/blob/master/03_manifold_embedding.ipynb)

In [None]:
# !pip install gensim scikit-learn

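Download the data from [this Google Drive link](https://drive.google.com/open?id=1e4FUscAAgl-nV2y5xkobfWc4yN6R_BTD) and save it as `data/words.json`, which is the path the loading cell below reads.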

In [None]:
import json
import numpy as np
from gensim.models import Word2Vec
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap, TSNE

In [None]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

In [None]:
# Load the corpus from disk. JSON text is UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform default.
# NOTE(review): `mb` is passed to Word2Vec as its sentences argument, so it is
# presumably a list of tokenised sentences (list of lists of str) -- confirm.
with open('data/words.json', 'r', encoding='utf-8') as fin:
    mb = json.load(fin)

In [None]:
# Train 128-dimensional word vectors on the corpus.
# gensim 4.0 renamed the dimensionality keyword from `size` to `vector_size`.
# Try the current name first; an older gensim rejects the unknown keyword with
# a TypeError before any training starts, so the fallback costs nothing.
try:
    w2v = Word2Vec(mb, vector_size=128, min_count=20, workers=4)
except TypeError:
    # gensim < 4.0 only understands the old keyword name.
    w2v = Word2Vec(mb, size=128, min_count=20, workers=4)

In [None]:
# Embedding matrix of the trained model; display its shape as a sanity check.
# Note: `X` is reassigned further down to a smaller matrix of selected words.
X = w2v.wv.vectors
X.shape

In [None]:
# Main characters whose nearest neighbours are collected and plotted below.
characters = [
    'Krishna', 'Bhishma', 'Vidura', 'Dhritarashtra', 'Gandhari', 'Kunti',
    'Yudhishthira', 'Bhima', 'Arjuna', 'Nakula', 'Sahadeva',
    'Draupadi',
    'Duryodhana', 'Dussasana', 'Karna'
]
# Print every character that is missing from the model vocabulary (no output
# means all of them are present). Membership is tested on the KeyedVectors
# object itself because `wv.vocab` was removed in gensim 4.0, whereas
# `word in w2v.wv` is supported by both gensim 3.x and 4.x.
for c in characters:
    if c not in w2v.wv:
        print(c)

In [None]:
# Spot-check the embedding: words most similar to 'Krishna', with their scores.
w2v.wv.most_similar('Krishna')

In [None]:
# Gather the 20 most similar words for every main character; the set removes
# words that are neighbours of more than one character.
neighbours = {
    word
    for c in characters
    for word, _ in w2v.wv.most_similar(c, topn=20)
}

# Sort the words so that the row order of X (and therefore the plots) is the
# same on every kernel restart; iteration order of a set of strings is not.
chars = sorted(neighbours)

# One row per selected word. The width is read from the model instead of
# repeating the literal 128, so it cannot drift from the training cell.
X = np.zeros((len(chars), w2v.wv.vector_size))
for i, c in enumerate(chars):
    X[i] = w2v.wv[c]

In [None]:
# Shape of the selected-word matrix: (number of words, embedding width).
X.shape

In [None]:
# List the words that will be labelled in the plots below.
print(chars)

In [None]:
def plot_embeddings(chars, vectors, title=None):
    """Draw each label at its 2-D embedding coordinate on a new figure.

    Parameters
    ----------
    chars : sequence of str
        Labels to draw; paired positionally with the rows of ``vectors``.
    vectors : array-like of shape (n, 2)
        x/y coordinates, one row per label. Any array-like is accepted
        (it is converted with ``np.asarray``).
    title : str, optional
        Title for the axes. No title is set when omitted.

    Returns
    -------
    None
        The figure is left for the notebook backend to render.
    """
    vectors = np.asarray(vectors)
    fig, ax = plt.subplots(figsize=(10, 10))
    for c, loc in zip(chars, vectors):
        ax.annotate(c, loc, fontsize='large')
    # Only text is drawn, and text annotations do not update the data limits,
    # so frame the points explicitly with a 0.5 margin on every side.
    xmin, ymin = vectors.min(axis=0) - 0.5
    xmax, ymax = vectors.max(axis=0) + 0.5
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    if title is not None:
        ax.set_title(title)

# PCA

In [None]:
# Reduce the word vectors in X to two dimensions with PCA.
pca = PCA(n_components=2)
x_red = pca.fit_transform(X)

In [None]:
# Plot the PCA projection, labelling each point with its word.
plot_embeddings(chars, x_red)

# Isomap

In [None]:
# Reduce the same word vectors to two dimensions with Isomap and plot them.
# `x_red` is overwritten, so the PCA coordinates are no longer available here.
isomap = Isomap(n_components=2)
x_red = isomap.fit_transform(X)
plot_embeddings(chars, x_red)

# t-SNE

In [None]:
# Reduce the same word vectors to two dimensions with t-SNE and plot them.
# t-SNE is stochastic, so the seed is fixed to make the figure reproducible
# across kernel restarts.
tsne = TSNE(n_components=2, random_state=0)
x_red = tsne.fit_transform(X)
plot_embeddings(chars, x_red)