In [1]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import torch
import numpy as np
import plotly.express as px

In [2]:
# Config: path of the saved embeddings file that the load cell passes to torch.load.
# Alternative inputs are kept below; uncomment exactly one assignment to switch.
# embedding_location = "../outputs/vq_vae_v5.8/embeddings.pt"
# embedding_location = "../outputs/vq_vae_v5.8/encodings.pt"
embedding_location = "../outputs/convolutional_vae_v16.5/embeddings.pt"

In [3]:
# Load the saved checkpoint and pull out the fields used by the plots below:
# the embedding tensor, the filenames (hover labels) and the Pokemon types
# (point colours) -- presumably one entry per embedding row.
#
# map_location='cpu' remaps any tensors that were saved from a GPU onto the CPU,
# so the notebook also runs on machines without that device and scikit-learn
# can convert the tensor to a NumPy array.
# NOTE(review): torch.load unpickles the file -- only point it at trusted files.
checkpoint = torch.load(embedding_location, map_location='cpu')
embeddings = checkpoint['embeddings']
filenames = checkpoint['filenames']
pokemon_types = checkpoint['pokemon_types']
num_embeddings = embeddings.shape[0]
# Disabled preprocessing steps, presumably for the alternative VQ-VAE inputs
# listed in the config cell (flatten each sample to one row / rescale) --
# TODO confirm before re-enabling.
# embeddings = embeddings.reshape(num_embeddings, -1)
# embeddings = embeddings * 256

In [13]:
# Sanity check: display the shape of the loaded embeddings; its first dimension
# is what the load cell stores as num_embeddings.
embeddings.shape

torch.Size([740, 256])

# 2D t-SNE

In [23]:
# Project the embeddings to 2 dimensions with t-SNE for the scatter plot below.
# random_state is pinned so that Restart & Run All reproduces the same layout;
# without it the result can differ from run to run.
tsne = TSNE(
    n_components=2,
    init='pca',
    learning_rate='auto',
    random_state=42,
    verbose=1,
).fit_transform(embeddings)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 740 samples in 0.001s...
[t-SNE] Computed neighbors for 740 samples in 0.032s...
[t-SNE] Computed conditional probabilities for sample 740 / 740
[t-SNE] Mean sigma: 3.907058



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



[t-SNE] KL divergence after 250 iterations with early exaggeration: 79.624786
[t-SNE] KL divergence after 1000 iterations: 2.183717


In [24]:
# Interactive scatter of the t-SNE projection: column 0 on x, column 1 on y,
# coloured by Pokemon type, with the filename shown as the hover title.
fig = px.scatter(
    tsne,
    x=0,
    y=1,
    color=pokemon_types,
    hover_name=filenames,
    labels=dict(color='type'),
    title='2D t-SNE',
)
fig.show()

# 2D PCA

In [9]:
# Fit a PCA with default settings on the embeddings and keep the projected
# coordinates; the plot below uses columns 0 and 1 of the result.
_pca_model = PCA()
pca = _pca_model.fit_transform(embeddings)

In [17]:
# Interactive scatter of the PCA projection: column 0 on x, column 1 on y,
# coloured by Pokemon type, with the filename shown as the hover title.
fig = px.scatter(
    pca,
    x=0,
    y=1,
    color=pokemon_types,
    hover_name=filenames,
    labels=dict(color='type'),
    title="2D PCA",
)
fig.show()

# 3D t-SNE

In [19]:
# Project the embeddings to 3 dimensions with t-SNE for the 3-D scatter below.
# NOTE: this rebinds `tsne`, replacing the 2-D projection computed earlier.
# random_state is pinned so that Restart & Run All reproduces the same layout;
# without it the result can differ from run to run.
tsne = TSNE(
    n_components=3,
    init='pca',
    learning_rate='auto',
    random_state=42,
    verbose=1,
).fit_transform(embeddings)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 740 samples in 0.001s...
[t-SNE] Computed neighbors for 740 samples in 0.027s...
[t-SNE] Computed conditional probabilities for sample 740 / 740
[t-SNE] Mean sigma: 3.907058



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



[t-SNE] KL divergence after 250 iterations with early exaggeration: 98.731110
[t-SNE] KL divergence after 1000 iterations: 2.820067


In [20]:
# Interactive 3-D scatter of the t-SNE projection: columns 0, 1 and 2 map to
# x, y and z, coloured by Pokemon type, with the filename as the hover title.
fig = px.scatter_3d(
    tsne,
    x=0,
    y=1,
    z=2,
    color=pokemon_types,
    hover_name=filenames,
    labels=dict(color='type'),
    title='3D t-SNE',
)
fig.show()