In [1]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import torch
import numpy as np
import pandas as pd
import plotly.express as px

In [46]:
# Config: which saved embedding/encoding file to visualize.
# Other files that have been used here (swap into the assignment below):
#   "../outputs/vq_vae_v5.8/embeddings.pt"
#   "../outputs/convolutional_vae_v16.5/embeddings.pt"
#   "../outputs/tinyhero_vq_vae_v2.1/embeddings.pt"
#   "../outputs/tinyhero_vq_vae_v2.1/encodings.pt"
embedding_location = "../outputs/vq_vae_v5.8/encodings.pt"

# True: the checkpoint metadata is split into named Pokemon columns for coloring.
# False: the metadata is used directly as the color values.
pokemon = True

In [98]:
# Load the saved checkpoint dictionary.
# map_location="cpu" remaps any tensors saved from a GPU onto the CPU, so the
# file also opens on machines without CUDA.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only open checkpoints from a trusted source.
checkpoint = torch.load(embedding_location, map_location="cpu")
embeddings = checkpoint['embeddings']
filenames = checkpoint['filenames']
# The per-sample metadata used for coloring is stored under the 'color' key.
meta_information = checkpoint['color']

In [104]:
# Config: which metadata field colors the plots.
# For Pokemon: height, weight, type1, type2, egg1, egg2, bmi; else "pose"
label = "bmi"

# Flatten each embedding so the data is a 2D (num_embeddings, features) matrix.
num_embeddings = embeddings.shape[0]
embeddings = embeddings.reshape((num_embeddings, -1))
# embeddings = embeddings * 512 # Use with encodings; swap the number for num_classes

In [107]:
# Build the per-sample color values used by the scatter plots below.
if pokemon:
    # Name the metadata columns so a single field can be selected via `label`.
    metadata_columns = ["height", "weight", "type1", "type2", "egg1", "egg2"]
    all_colors = pd.DataFrame(meta_information, columns=metadata_columns)
    if label in ("height", "weight"):
        # Bucket continuous values into deciles; duplicates="drop" merges
        # identical bin edges instead of raising when many samples tie.
        colors = pd.qcut(all_colors[label], 10, duplicates="drop")
    elif label == "bmi":
        # Body-mass index is weight divided by height squared.
        colors = pd.qcut(
            all_colors["weight"] / all_colors["height"] ** 2,
            10,
            duplicates="drop",
        )
    elif label in metadata_columns:
        # Remaining fields are used as-is.
        colors = all_colors[label]
    else:
        # Same exception type as the plain column lookup, but with the valid options listed.
        raise KeyError(
            f"Unknown label {label!r}; expected one of {metadata_columns + ['bmi']}"
        )
else:
    # Non-Pokemon datasets: the stored metadata is used directly as the color values.
    colors = meta_information

In [108]:
# Sanity check: number of color values (should equal the number of embedding rows being plotted).
colors.shape

(740,)

In [109]:
# Sanity check: shape of the flattened embeddings, (num_embeddings, features).
embeddings.shape

torch.Size([740, 1024])

In [110]:
# Peek at the first flattened embedding vector.
embeddings[0]

tensor([0.5938, 0.5938, 0.5938,  ..., 0.5938, 0.5938, 0.5938])

# 2D t-SNE

In [53]:
# Project the flattened embeddings to 2D with t-SNE.
# random_state is fixed so the layout is reproducible across kernel restarts;
# without it every run produces a different projection.
tsne = TSNE(
    n_components=2,
    init='pca',
    perplexity=50,
    learning_rate='auto',
    random_state=0,
    verbose=1,
).fit_transform(embeddings)

[t-SNE] Computing 151 nearest neighbors...
[t-SNE] Indexed 740 samples in 0.003s...
[t-SNE] Computed neighbors for 740 samples in 0.133s...
[t-SNE] Computed conditional probabilities for sample 740 / 740
[t-SNE] Mean sigma: 1.092128




[t-SNE] KL divergence after 250 iterations with early exaggeration: 89.222946
[t-SNE] KL divergence after 1000 iterations: 1.769850


In [111]:
# Scatter plot of the 2D t-SNE projection: columns 0 and 1 are the axes,
# points are colored by the selected label and named by filename on hover.
fig = px.scatter(
    tsne,
    x=0,
    y=1,
    color=colors,
    labels={'color': label},
    hover_name=filenames,
    title='2D t-SNE',
)
fig.show()

# 2D PCA

In [36]:
# Fit PCA with default settings and project the flattened embeddings onto its components.
pca_model = PCA()
pca = pca_model.fit_transform(embeddings)

In [37]:
# Scatter plot of the first two PCA columns, colored by the selected label
# and named by filename on hover.
fig = px.scatter(
    pca,
    x=0,
    y=1,
    color=colors,
    labels={'color': label},
    hover_name=filenames,
    title="2D PCA",
)
fig.show()

# 3D t-SNE

In [25]:
# Project the flattened embeddings to 3D with t-SNE.
# random_state is fixed so the layout is reproducible across kernel restarts;
# without it every run produces a different projection.
tsne = TSNE(
    n_components=3,
    init='pca',
    learning_rate='auto',
    random_state=0,
    verbose=1,
).fit_transform(embeddings)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 3648 samples in 0.168s...
[t-SNE] Computed neighbors for 3648 samples in 8.715s...
[t-SNE] Computed conditional probabilities for sample 1000 / 3648
[t-SNE] Computed conditional probabilities for sample 2000 / 3648
[t-SNE] Computed conditional probabilities for sample 3000 / 3648
[t-SNE] Computed conditional probabilities for sample 3648 / 3648
[t-SNE] Mean sigma: 15.275189



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



[t-SNE] KL divergence after 250 iterations with early exaggeration: 67.489899
[t-SNE] KL divergence after 1000 iterations: 1.014009


In [26]:
# 3D scatter plot of the t-SNE projection: columns 0, 1 and 2 are the axes,
# points are colored by the selected label and named by filename on hover.
fig = px.scatter_3d(
    tsne,
    x=0,
    y=1,
    z=2,
    color=colors,
    labels={'color': label},
    hover_name=filenames,
    title='3D t-SNE',
)
fig.show()