In [1]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import torch
import numpy as np
import pandas as pd
import plotly.express as px

In [53]:
# Config
# Path of the saved checkpoint to visualise. Keep exactly one assignment
# uncommented; the commented lines are alternative runs.
embedding_location = "../outputs/vq_vae_v5.8/encodings.pt"
# embedding_location = "../outputs/ablation_vq_vae_v2.2/encodings.pt"
# embedding_location = "../outputs/base_convolutional_vae_v1/embeddings.pt"
# embedding_location = "../outputs/sprites_vq_vae_v2/encodings.pt"
# embedding_location = "../outputs/old_sprites_ablation_vq_vae_v2.2/encodings.pt"
# embedding_location = "../outputs/base_sprites_convolutional_vae_v1/embeddings.pt"
# Dataset variant. "pokemon" and "sprites" each select a metadata column
# schema; any other value (e.g. "tinyhero") uses the metadata as-is.
mode = "pokemon" # pokemon, sprites, tinyhero
# Metadata attribute used to colour the plots.
# For Pokemon: height, weight, type1, type2, egg1, egg2, bmi;
# For sprites: id, pose, animation
# For any other mode this value is overwritten with "pose" further down.
label = "type1"

In [54]:
# Load the saved artifact. map_location="cpu" remaps tensors that were saved
# from a GPU, so the file also loads on CPU-only machines and the embeddings
# end up on the CPU, where scikit-learn can consume them.
# NOTE(review): torch.load unpickles the file; only open checkpoints from a
# trusted source.
checkpoint = torch.load(embedding_location, map_location="cpu")
embeddings = checkpoint['embeddings']
filenames = checkpoint['filenames']
# Per-sample metadata is stored under the 'color' key; it drives the plot
# colouring further down.
meta_information = checkpoint['color']

In [55]:
# Flatten each embedding into a single row so the result is a 2-D matrix
# with one row per embedding.
num_embeddings = len(embeddings)
embeddings = embeddings.reshape((num_embeddings, -1))
# embeddings = embeddings * 256 # Use with encodings; swap the number for num_classes

In [56]:
# Build `colors`, the per-sample values used to colour the scatter plots,
# from the checkpoint metadata according to `mode` and `label`.
if mode == "pokemon":
    all_colors = pd.DataFrame(meta_information, columns=["height", "weight", "type1", "type2", "egg1", "egg2"])
    if label in ["height", "weight"]:
        # Continuous attributes are bucketed into (up to) 10 quantile bins.
        # pd.to_numeric guards against the values arriving as strings, and
        # duplicates="drop" merges coinciding bin edges instead of raising
        # "Bin edges must be unique" when many samples share a value.
        colors = pd.qcut(pd.to_numeric(all_colors[label]), 10, duplicates="drop")
    elif label == "bmi":
        # NOTE(review): this is weight / height, not weight / height**2;
        # confirm which ratio is intended before reading it as a true BMI.
        weight_per_height = pd.to_numeric(all_colors["weight"]) / pd.to_numeric(all_colors["height"])
        colors = pd.qcut(weight_per_height, 10, duplicates="drop")
    else:
        # Categorical attributes (types, egg groups) are used directly.
        colors = all_colors[label]
elif mode == "sprites":
    all_colors = pd.DataFrame(meta_information, columns=["id", "pose", "animation"])
    colors = all_colors[label]
else:
    # Any other mode: the metadata is used as-is and the legend title is
    # forced to "pose", overriding the configured label.
    colors = meta_information
    label = "pose"

In [57]:
# Sanity check: after the reshape this should be a 2-D shape whose first
# entry is the number of embeddings.
embeddings.shape

torch.Size([740, 1024])

In [58]:
# Peek at the first flattened embedding.
embeddings[0]

tensor([0.5938, 0.5938, 0.5938,  ..., 0.5938, 0.5938, 0.5938])

# 2D t-SNE

In [60]:
# Project the flattened embeddings to 2-D with t-SNE.
# random_state pins the random number generator so that re-running the
# notebook reproduces the same layout. Despite its name, `tsne` holds the
# projected coordinates returned by fit_transform, not the estimator.
tsne = TSNE(
    n_components=2,
    init='pca',
    perplexity=10,
    learning_rate='auto',
    random_state=0,
    verbose=1,
).fit_transform(embeddings)

[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 740 samples in 0.002s...
[t-SNE] Computed neighbors for 740 samples in 0.046s...
[t-SNE] Computed conditional probabilities for sample 740 / 740
[t-SNE] Mean sigma: 0.762981



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



[t-SNE] KL divergence after 250 iterations with early exaggeration: 122.393417
[t-SNE] KL divergence after 1000 iterations: 2.932643


In [61]:
# Interactive scatter of the 2-D t-SNE coordinates: points are coloured by
# `colors`, the legend is titled with `label`, and hovering shows the
# source filename.
fig = px.scatter(
    tsne,
    x=0,
    y=1,
    color=colors,
    hover_name=filenames,
    labels={'color': label},
    title='2D t-SNE',
)
fig.show()

# 2D PCA

In [25]:
# Fit a PCA with default settings on the flattened embeddings and keep the
# projected coordinates in `pca`.
pca_model = PCA()
pca = pca_model.fit_transform(embeddings)

In [26]:
# Interactive scatter of the first two PCA columns, coloured by `colors`
# with the source filename shown on hover.
fig = px.scatter(
    pca,
    x=0,
    y=1,
    color=colors,
    hover_name=filenames,
    labels={'color': label},
    title="2D PCA",
)
fig.show()

# 3D t-SNE

In [25]:
# Project the flattened embeddings to 3-D with t-SNE. This overwrites the
# `tsne` coordinates produced by the 2-D section.
# random_state pins the random number generator so that re-running the
# notebook reproduces the same layout.
# NOTE(review): no perplexity is passed here, whereas the 2-D run uses
# perplexity=10; confirm the difference is intentional.
tsne = TSNE(
    n_components=3,
    init='pca',
    learning_rate='auto',
    random_state=0,
    verbose=1,
).fit_transform(embeddings)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 3648 samples in 0.168s...
[t-SNE] Computed neighbors for 3648 samples in 8.715s...
[t-SNE] Computed conditional probabilities for sample 1000 / 3648
[t-SNE] Computed conditional probabilities for sample 2000 / 3648
[t-SNE] Computed conditional probabilities for sample 3000 / 3648
[t-SNE] Computed conditional probabilities for sample 3648 / 3648
[t-SNE] Mean sigma: 15.275189



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



[t-SNE] KL divergence after 250 iterations with early exaggeration: 67.489899
[t-SNE] KL divergence after 1000 iterations: 1.014009


In [26]:
# Interactive 3-D scatter of the t-SNE coordinates, coloured by `colors`
# with the source filename shown on hover.
fig = px.scatter_3d(
    tsne,
    x=0,
    y=1,
    z=2,
    color=colors,
    hover_name=filenames,
    labels={'color': label},
    title='3D t-SNE',
)
fig.show()