# Embeddings visualization notebook

Uses t-SNE to visualize embeddings.
Embedding visualization led to the following changes in the model architecture:

- biased initialization, where similar classes of champions (mages, assassins, etc.) are initialised with similar embeddings
- adjacent patch regularization, where patch and champion-patch embeddings are forced to be close to each other for adjacent patches

Before these changes the embeddings were normally distributed; afterwards they can be interpreted.
Model validation also improved very slightly, and the model "vibe check" improved quite a bit: the model's predictions seem more human after these changes.


In [None]:
import numpy as np
import torch
import pickle
from sklearn.manifold import TSNE
import plotly.express as px
import pandas as pd
from pathlib import Path

# Import your Model class and utilities
from utils.match_prediction.model import Model
from utils.match_prediction import (
    MODEL_PATH,
    CHAMPION_ID_ENCODER_PATH,
    MODEL_CONFIG_PATH,
    PATCH_MAPPING_PATH,
    get_best_device,
    load_model_state_dict,
)
from utils.match_prediction.champions import Champion
from utils.match_prediction.config import TrainingConfig

def _read_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Device chosen by the project helper; it is handed to the checkpoint loader below.
device = get_best_device()

# Pickled artefacts: the champion-ID encoder and the patch mapping are stored
# under the "mapping" key of their files; the model parameters are the
# unpickled object itself.
champion_id_encoder = _read_pickle(CHAMPION_ID_ENCODER_PATH)["mapping"]
model_params = _read_pickle(MODEL_CONFIG_PATH)
patch_mapping = _read_pickle(PATCH_MAPPING_PATH)["mapping"]

config = TrainingConfig()

# Build the network from the saved parameters, pass it through
# load_model_state_dict together with the checkpoint path, then switch it
# to evaluation mode.
model = load_model_state_dict(
    Model(
        config=config,
        dropout=model_params["dropout"],
        hidden_dims=model_params["hidden_dims"],
    ),
    path=MODEL_PATH,
    device=device,
)
model.eval()

# Champion-patch embedding table as a NumPy array, plus the sizes used by the
# later cells to index it as champ_idx * num_patches + patch_idx.
num_patches = len(patch_mapping)
print(f"num_patches: {num_patches}")
num_champions = model.num_champions
embeddings = model.champion_patch_embedding.weight.detach().cpu().numpy()


# Champion ID -> display name lookup taken from the Champion enumeration.
id_to_name = {member.id: member.display_name for member in Champion}

# Keys of the patch mapping, in the mapping's iteration order.
patch_values = np.array(list(patch_mapping))
print(patch_values)

In [None]:
def _champion_label(encoder_index):
    """Return the display name for the champion at ``encoder_index``.

    Numeric encoder entries are looked up in ``id_to_name`` (falling back to
    an "Unknown ID" label); entries that cannot be parsed as integers are
    shown verbatim.
    """
    raw_id = champion_id_encoder.classes_[encoder_index]
    try:
        numeric_id = int(raw_id)
    except ValueError:
        return str(raw_id)
    return id_to_name.get(numeric_id, f"Unknown ID {numeric_id}")


# Resolve every champion name once; both the plot and the distance report
# below use the same labels.
champion_names = [_champion_label(index) for index in range(num_champions)]

# One record per (champion, patch) pair. The embedding table is indexed as
# champ_idx * num_patches + patch_idx.
champion_patch_data = [
    {
        "embedding": embeddings[champ_idx * num_patches + patch_idx],
        "champion": champ_name,
        "patch": patch_value,
        "label": f"{champ_name} ({patch_value})",
    }
    for champ_idx, champ_name in enumerate(champion_names)
    for patch_idx, patch_value in enumerate(patch_values)
]

# Reduce the collected embeddings to two dimensions; the fixed seed keeps the
# layout reproducible.
all_embeddings = np.stack([record["embedding"] for record in champion_patch_data])
tsne = TSNE(random_state=42, n_components=2)
embeddings_2d = tsne.fit_transform(all_embeddings)

# Tabular form expected by Plotly Express.
champion_column = [record["champion"] for record in champion_patch_data]
patch_column = [record["patch"] for record in champion_patch_data]
label_column = [record["label"] for record in champion_patch_data]
df = pd.DataFrame(
    {
        "x": embeddings_2d[:, 0],
        "y": embeddings_2d[:, 1],
        "Champion": champion_column,
        "Patch": patch_column,
        "Label": label_column,
    }
)

# Interactive scatter plot, one colour per champion.
axis_titles = {"x": "t-SNE Component 1", "y": "t-SNE Component 2"}
fig = px.scatter(
    df,
    x="x",
    y="y",
    color="Champion",
    hover_data=["Champion", "Patch"],
    title="Champion-Patch Embedding Visualization (t-SNE)",
    labels=axis_titles,
)

# Marker size applies to marker-mode traces only.
fig.update_traces(marker={"size": 8}, selector={"mode": "markers"})

# Legend sits just outside the right edge of the plotting area.
fig.update_layout(
    showlegend=True,
    legend={"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 1.02},
    width=1200,
    height=800,
)

fig.show()

# Optional: for each champion, report the largest Euclidean distance between
# any two of its patch embeddings.
print("\nAnalyzing patch-to-patch distances for champions:")
for champ_idx, champ_name in enumerate(champion_names):
    first_row = champ_idx * num_patches
    patch_embeds = embeddings[first_row : first_row + num_patches]
    # Broadcast to an (n, n, dim) array of pairwise differences.
    pairwise_diff = patch_embeds[:, np.newaxis, :] - patch_embeds[np.newaxis, :, :]
    max_dist = np.linalg.norm(pairwise_diff, axis=2).max()
    print(f"{champ_name}: Max distance between patches = {max_dist:.4f}")

# Champions 3D visualization

In [None]:
# Perform t-SNE dimensionality reduction to 3D
from sklearn.manifold import TSNE
import plotly.express as px
import pandas as pd

# Project every row of the champion-patch embedding table into three
# dimensions; the fixed seed keeps the layout reproducible between runs.
tsne = TSNE(perplexity=30, random_state=42, n_components=3)
embeddings_3d = tsne.fit_transform(embeddings)  # one 3-D row per row of `embeddings`

In [None]:
# Build one plot row per (champion, patch) pair from the 3-D projection.
# NOTE(review): "patch" here comes from patch_mapping.values(), whereas the
# 2-D cell labels patches with the mapping's keys -- confirm which is intended.
patch_labels = list(patch_mapping.values())

plot_data = []
for champ_idx in range(num_champions):
    encoder_entry = champion_id_encoder.classes_[champ_idx]
    try:
        numeric_id = int(encoder_entry)
    except ValueError:
        # Non-numeric encoder entries are displayed as-is.
        champ_name = str(encoder_entry)
    else:
        champ_name = id_to_name.get(numeric_id, f"Unknown ID {numeric_id}")

    # Rows of one champion are contiguous: champ_idx * num_patches + patch_idx.
    first_row = champ_idx * num_patches
    for offset, patch in enumerate(patch_labels):
        coord_x, coord_y, coord_z = embeddings_3d[first_row + offset]
        plot_data.append(
            {
                "x": coord_x,
                "y": coord_y,
                "z": coord_z,
                "champion": champ_name,
                "patch": patch,
            }
        )

df = pd.DataFrame(plot_data)

# 3-D scatter plot, one trace (colour) per champion.
fig = px.scatter_3d(
    df,
    x="x",
    y="y",
    z="z",
    color="champion",
    hover_data=["champion", "patch"],
    title="3D t-SNE Visualization of Champion-Patch Embeddings",
)

# Every trace starts hidden and can be toggled from the legend.
fig.update_traces(marker={"size": 5}, visible="legendonly")

# Buttons that show or hide all traces at once.
show_all_button = {"label": "Show All", "method": "update", "args": [{"visible": True}]}
hide_all_button = {
    "label": "Hide All",
    "method": "update",
    "args": [{"visible": "legendonly"}],
}
toggle_menu = {
    "type": "buttons",
    "showactive": False,
    "buttons": [show_all_button, hide_all_button],
    "x": 1.1,
    "y": 1.1,
    "xanchor": "right",
    "yanchor": "top",
}

# Axis titles, an enlarged canvas and the show/hide menu.
fig.update_layout(
    scene={"xaxis_title": "t-SNE 1", "yaxis_title": "t-SNE 2", "zaxis_title": "t-SNE 3"},
    width=1400,
    height=1000,
    updatemenus=[toggle_menu],
)

fig.show()

# Patch embed visualization


In [None]:
import torch
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


# Assuming 'model' is your trained model instance
def visualize_patch_embeddings(model: Model) -> None:
    """Plot a 2-D t-SNE projection of the model's patch embeddings.

    Reads ``model.patch_embedding.weight`` and ``model.patch_mapping``,
    reduces the embedding rows to two dimensions with t-SNE and shows a
    matplotlib scatter plot. Every mapped row is annotated with its patch
    version and each point is coloured by its row index.

    Args:
        model: Model exposing ``patch_embedding`` and ``patch_mapping``.
            ``patch_mapping`` is inverted and then looked up with the indices
            ``0 .. len(patch_mapping) - 1``, so it must map every patch
            version to a distinct index in that range.

    Returns:
        None. The inverted mapping is printed and the figure is displayed
        with ``plt.show()``.

    Raises:
        KeyError: If an index in ``0 .. len(patch_mapping) - 1`` is missing
            from the values of ``model.patch_mapping``.
    """
    # Embedding table as a NumPy array, one row per embedding index.
    patch_embeddings = model.patch_embedding.weight.detach().cpu().numpy()

    # Rule of thumb: perplexity ~ n_samples / 3, capped at 30. t-SNE requires
    # a strictly positive perplexity, so clamp to 1 when there are fewer than
    # three patches (where n_samples // 3 would be 0).
    n_samples = len(model.patch_mapping)
    perplexity = max(1, min(30, n_samples // 3))

    # Apply t-SNE with a fixed seed so the layout is reproducible.
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    patch_embeddings_2d = tsne.fit_transform(patch_embeddings)

    # Invert the mapping (version -> index) so rows can be labelled by version.
    patch_to_version = {v: k for k, v in model.patch_mapping.items()}
    print(patch_to_version)
    patch_versions = [patch_to_version[i] for i in range(n_samples)]

    # Create the plot
    plt.figure(figsize=(12, 8))
    scatter = plt.scatter(
        patch_embeddings_2d[:, 0],
        patch_embeddings_2d[:, 1],
        # One colour value per plotted point (row order). The array length
        # must equal the number of points, otherwise scatter raises.
        c=np.arange(len(patch_embeddings_2d)),
        cmap="viridis",
        alpha=0.6,
    )

    # Add patch version labels next to the corresponding points.
    for i, patch in enumerate(patch_versions):
        plt.annotate(
            f"{patch}",
            (patch_embeddings_2d[i, 0], patch_embeddings_2d[i, 1]),
            xytext=(5, 5),
            textcoords="offset points",
            fontsize=8,
            alpha=0.7,
        )

    plt.colorbar(scatter, label="Patch Order")
    plt.title("t-SNE Visualization of Patch Embeddings")
    plt.xlabel("t-SNE Component 1")
    plt.ylabel("t-SNE Component 2")

    # Add grid for better readability
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


# Plot the patch embeddings of the model loaded earlier in this notebook.
visualize_patch_embeddings(model)