In [None]:
import numpy as np
import torch
import pickle
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from pathlib import Path

# Import your Model class (adjust the import path as needed)
from utils.match_prediction.model import Model
from utils.match_prediction import (
    MODEL_PATH,
    ENCODERS_PATH,
    MODEL_CONFIG_PATH,
    get_best_device,
)

device = get_best_device()


def _read_pickle(path):
    """Return the single object stored in the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE(review): unpickling can execute arbitrary code, so both files below
# must come from a trusted source (presumably this project's own training run).
label_encoders = _read_pickle(ENCODERS_PATH)  # label encoders
model_params = _read_pickle(MODEL_CONFIG_PATH)  # model configuration

# Rebuild the network from the hyper-parameters stored in the configuration.
_MODEL_KWARG_NAMES = (
    "num_categories",
    "num_champions",
    "embed_dim",
    "dropout",
    "hidden_dims",
)
model = Model(**{key: model_params[key] for key in _MODEL_KWARG_NAMES})

# Restore the saved weights (tensors only), then move the model to the chosen
# device and switch it to evaluation mode.
state_dict = torch.load(MODEL_PATH, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

# Step 1: Pull the champion embedding matrix out of the model as a NumPy array
embedding_weights = model.champion_embedding.weight
embeddings = embedding_weights.detach().cpu().numpy()
print(f"Embeddings shape: {embeddings.shape}")  # Should be (num_champions, embed_dim)

# Step 2: Champion labels known to the fitted label encoder
# NOTE(review): assumes classes_[i] corresponds to embedding row i — TODO confirm
champion_encoder = label_encoders["champion_ids"]
champion_labels = champion_encoder.classes_

# Step 3: Project to 2-D with PCA (independent of the t-SNE projection below)
pca = PCA(n_components=2)
embeddings_2d_pca = pca.fit_transform(embeddings)

# Step 4: Project to 2-D with t-SNE, seeded so the layout is repeatable
tsne = TSNE(n_components=2, random_state=42)
embeddings_2d_tsne = tsne.fit_transform(embeddings)

def plot_embedding_scatter(
    points, method, title, figsize=(10, 8), labels=None, label_step=10
):
    """Draw and show one 2-D scatter plot of projected champion embeddings.

    Args:
        points: 2-D array; column 0 is plotted on x and column 1 on y.
        method: Projection name used in the axis labels (e.g. "t-SNE").
        title: Title shown above the plot.
        figsize: Figure size in inches, forwarded to ``plt.subplots``.
        labels: Optional sequence of per-point labels. When given, every
            ``label_step``-th point is annotated with its label.
        label_step: Stride between annotated points; keeps the plot readable.
    """
    _, ax = plt.subplots(figsize=figsize)
    ax.scatter(points[:, 0], points[:, 1], s=10)

    if labels is not None:
        for idx in range(0, len(labels), label_step):
            # str() so the annotation text is a plain string even when the
            # label is a NumPy scalar.
            ax.annotate(
                str(labels[idx]),
                (points[idx, 0], points[idx, 1]),
                fontsize=8,
            )

    ax.set(
        title=title,
        xlabel=f"{method} Component 1",
        ylabel=f"{method} Component 2",
    )
    plt.show()


# Step 5: Plot t-SNE results
plot_embedding_scatter(
    embeddings_2d_tsne, "t-SNE", "t-SNE Visualization of Champion Embeddings"
)

# Step 6: Plot PCA results
plot_embedding_scatter(
    embeddings_2d_pca, "PCA", "PCA Visualization of Champion Embeddings"
)

# Optional: Plot t-SNE with subset labels to avoid clutter
# (every 10th champion is labelled via the default label_step)
plot_embedding_scatter(
    embeddings_2d_tsne,
    "t-SNE",
    "t-SNE Visualization with Subset Labels",
    figsize=(12, 10),
    labels=champion_labels,
)

# Optional: Fit a PCA that keeps all components and report how much of the
# total variance the leading components capture.
pca_full = PCA().fit(embeddings)
cumulative_variance = pca_full.explained_variance_ratio_.cumsum()
print("Cumulative variance for first 10 components:", cumulative_variance[:10])

In [None]:
import plotly.express as px
import pandas as pd
from utils.rl.champions import Champion

# Step 1: Lookup table from champion id to display name, built from the Champion enum
id_to_name = dict((champ.id, champ.display_name) for champ in Champion)


def _champion_display_name(raw_label):
    """Translate one encoder label into a human-readable champion name.

    Labels that parse as an integer are looked up in ``id_to_name``; ids that
    are missing from the table become ``"Unknown ID <label>"``. Labels that do
    not parse as an integer (e.g. 'UNKNOWN') are returned as plain strings.
    """
    try:
        champion_id = int(raw_label)
    except ValueError:
        return str(raw_label)
    return id_to_name.get(champion_id, f"Unknown ID {raw_label}")


champion_names = [_champion_display_name(label) for label in champion_labels]

# Column names shared by the DataFrame and the plot definition
x_col = "t-SNE Component 1"
y_col = "t-SNE Component 2"
name_col = "Champion"

# One row per champion: its 2-D t-SNE coordinates plus its display name
df = pd.DataFrame(
    {
        x_col: embeddings_2d_tsne[:, 0],
        y_col: embeddings_2d_tsne[:, 1],
        name_col: champion_names,
    }
)

# Interactive scatter plot: each point shows the champion name as a text
# label and also lists it in the hover data.
fig = px.scatter(df, x=x_col, y=y_col, text=name_col, hover_data=[name_col])

# Small markers and small labels placed above each point
fig.update_traces(
    marker={"size": 5},
    textposition="top center",
    textfont={"size": 8},
)
fig.update_layout(
    title="t-SNE Visualization of Champion Embeddings",
    showlegend=False,
)
fig.show()