In [None]:
import numpy as np
import torch
import pickle
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from pathlib import Path

# Import your Model class (adjust the import path as needed)
from utils.match_prediction.model import Model
from utils.match_prediction import (
    MODEL_PATH,
    ENCODERS_PATH,
    MODEL_CONFIG_PATH,
    get_best_device,
)

# Pick the compute device once; the saved weights are mapped onto it below.
device = get_best_device()


def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Pickled artifacts: the label encoders and the model configuration
label_encoders = _load_pickle(ENCODERS_PATH)
model_params = _load_pickle(MODEL_CONFIG_PATH)

# Rebuild the model from the stored configuration values
_CONFIG_KEYS = ("num_categories", "embed_dim", "dropout", "hidden_dims")
model = Model(
    num_champions=171,  # TODO: load from champion enum
    **{key: model_params[key] for key in _CONFIG_KEYS},
)

# Restore the saved state dict, then move the model to the device in eval mode
state_dict = torch.load(MODEL_PATH, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

# Step 1: Pull the champion embedding weights out of the model as a NumPy array
champion_weight = model.champion_embedding.weight
embeddings = champion_weight.detach().cpu().numpy()
print(f"Embeddings shape: {embeddings.shape}")  # Should be (num_champions, embed_dim)

# Step 2: Labels come from the classes_ of the "champion_ids" encoder
champion_labels = label_encoders["champion_ids"].classes_

# Steps 3 and 4: Build both 2-component reducers, then project the embeddings
# with each one (t-SNE first, then PCA)
tsne, pca = TSNE(n_components=2, random_state=42), PCA(n_components=2)
embeddings_2d_tsne, embeddings_2d_pca = (
    reducer.fit_transform(embeddings) for reducer in (tsne, pca)
)

def plot_embedding_2d(points, method, title, labels=None, label_every=10, figsize=(10, 8)):
    """Scatter-plot a 2-D projection, optionally annotating a subset of points.

    Args:
        points: 2-D array read as ``points[:, 0]`` (x) and ``points[:, 1]`` (y).
        method: Prefix for the axis labels, e.g. ``"t-SNE"`` or ``"PCA"``.
        title: Figure title.
        labels: Optional sequence; ``labels[i]`` is drawn next to ``points[i]``.
            When ``None`` no annotations are drawn.
        label_every: Annotate every ``label_every``-th label to avoid clutter.
        figsize: Figure size passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    plt.scatter(points[:, 0], points[:, 1], s=10)
    if labels is not None:
        for i in range(0, len(labels), label_every):
            plt.annotate(
                labels[i],
                (points[i, 0], points[i, 1]),
                fontsize=8,
            )
    plt.title(title)
    plt.xlabel(f"{method} Component 1")
    plt.ylabel(f"{method} Component 2")
    plt.show()


# Step 5: Plot t-SNE results
plot_embedding_2d(
    embeddings_2d_tsne, "t-SNE", "t-SNE Visualization of Champion Embeddings"
)

# Step 6: Plot PCA results
plot_embedding_2d(
    embeddings_2d_pca, "PCA", "PCA Visualization of Champion Embeddings"
)

# Optional: Plot t-SNE with subset labels to avoid clutter
plot_embedding_2d(
    embeddings_2d_tsne,
    "t-SNE",
    "t-SNE Visualization with Subset Labels",
    labels=champion_labels,
    label_every=10,  # Label every 10th champion
    figsize=(12, 10),
)

# Optional: fit a PCA that keeps all components and report how the
# explained-variance ratio accumulates over the leading components
pca_full = PCA().fit(embeddings)
cumulative_variance = pca_full.explained_variance_ratio_.cumsum()
print("Cumulative variance for first 10 components:", cumulative_variance[:10])

In [None]:
import plotly.express as px
import pandas as pd
from utils.rl.champions import Champion

# Step 1: Build the id -> display name lookup from the Champion enum
id_to_name = {champ.id: champ.display_name for champ in Champion}


def _label_to_name(raw_label):
    """Turn one encoder label into the string shown on the plot."""
    try:
        champion_id = int(raw_label)
    except ValueError:
        # Non-numeric labels such as 'UNKNOWN' are shown verbatim
        return str(raw_label)
    return id_to_name.get(champion_id, f"Unknown ID {raw_label}")


champion_names = [_label_to_name(raw_label) for raw_label in champion_labels]

# One row per champion: both t-SNE coordinates plus the resolved name
tsne_x, tsne_y = embeddings_2d_tsne[:, 0], embeddings_2d_tsne[:, 1]
df = pd.DataFrame(
    {
        't-SNE Component 1': tsne_x,
        't-SNE Component 2': tsne_y,
        'Champion': champion_names,
    }
)

# Interactive scatter: every point carries its champion name as text and hover data.
# The update_* calls are chained onto the figure returned by px.scatter.
fig = (
    px.scatter(
        df,
        x='t-SNE Component 1',
        y='t-SNE Component 2',
        text='Champion',
        hover_data=['Champion'],
    )
    .update_traces(
        marker={'size': 5},
        textposition='top center',
        textfont={'size': 8},
    )
    .update_layout(
        title='t-SNE Visualization of Champion Embeddings',
        showlegend=False,
    )
)
fig.show()

In [None]:
# Show the model's positional-embedding scale as a single horizontal bar
pos_scale_value = model.pos_scale.item()

scale_fig, scale_ax = plt.subplots(figsize=(8, 2))
scale_ax.barh(['Positional Scale'], [pos_scale_value], color='skyblue')
scale_ax.set_title('Positional Embedding Scale Parameter')
scale_ax.set_xlabel('Scale Value')
# Faint reference line at zero so the sign of the scale is easy to read
scale_ax.axvline(x=0, color='black', linestyle='-', alpha=0.2)
scale_ax.grid(True, alpha=0.3)
plt.show()

In [None]:
from utils.match_prediction.column_definitions import (
    NUMERICAL_COLUMNS,
    CATEGORICAL_COLUMNS,
    POSITIONS,
)

# Visualize positional embeddings as a heatmap: one row per position index,
# one column per embedding dimension
pos_embeddings = model.pos_embedding.squeeze(0).detach().cpu().numpy()
num_positions = pos_embeddings.shape[0]
embed_dim = pos_embeddings.shape[1]

plt.figure(figsize=(12, 8))
im = plt.imshow(pos_embeddings, aspect='auto', cmap='RdBu')
plt.colorbar(im, label='Embedding Value')
plt.title('Positional Embeddings Heatmap')
plt.xlabel('Embedding Dimension')
plt.ylabel('Position Index')

# Add position type labels on y-axis.
# NOTE(review): the labelling assumes positions are ordered as categorical
# columns, then champion slots, then numerical — TODO confirm against the model.
position_labels = []
num_categorical = len(CATEGORICAL_COLUMNS)
roles_per_team = len(POSITIONS)
num_champions = roles_per_team * 2
num_numerical = 1 if NUMERICAL_COLUMNS else 0  # not read by the labelling below

# Create labels for each position type
for i in range(num_positions):
    if i < num_categorical:
        position_labels.append(f'Cat {CATEGORICAL_COLUMNS[i]}')
    elif i < num_categorical + num_champions:
        # First block of roles_per_team slots is labelled Blue, the second Red;
        # derived from POSITIONS so it stays in step with num_champions.
        team_idx, role_idx = divmod(i - num_categorical, roles_per_team)
        team = 'Blue' if team_idx == 0 else 'Red'
        role = POSITIONS[role_idx]
        position_labels.append(f'{team} {role}')
    else:
        position_labels.append('Numerical')

plt.yticks(range(num_positions), position_labels, fontsize=8)
# embed_dim // 10 is 0 when embed_dim < 10 and range() rejects a zero step,
# so clamp the tick spacing to at least 1.
tick_step = max(1, embed_dim // 10)
plt.xticks(range(0, embed_dim, tick_step))
plt.show()

# Summary statistics over every value in the positional embedding array
print("\nPositional Embedding Statistics:")
for caption, value in (
    ("Mean value", pos_embeddings.mean()),
    ("Std deviation", pos_embeddings.std()),
    ("Min value", pos_embeddings.min()),
    ("Max value", pos_embeddings.max()),
):
    print(f"{caption}: {value:.3f}")

In [None]:
# Extract the positional embeddings for the champion slots (two teams, one
# slot per entry in POSITIONS) instead of hard-coding 10 and 5.
# NOTE(review): this keeps the original behaviour of taking the FIRST rows.
# The heatmap cell labels the leading rows as categorical columns, with the
# champion slots after them; if CATEGORICAL_COLUMNS is non-empty one of the
# two cells is mislabelled — TODO confirm the position order against the model.
roles_per_team = len(POSITIONS)
num_champion_slots = 2 * roles_per_team
pos_embeddings = (
    model.pos_embedding.squeeze(0).detach().cpu().numpy()[:num_champion_slots]
)

# Create position labels, one per extracted row so labels and rows stay aligned
position_labels = []
for slot in range(len(pos_embeddings)):
    team = 'Blue' if slot < roles_per_team else 'Red'
    role = POSITIONS[slot % roles_per_team]
    position_labels.append(f'{team} {role}')

# Apply t-SNE with lower perplexity. scikit-learn requires the perplexity to
# be smaller than the number of samples, so cap it for very small inputs.
tsne = TSNE(
    n_components=2,
    random_state=42,
    perplexity=min(5, len(pos_embeddings) - 1),
)
pos_embeddings_2d_tsne = tsne.fit_transform(pos_embeddings)

# Interactive Plotly view of the projected positional embeddings
import plotly.express as px
import pandas as pd

# One row per position: both t-SNE coordinates plus the position label
pos_x, pos_y = pos_embeddings_2d_tsne[:, 0], pos_embeddings_2d_tsne[:, 1]
df = pd.DataFrame(
    {
        't-SNE Component 1': pos_x,
        't-SNE Component 2': pos_y,
        'Position': position_labels,
    }
)

# Build the scatter and chain the trace/layout tweaks onto the returned figure
fig = (
    px.scatter(
        df,
        x='t-SNE Component 1',
        y='t-SNE Component 2',
        text='Position',
        hover_data=['Position'],
    )
    .update_traces(
        marker={'size': 10},
        textposition='top center',
        textfont={'size': 8},
    )
    .update_layout(
        title='t-SNE Visualization of Positional Embeddings',
        showlegend=False,
    )
)
fig.show()

# Optional: fit a PCA that keeps all components on the positional embeddings
# and print how the explained-variance ratio accumulates
pca_full = PCA().fit(pos_embeddings)
cumulative_variance = pca_full.explained_variance_ratio_.cumsum()
print(
    "\nCumulative variance explained by PCA components:",
    cumulative_variance[:10],  # Show first 10 components
    sep="\n",
)