In [None]:
import torch
import pandas as pd
import plotly.express as px
from umap import UMAP
from sklearn.manifold import TSNE
import numpy as np
from src.utils import Plotter, Tokenizer
from tqdm import tqdm

In [None]:
# Load the climb table and keep only rows whose angle is 40 and whose
# ascensionist_count is at least 3.
climbs_raw = pd.read_csv("data/raw/climbs.csv")
keep_mask = (climbs_raw["angle"] == 40) & (climbs_raw["ascensionist_count"] >= 3)
# .copy() gives an independent frame, so the column assignments made on `df`
# here and in later cells do not raise SettingWithCopyWarning.
df = climbs_raw[keep_mask].copy()

# Load the holds table. `holds_dict` maps each id_x value to its row label in
# the freshly loaded frame; it is built before id_x is moved into the index.
holds = pd.read_csv("data/raw/holds.csv")
holds_dict = {hold_id: row_label for row_label, hold_id in holds["id_x"].items()}
holds = holds.set_index("id_x")
p = Plotter()

# Every hold entry in `frames` is introduced by a "p", so counting that
# character gives the same number as len(frames.split("p")) - 1.
df["n_holds"] = df["frames"].str.count("p")
print(df.shape)

In [None]:
def binary_encode(frames, n_holds=None, offset=1073):
    """Encode a frames string as a 0/1 vector with one slot per hold.

    Args:
        frames: String that is split on "p". The text before the first "p"
            is ignored; every remaining chunk must be an integer followed by
            exactly three trailing characters, which are discarded
            (presumably a role suffix such as "r12" -- confirm against the
            data).
        n_holds: Length of the returned vector. Defaults to ``len(holds)``,
            using the notebook-level ``holds`` table.
        offset: Value subtracted from each parsed integer to obtain its
            position in the vector. Defaults to 1073, the constant that was
            previously hard-coded.

    Returns:
        A 1-D integer numpy array of length ``n_holds`` holding 1 at every
        parsed position and 0 elsewhere.

    Raises:
        ValueError: If a chunk does not parse as an integer once its last
            three characters are removed.
        IndexError: If a parsed position is outside the vector.
    """
    if n_holds is None:
        n_holds = len(holds)
    hold_indices = [int(chunk[:-3]) - offset for chunk in frames.split("p")[1:]]
    encoding = np.zeros(n_holds, dtype=int)
    # NOTE(review): an id smaller than `offset` yields a negative index, which
    # numpy resolves from the end of the array instead of rejecting -- confirm
    # the data never contains such ids.
    encoding[hold_indices] = 1
    return encoding

In [None]:
# One binary hold-occupancy row per climb, stacked into a 2-D array.
# NOTE: despite the variable name, the distance used below is Hamming.
data_jaccard = np.stack([binary_encode(frames) for frames in df["frames"]])

# UMAP is stochastic; pinning random_state makes the layout repeatable
# across kernel restarts.
umap = UMAP(
    n_components=2,
    n_neighbors=250,
    min_dist=0.1,
    metric="hamming",
    random_state=42,
)
embedded = umap.fit_transform(data_jaccard)

# assign() builds a new frame rather than writing into `df`, which may be a
# filtered slice of the frame that was read from disk.
df = df.assign(x_bin=embedded[:, 0], y_bin=embedded[:, 1])

In [None]:
# Interactive scatter of the 2-D projection stored in x_bin / y_bin,
# coloured by difficulty_average; hovering shows name, setter and grade.
scatter_bin = px.scatter(
    data_frame=df,
    x="x_bin",
    y="y_bin",
    color="difficulty_average",
    hover_data=["name", "setter_username", "font_grade"],
    height=800,
    width=1200,
)
scatter_bin.show()

In [None]:
from src.models.gpt import GPTModel
from src.utils import shuffle_holds
# Locations of the saved model checkpoint and the saved tokenizer.
checkpoint_path = "artifacts/model-6hojwi3o:v0/model.ckpt"
tokenizer_path = "data/tokenizer.pt"

# Restore both objects from disk.
model = GPTModel.load_from_checkpoint(checkpoint_path)
tokenizer = Tokenizer.load(tokenizer_path)

In [None]:
# Replace `df` with the generated climbs, then tokenise each climb 20 times
# with its holds shuffled on every pass.
df = pd.read_csv("data/generated/best.csv")

per_climb = []
for _, climb in df.iterrows():
    # 20 independently shuffled encodings of the same climb, padded to 64.
    variants = [
        tokenizer.encode(
            shuffle_holds(climb["frames"]),
            climb["angle"],
            climb["font_grade"],
            pad=64,
        )
        for _ in range(20)
    ]
    per_climb.append(torch.stack(variants))

# Stack the per-climb tensors along a new leading axis (one entry per row).
data = torch.stack(per_climb)

In [None]:
# Turn every climb's stack of tokenised variants into a single vector.
model.eval()

# Run on whichever device the model currently lives on instead of assuming
# CUDA, so the inputs always match the weights.
device = next(model.parameters()).device

embedded_data = []
with torch.no_grad():
    for variants in tqdm(data):
        # Average over the first two axes of the model output -- presumably
        # the shuffled variants and the token positions; TODO confirm against
        # model.embed.
        pooled = model.embed(variants.to(device)).mean(dim=[0, 1])
        # No detach() needed: nothing computed under no_grad tracks gradients.
        embedded_data.append(pooled.cpu())

# NOTE: `data` is rebound from the token tensor to the pooled embeddings, so
# the tokenising cell must be re-run before this cell is executed again.
data = torch.stack(embedded_data)

In [None]:
# Project the pooled model embeddings to 2-D using cosine distance.
# UMAP is stochastic; pinning random_state makes the layout repeatable
# across kernel restarts.
umap = UMAP(
    n_components=2,
    n_neighbors=1000,
    min_dist=0.8,
    metric="cosine",
    random_state=42,
)
embedded = umap.fit_transform(data)

# Store the two projected coordinates alongside each climb.
df["x_gpt"] = embedded[:, 0]
df["y_gpt"] = embedded[:, 1]

In [None]:
# Interactive scatter of the 2-D projection stored in x_gpt / y_gpt.
# Colouring is currently switched off; add color="difficulty_average" to the
# call below to turn it back on.
fig = px.scatter(
    data_frame=df,
    x="x_gpt",
    y="y_gpt",
    hover_data=["name", "font_grade"],
    opacity=0.7,
    height=800,
    width=1200,
)
# Keep the colour bar hidden.
fig.update_layout(coloraxis_showscale=False)
fig.show()

In [None]:
# Write the current frame to CSV, leaving out the row index.
df.to_csv(path_or_buf="dash_generated.csv", index=False)

In [None]:
# Reload the holds table from disk; this rebinds `holds` to a fresh frame
# with the default row index.
holds = pd.read_csv(filepath_or_buffer="data/raw/holds.csv")

In [None]:
# Move the model's parameters and buffers to host memory.
model = model.cpu()

In [None]:
# Quick look at the token embedding layer on three token ids. The ids are
# created on the model's own device: building them on CUDA fails with a
# device mismatch once the model has been moved to the CPU.
tok_device = next(model.parameters()).device
model.model.tok_embedding(torch.tensor([1, 2, 3], device=tok_device))

In [None]:
# One-column frame built from tokenizer.decode_map.
tokens = pd.DataFrame(pd.Series(tokenizer.decode_map), columns=["token"])

# Look up the embedding of every token id in a single call. The ids are
# created on the model's own device so that they match the embedding weights
# wherever the model currently lives.
embed_device = next(model.parameters()).device
token_ids = torch.arange(len(tokenizer.decode_map), device=embed_device).unsqueeze(0)
token_embeddings = model.model.tok_embedding(token_ids).cpu().detach()

In [None]:
# Shape of the token-id vector. A tensor's shape does not depend on its
# device, so no CUDA transfer is needed just to inspect it.
torch.arange(len(tokenizer.decode_map)).shape