In [10]:
import sqlite3
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.model_selection import train_test_split

from src.utils import get_climb_score, EncoderDecoder


In [2]:
# Load every table the notebook needs from the SQLite database.
# The connection is closed as soon as the reads finish (even if one of them
# raises); `conn` stays defined afterwards but can no longer be queried.
conn = sqlite3.connect("data/db.sqlite3")
try:
    climbs = pd.read_sql_query("SELECT * FROM climbs", conn)
    grades = pd.read_sql_query("SELECT * FROM difficulty_grades", conn)
    stats = pd.read_sql_query("SELECT * FROM climb_stats", conn)
    holds = pd.read_sql_query("SELECT * FROM holes", conn)
    placements = pd.read_sql_query("SELECT * FROM placements", conn)
finally:
    conn.close()

# Attach the `holes` row to each placement (placements.hole_id == holes.id).
# Both tables have an `id` column, so pandas suffixes them: `id_x` is the one
# coming from `placements` (the left frame) and becomes the index of `holds`.
holds = pd.merge(placements, holds, left_on="hole_id", right_on="id").set_index("id_x")

In [3]:
# Join climbs with their stats and add human-readable grade columns.
def _boulder_name(difficulty):
    """Return the ``boulder_name`` found in ``grades`` at label ``int(difficulty) + 1``.

    NOTE(review): the ``+ 1`` offset presumably aligns the truncated difficulty
    with the row labels of ``grades`` -- confirm against the table contents.
    """
    return grades.loc[int(difficulty) + 1, "boulder_name"]


# `angle` is dropped from `climbs` before joining on the climb uuid.
df = climbs.drop(columns="angle").merge(stats, left_on="uuid", right_on="climb_uuid")
df = df.assign(
    display_grade=df["display_difficulty"].apply(_boulder_name),
    average_grade=df["difficulty_average"].apply(_boulder_name),
)

In [5]:
# Tunable dataset filters.
LAYOUT_ID = 1
MIN_ASCENTS = 1
MIN_QUALITY = 2
# Rows of `holds` whose index label is >= this value are discarded.
# NOTE(review): the reason for the 3000 cut-off is not visible here -- confirm.
MAX_HOLD_INDEX = 3000

# Apply the filters one at a time, printing the shape after each step so the
# effect of every individual filter is visible in the cell output.
print(df.shape)
df = df[df["frames_count"] == 1]
print(df.shape)
df = df[df["is_listed"] == 1]
print(df.shape)
# Use the constant (not a literal) so that changing LAYOUT_ID actually takes effect.
df = df[df["layout_id"] == LAYOUT_ID]
print(df.shape)
df = df[df["quality_average"] >= MIN_QUALITY]
print(df.shape)
# reset_index() keeps the previous index as a new `index` column, which is why
# the column count grows by one at this step.
df = df[df["ascensionist_count"] >= MIN_ASCENTS].reset_index()
print(df.shape)

# Restrict the holds to the same layout and to the accepted index range.
holds = holds[holds["layout_id"] == LAYOUT_ID]
holds = holds[holds.index < MAX_HOLD_INDEX]
encdec = EncoderDecoder()

(124616, 29)
(124616, 29)
(124616, 29)
(124616, 29)
(124616, 29)
(124616, 30)


In [6]:
# Color codes accepted inside a `frames` string; any other code rejects the climb.
VALID_COLORS = (12, 13, 14, 15)
# Climbs whose `get_climb_score` is below this value are dropped.
MIN_SCORE = 3.5

used_holds = set()
colors = set()
bad_route_ids = set()
# Iterate over the `frames` column directly: it yields the same
# (index label, frames string) pairs as `df.iterrows()` without building a
# full row Series for every climb.
for name, frames in df["frames"].items():
    # A frames string is a sequence of "p<hold>r<color>" placements; the text
    # before the first "p" is skipped.
    for frame in frames.split("p")[1:]:
        hold, color = frame.split("r")
        if int(color) not in VALID_COLORS or int(hold) not in holds.index:
            # One unknown color or hold is enough to reject the whole climb.
            # Holds/colors seen earlier in this climb remain recorded in the sets.
            bad_route_ids.add(name)
            break
        used_holds.add(int(hold))
        colors.add(int(color))
print(df.shape)
# Boolean mask instead of `.loc[list(set(...))]`: keeps the rows in their
# original order rather than in set-iteration order. `.copy()` makes the
# result an independent frame so the column assignment below is unambiguous.
df = df[~df.index.isin(bad_route_ids)].copy()
print(df.shape)
# Score every remaining climb from its encoded frames.
# NOTE(review): the meaning of the second argument (0) of `encdec` is defined
# in src.utils and is not visible here.
df["score"] = df["frames"].apply(lambda x: get_climb_score(encdec(x, 0)))
df = df[df["score"] >= MIN_SCORE]
print(df.shape)

(124616, 30)
(123368, 30)


  x, y = (argmax // climb.size(2), argmax % climb.size(2))
  x, y = (argmax // climb.size(2), argmax % climb.size(2))


(108205, 31)


In [11]:
# Split by group rather than by row: every row sharing the same `frames`
# string lands in the same partition.
groups = [frame_group for _, frame_group in df.groupby("frames")]

# 70% train, then the remaining 30% is halved into validation and test.
train_groups, test_groups = train_test_split(groups, test_size=0.3, random_state=42)
val_groups, test_groups = train_test_split(test_groups, test_size=0.5, random_state=42)

# Stitch each list of groups back into a single dataframe.
train, val, test = (
    pd.concat(partition) for partition in (train_groups, val_groups, test_groups)
)

In [7]:
# Write the filtered dataset, its splits and the lookup tables as CSV files.
# The directory is created with pathlib instead of a `!mkdir -p` shell escape
# so the cell also works where no POSIX shell is available.
RAW_DIR = Path("data/raw")
RAW_DIR.mkdir(parents=True, exist_ok=True)

# File name -> dataframe, written in this order.
raw_outputs = {
    "all_climbs.csv": df,
    "train.csv": train,
    "test.csv": test,
    "val.csv": val,
    "holds.csv": holds,
    "grades.csv": grades,
}
for csv_name, table in raw_outputs.items():
    table.to_csv(RAW_DIR / csv_name)

In [8]:
# Dump the raw `frames` strings, one per line, without index or header.
frames_column = df["frames"]
frames_column.to_csv("frames.txt", header=False, index=False)

In [9]:
# Keep only the first row for every distinct `frames` string.
df = df.drop_duplicates(subset=["frames"], keep="first")

In [10]:
def _frames_to_tokens(frames):
    """Turn a frames string into its "p"-separated pieces joined by single spaces.

    The text before the first "p" is discarded.
    """
    _, *tokens = frames.split("p")
    return " ".join(tokens)


# One space-separated line per climb, without index or header.
df["frames"].apply(_frames_to_tokens).to_csv(
    "../minimal-text-diffusion/data/frames-split.txt", header=False, index=False
)