In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext
from torchtext.vocab import build_vocab_from_iterator
import pickle
from pmf import PMF
from lr import LogReg
import numpy as np
from functools import reduce

import pandas as pd



In [2]:
rls = [500, 1000]
folds = np.arange(0,5)

# D1 Analysis

In [3]:
analyses = []

for replacement_level in rls:
    pmf_dfs, lr_dfs = [], []

    for fold in folds:
        pmf_model = torch.load(f'models/pmf/model_rl_{replacement_level}_d_1_fold_{fold}.pth')
        pmf_names = pmf_model.climber_vocab.get_itos()
        pmf_weights = [x[0] for x in pmf_model.climber_embedding.weight.data.numpy()]
        pmf_df = pd.DataFrame(list(zip(pmf_names, pmf_weights)), columns=['Climber', f'Weights_{replacement_level}_{fold}'])

        with open(f'models/lr/model_rl_{replacement_level}_fold_{fold}.pkl', 'rb') as f:
            lr_model = pickle.load(f)
        lr_weights = lr_model.lr.coef_[0]
        lr_names = lr_model.climber_vocab.get_itos()
        lr_df = pd.DataFrame(list(zip(lr_names, lr_weights)), columns=['Climber', f'Weights_{replacement_level}_{fold}'])

        pmf_dfs.append(pmf_df)
        lr_dfs.append(lr_df)

    pmf = reduce(lambda left, right: pd.merge(left, right, on='Climber', how='outer'), pmf_dfs).set_index('Climber').fillna(0)
    pmf = pd.DataFrame(pmf.mean(1), columns=[f'PMF_Weight_rl_{replacement_level}'])

    lr = reduce(lambda left, right: pd.merge(left, right, on='Climber', how='outer'), lr_dfs).set_index('Climber').fillna(0)
    lr = pd.DataFrame(pmf.mean(1), columns=[f'LR_Weight_rl_{replacement_level}'])

    analysis = pd.merge(pmf, lr, on='Climber', how='outer')
    analyses.append(analysis)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [4]:
full = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), analyses).fillna(0)

In [5]:
full

Unnamed: 0_level_0,PMF_Weight_rl_500,LR_Weight_rl_500,PMF_Weight_rl_1000,LR_Weight_rl_1000
Climber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Adam Ondra,-0.139944,-0.139944,0.0,0.0
Aleksei Rubtsov,-0.012711,-0.012711,0.0,0.0
David Barrans,0.069379,0.069379,0.0,0.0
Dmitrii Sharafutdinov,0.025354,0.025354,-0.125391,-0.125391
Gabriele Moroni,0.134451,0.134451,0.0,0.0
Guillaume Glairon Mondet,-0.123702,-0.123702,0.0,0.0
Jakob Schubert,-0.991675,-0.991675,0.0,0.0
Jan Hojer,-0.303719,-0.303719,0.0,0.0
Jeremy Bonder,0.011382,0.011382,0.0,0.0
Jernej Kruder,1.005458,1.005458,0.245085,0.245085


# D2+ Analysis

In [6]:
latent_factors = 2
analyses = []

for replacement_level in rls:
    pmf_dfs = []

    for fold in folds:
        pmf_model = torch.load(f'models/pmf/model_rl_{replacement_level}_d_{latent_factors}_fold_{fold}.pth')
        pmf_names = pmf_model.climber_vocab.get_itos()
        pmf_weights = pmf_model.climber_embedding.weight.data.numpy()

        data = {'Climber': pmf_names}
        for i in range(latent_factors):
            data[f'Weight_lf_{i+1}_{replacement_level}_{fold}'] = pmf_weights[:, i]

        pmf_df = pd.DataFrame(data)
        pmf_dfs.append(pmf_df)

        with open(f'models/lr/model_rl_{replacement_level}_fold_{fold}.pkl', 'rb') as f:
            lr_model = pickle.load(f)
        lr_weights = lr_model.lr.coef_[0]
        lr_names = lr_model.climber_vocab.get_itos()
        lr_df = pd.DataFrame(list(zip(lr_names, lr_weights)), columns=['Climber', f'Weights_{replacement_level}_{fold}'])

        pmf_dfs.append(pmf_df)
        lr_dfs.append(lr_df)

    pmf = reduce(lambda left, right: pd.merge(left, right, on='Climber', how='outer'), pmf_dfs).set_index('Climber').fillna(0)

    mean_weights = {}
    for i in range(latent_factors):
        mean_weights[f'PMF_Weight_lf_{i+1}_rl_{replacement_level}'] = pmf.filter(like=f'Weight_lf_{i+1}_').mean(axis=1)
    pmf = pd.DataFrame(mean_weights)

    lr = reduce(lambda left, right: pd.merge(left, right, on='Climber', how='outer'), lr_dfs).set_index('Climber').fillna(0)
    lr = pd.DataFrame(pmf.mean(1), columns=[f'LR_Weight_rl_{replacement_level}'])

    analysis = pd.merge(pmf, lr, on='Climber', how='outer')
    analyses.append(analysis)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [7]:
full = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), analyses).fillna(0)

In [8]:
full

Unnamed: 0_level_0,PMF_Weight_lf_1_rl_500,PMF_Weight_lf_2_rl_500,LR_Weight_rl_500,PMF_Weight_lf_1_rl_1000,PMF_Weight_lf_2_rl_1000,LR_Weight_rl_1000
Climber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adam Ondra,-0.311183,0.652081,0.170449,0.0,0.0,0.0
Aleksei Rubtsov,-0.260806,0.594527,0.16686,0.0,0.0,0.0
David Barrans,-0.317378,0.102703,-0.107338,0.0,0.0,0.0
Dmitrii Sharafutdinov,-0.199462,0.745338,0.272938,0.028867,0.067717,0.048292
Gabriele Moroni,-0.332167,0.30262,-0.014774,0.0,0.0,0.0
Guillaume Glairon Mondet,0.077789,0.096642,0.087215,0.0,0.0,0.0
Jakob Schubert,0.173915,0.386054,0.279985,0.0,0.0,0.0
Jan Hojer,-0.206235,-0.021428,-0.113831,0.0,0.0,0.0
Jeremy Bonder,-0.570064,0.032914,-0.268575,0.0,0.0,0.0
Jernej Kruder,0.411856,-1.607402,-0.597773,0.058781,0.137222,0.098001


# Scraping

In [9]:
from bs4 import BeautifulSoup
import requests
import re

file_path = "scrape/website.html"

with open(file_path, 'r', encoding='utf-8') as file:
    content = file.read()
soup = BeautifulSoup(content, 'html.parser')

athletes = {}

for link in soup.find_all('a', href=True):
    href = link['href']
    if '/athlete/' in href:
        athlete_id_path = href.split('/athlete/')[1]
        athlete_parts = athlete_id_path.split('/')
        athletes[athlete_parts[0]] = athlete_parts[1]

## Need to sanitize names + Connection refusal issues

In [17]:
import re
import requests
from bs4 import BeautifulSoup
import time
from requests.exceptions import ConnectionError, Timeout

wayback_base_url = "https://web.archive.org/web/20220127211638/https://www.ifsc-climbing.org/index.php?option=com_ifsc&task=athlete.display&id="

for ID, athlete in athletes.items():
    url = f"{wayback_base_url}{ID}"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        height_label = soup.find('p', class_='subtitle', text=re.compile(r'\bHEIGHT\b', re.I))
        if height_label:
            height_value = height_label.find_next('p', class_='paragraph').text.strip()
            print(f"{athlete} Height: {height_value}")
        else:
            print(f"No height information for {athlete}")

    except ConnectionError:
        print(f"Connection refused for {url}. Retrying in 5 seconds...")
        time.sleep(5)
        continue

    except Timeout:
        print(f"Request to {url} timed out. Retrying in 5 seconds...")
        time.sleep(5)
        continue

    except requests.exceptions.HTTPError as e:
        print(f"HTTP error for {url}: {e}")
        continue

    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        continue

Connection refused for https://web.archive.org/web/20220127211638/https://www.ifsc-climbing.org/index.php?option=com_ifsc&task=athlete.display&id=1147. Retrying in 5 seconds...
Connection refused for https://web.archive.org/web/20220127211638/https://www.ifsc-climbing.org/index.php?option=com_ifsc&task=athlete.display&id=1811. Retrying in 5 seconds...


KeyboardInterrupt: 