In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import torch
from pmf import PMF
import pandas as pd

pmf_model = torch.load(f"models/pmf/model_rl_{100}_d_{3}_full_data.pth")
pmf_model.eval()
Climbers_rl_100 = pmf_model.climber_vocab.get_itos()[1:]

def scrape_athlete_data(athlete_id):
    driver = webdriver.Chrome()

    try:
        athlete_url = f'https://ifsc.results.info/#/athlete/{athlete_id}'
        driver.get(athlete_url)

        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.athlete-name-container'))
        )

        name = driver.find_element(By.CSS_SELECTOR, '.athlete-name').text.lower()
        athlete_info = driver.find_element(By.CSS_SELECTOR, '.athlete-info').text

        height = None
        for line in athlete_info.split('\n'):
            if line.startswith('HEIGHT:'):

                height = int(line.split(':')[1].strip())
                break

        return name, height if height is not None else 0

    except Exception as e:
        return 'empty', 0
    finally:
        driver.quit()

def scrape_all(start,end):
    for index in range(start, end):
        name, height = scrape_athlete_data(str(index))
        Climbers_data[name] = height

        if name in Climbers_100:
            c_100[name] = height
            print(f'GOT: {name}')

Climbers_data = {}
Climbers_100 = set(climber.lower() for climber in pmf_model.climber_vocab.get_itos()[1:])
c_100 = {}



In [None]:
scrape_all(1,10000)

c_100_df = pd.DataFrame(list(c_100.items()), columns=['Name', 'Height'])
c_100_df.to_csv('c_100_data.csv', index=False)

In [102]:
climbers_source_1 = pd.read_csv('data/scrape_climbers_data.csv')
climbers_source_1['Name'] = climbers_source_1['Name'].apply(lambda x: ' '.join(word.capitalize() for word in x.split()))
climbers_source_1.set_index('Name', inplace=True)

# Source 2: Another Github
https://github.com/lfigil/ifsc-data

In [103]:
climbers_source_2 = (
    pd.read_csv('data/athlete_info_v2.csv', usecols=['Name', 'Height'])
    .assign(Name=lambda df: df['Name'].str.title())
    .set_index('Name')
    .loc[lambda df: df.index.isin(Climbers_rl_100)]
)

# Source 3: Reddit Post:
https://www.reddit.com/r/bouldering/comments/16yvuzt/the_ideal_weight_for_climbing_sport_lets_look_at/

In [104]:
import re
import pandas as pd

def parse_climber_data(data):
    climbers = []
    pattern = re.compile(r'([A-Za-z\s]+)\s+(\d{3})\s+\d{1,2}′\s+\d{1,2}″')
    for line in data.splitlines():
        match = pattern.match(line)
        if match:
            name = match.group(1).strip()
            height = int(match.group(2).strip())
            climbers.append({"Name": name, "Height": height})

    return climbers

# Provided data
data = """
Adam Ondra 185 6′ 1″ 70 154 20.5
Alberto Ginés López 169 5′ 7″ 58 128 20.3
Aleksei Rubtsov 178 5′ 10″ 63 139 19.9
Alex Megos 175 5′ 9″ 57 126 18.6
Alfian Muhammad Fajri 166 5′ 5″ 59 130 21.4
Chris Sharma 183 6′ 0″ 75 165 22.4
Dan Osman 179 5′ 10″ 70 155 21.8
Daniel Woods 170 5′ 7″ 61 134 21.1
Danyil Boldyrev 190 6′ 3″ 78 172 21.6
Dave Graham 179 5′ 10″ 63 139 19.7
Dean Potter 196 6′ 5″ 83 185 21.6
Jakob Schubert 176 5′ 9″ 63 139 20.3
Jimmy Webb 183 6′ 0″ 78 172 23.3
Jan Hojer 188 6′ 2″ 77 170 21.8
Jernej Kruder 180 5′ 11″ 71 157 21.9
Jongwon Chon 176 5′ 9″ 60 132 19.4
Kevin Jorgeson 175 5′ 9″ 66 146 21.6
Kokoro Fujii 176 5′ 9″ 64 141 20.7
Ludovico Fossali 185 6′ 1″ 74 163 21.6
Magnus Midtbø 173 5′ 8″ 71 157 23.7
Nalle Hukkataival 173 5′ 8″ 68 150 22.7
Nathaniel Coleman 180 5′ 11″ 74 163 22.8
Patxi Usobiaga 174 5′ 9″ 62 137 20.5
Qixin Zhong 170 5′ 7″ 68 150 23.5
Reza Alipour 170 5′ 7″ 76 168 26.3
Rishat Khaibullin 169 5′ 7″ 56 123 19.6
Sean McColl 169 5′ 7″ 60 132 21.0
Stefano Ghisolfi 170 5′ 7″ 58 128 20.1
Tom Randall 178 5′ 10″ 69 152 21.8
Tommy Caldwell 180 5′ 11″ 75 165 23.1
Tomoa Narasaki 170 5′ 7″ 58
"""

climbers_source_3 = parse_climber_data(data)
climbers_source_3 = pd.DataFrame(climbers_source_3).set_index('Name')

# Combine Data

In [105]:
print(len(climbers_source_1))
print(len(climbers_source_2))
print(len(climbers_source_3))

57
68
29


In [106]:
stacked_climbers = pd.concat([climbers_source_1, climbers_source_2, climbers_source_3])
climbers = stacked_climbers[~stacked_climbers.index.duplicated(keep='first')]
climbers = climbers[~climbers.isin(['-']).any(axis=1)]

In [107]:
climbers.to_csv('data/climbers_heights.csv')