In [1]:
import tempfile
import time

import numpy as np
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager


In [None]:
# 5 world cups from 2024 and 3 events from 2025.
# Every event page lives at https://ifsc.results.info/event/<id>/, so the list
# is generated from the numeric event ids.
events_urls = [
    f'https://ifsc.results.info/event/{event_id}/'
    for event_id in (1353, 1355, 1356, 1360, 1361, 1405, 1408, 1409)
]

In [None]:

# Set up the Chrome browser used for scraping.
# NOTE: this cell calls get_finals_urls and get_score_from_event, which are
# defined in cells further down; run those cells first.
options = Options()
# options.add_argument("--headless")  # uncomment to run without a visible window
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(options=options)

# Scrape results from finals: one list of score matrices per event.
score_mats = []
try:
    for event_url in events_urls:
        finals_urls = get_finals_urls(event_url, driver)
        score_mats.append([get_score_from_event(url, driver) for url in finals_urls])
finally:
    # Always close the browser, even when a page fails to scrape, so no
    # Chrome process is left behind.
    driver.quit()

# Save to disk. The array has dtype=object, so np.load needs allow_pickle=True
# to read it back.
np.save("score_mats.npy", np.array(score_mats, dtype=object))

In [34]:
def get_finals_urls(event_url, driver):
    """
    Get the URLs of the finals results for a given event.

    Loads the event page, clicks the element whose text is 'Boulder', then
    collects the link of the last round button found in each category row.
    (Presumably the last button in a row is the final round; verify against
    the site layout.)

    Args:
        event_url (str): URL of the event overview page.
        driver (webdriver): The Selenium WebDriver instance.
    Returns:
        list[str]: One URL per category row that has at least one round button.
    Raises:
        NoSuchElementException: if no element with the text 'Boulder' exists.
    """
    driver.get(event_url)
    time.sleep(3)  # Wait for JavaScript to load content

    # make sure you get results from boulder events
    boulder_tab = driver.find_element(By.XPATH, "//*[text()='Boulder']")
    try:
        boulder_tab.click()
        time.sleep(1)  # Wait for the page to load after clicking
    except WebDriverException:
        # The click was rejected by the browser (e.g. the tab is already
        # active); keep going with whatever view is currently shown.
        pass

    # get urls
    events_by_gender = driver.find_elements(
        By.CSS_SELECTOR, "div.dcat-row.d-flex.justify-content-between.align-items-center"
    )
    urls = []
    for event in events_by_gender:
        # Read the row text once and split it outside the f-string: a backslash
        # inside an f-string expression is a SyntaxError before Python 3.12.
        text_lines = event.text.split("\n")
        round_buttons = event.find_elements(By.CSS_SELECTOR, "a.cr-nav-button")
        if not round_buttons:
            # A row without any round link has nothing to scrape.
            print(f"Skipping {text_lines[0]}: no round links found")
            continue
        print(f"Getting {text_lines[0]} {text_lines[-1]} results")
        urls.append(round_buttons[-1].get_attribute("href"))

    return urls




In [29]:
def get_score_from_event(event_url, driver, n_boulders=4):
    """
    Get the scores from a specific event URL using Selenium.

    Each score-detail row of the results table is scored with
    get_athlete_score and written into one matrix row, in page order.

    Args:
        event_url (str): The URL of the final results.
        driver (webdriver): The Selenium WebDriver instance.
        n_boulders (int): Minimum number of boulder columns (default 4). The
            matrix is widened automatically when a row reports more boulders.
    Returns:
        np.ndarray: A numpy matrix of athlete scores, athletes x boulders.
            Rows with fewer boulder cells than the matrix width, and athlete
            rows without a score-detail row, are left as zeros.
    """
    driver.get(event_url)
    time.sleep(3)  # Wait for JavaScript to load content

    results_table = driver.find_element(By.CLASS_NAME, "m-0")

    # Get all rows with the matching class
    athlete_rows = results_table.find_elements(By.CSS_SELECTOR, "tr.r-row.no-border-bottom")
    scores_rows = results_table.find_elements(By.CSS_SELECTOR, "tr.no-border-top.boulder-asc-detail")

    # Only the per-boulder list is needed; the total is recomputable from it.
    per_athlete_scores = [get_athlete_score(scores_row)[1] for scores_row in scores_rows]

    # Size the matrix from what was actually scraped so that a mismatch between
    # the two row lists, or a round without exactly four boulders, cannot raise
    # IndexError / ValueError on assignment.
    n_rows = max(len(athlete_rows), len(per_athlete_scores))
    n_cols = max([n_boulders] + [len(scores) for scores in per_athlete_scores])

    # each row is an athlete, each column is a boulder
    athlete_scores_matrix = np.zeros((n_rows, n_cols))
    for i, scores in enumerate(per_athlete_scores):
        athlete_scores_matrix[i, :len(scores)] = scores

    return athlete_scores_matrix

In [12]:
def get_boulder_score(boulder, zone_points=10.0, top_bonus=15.0, attempt_penalty=0.1):
    """
    Calculate the score for a boulder based on the number of attempts.

    If the boulder has a zone, it is worth ``zone_points`` minus
    ``attempt_penalty`` for every attempt after the first.
    If the boulder is also topped, ``top_bonus`` is added on top of the zone
    value, again minus ``attempt_penalty`` for every attempt after the first.
    With the defaults, a zone and top both achieved on the first attempt score
    10 + 15 = 25.

    A top is only looked for when a zone element is present; a boulder cell
    without a zone element scores 0.

    Args:
        boulder (WebElement): The cell element of a single boulder.
        zone_points (float): Value of a first-attempt zone.
        top_bonus (float): Extra value of a first-attempt top.
        attempt_penalty (float): Deduction per attempt after the first.
    Returns:
        float: The score for this boulder.
    Raises:
        ValueError: if an attempt counter's text is not an integer.
    """
    zone_elements = boulder.find_elements(By.CSS_SELECTOR, "div.zone.zoned")
    if not zone_elements:
        return 0.0

    zone_attempts = int(zone_elements[0].text)
    score = zone_points - (zone_attempts - 1) * attempt_penalty

    top_elements = boulder.find_elements(By.CSS_SELECTOR, "div.top.topped")
    if top_elements:
        top_attempts = int(top_elements[0].text)
        score += top_bonus - (top_attempts - 1) * attempt_penalty

    return score

In [13]:
# Score every boulder in an athlete's detail row and add the results up.
def get_athlete_score(scores_row):
    """
    Score one athlete from their score-detail row.

    Every "div.asc-cell-container" element inside the row is scored with
    get_boulder_score.

    Returns:
        tuple: (sum of the boulder scores, list of the individual boulder scores)
    """
    boulder_cells = scores_row.find_elements(By.CSS_SELECTOR, "div.asc-cell-container")

    per_boulder = []
    for cell in boulder_cells:
        per_boulder.append(get_boulder_score(cell))

    return sum(per_boulder), per_boulder

In [34]:
# NOTE(review): scratch cell. In the visible cells `scores_rows` is only assigned
# inside get_score_from_event, so this line relies on leftover kernel state.
total_score, each_boulder_scores = get_athlete_score(scores_rows[4])

In [38]:
# NOTE(review): scratch cell. In the visible cells `athlete_scores_matrix` is only
# a local of get_score_from_event; displaying it here relies on kernel state.
athlete_scores_matrix

array([[25. , 25. , 24.5, 10. ],
       [25. , 25. , 10. , 10. ],
       [25. , 25. , 10. , 10. ],
       [25. , 25. , 10. , 10. ],
       [25. , 25. ,  9.9, 10. ],
       [25. , 24.8,  9.7,  9.9],
       [ 0. , 24.8, 10. , 10. ],
       [ 0. , 24.8,  9.5, 10. ]])

In [41]:
# Scratch cell: quit whatever browser session `driver` currently refers to.
# (The scraping cell also calls driver.quit() itself after its loop.)
driver.quit()

In [37]:
# NOTE(review): scratch cell. `if_topped` is not assigned in any visible cell,
# so this fails on a fresh kernel.
if_topped

True

In [None]:

# NOTE(review): scratch cell. `athlete_row` is only assigned in a later cell and
# `score_row` in no visible cell, so this depends on kernel state.
# Stripped text of every <td> in the athlete row and in the score row.
athlete_cells = [td.text.strip() for td in athlete_row.find_elements(By.TAG_NAME, "td")]
score_cells = [td.text.strip() for td in score_row.find_elements(By.TAG_NAME, "td")]
# Individual fields are read from child elements looked up by class name.
rank = int(athlete_row.find_element(By.CLASS_NAME, "rank").text.strip())
score = float(athlete_row.find_element(By.CLASS_NAME, "r-score").text)
link = athlete_row.find_element(By.CLASS_NAME, "r-name").get_attribute("href")
name = athlete_row.find_element(By.CLASS_NAME, "r-name").text

In [None]:
# Scratch cell: show the text of the "r-name" element of the current `athlete_row`.
# NOTE(review): `athlete_row` is only assigned in the following cell.
athlete_row.find_element(By.CLASS_NAME, "r-name").text

In [None]:
# Scratch cell: pick the first entry of `rows` as the row to inspect.
# NOTE(review): `rows` is not assigned in any visible cell (only in commented-out code).
i = 0
athlete_row = rows[i]


<selenium.webdriver.remote.webelement.WebElement (session="9c07cc0817249d8b71162ce5b32729b8", element="f.2B262F4947F4AB7430223248370BCE16.d.700127C966EE32ED0589A2481CBF70BB.e.38")>

In [None]:
# find athlete names


In [None]:

# Smoke test: open a single results page and print its event title.
# NOTE(review): `options` is not created in this cell; it is reused from the
# scraping cell. The add_argument calls below are disabled here.
# options.add_argument("--headless")
# options.add_argument("--no-sandbox")
# options.add_argument("--disable-dev-shm-usage")

# The chromedriver path is supplied by ChromeDriverManager().install().
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

try:
    # Navigate to the event page
    driver.get("https://ifsc.results.info/event/1409/cr/10034")
    time.sleep(5)  # Wait for JavaScript to load content

    # Example: Extract event title
    event_title = driver.find_element(By.CLASS_NAME, "event-name").text
    print(f"Event Title: {event_title}")

    # # Example: Extract results table
    # # Note: The actual selectors will depend on the page's structure
    # `results_table` is not used further while the row loop below stays
    # commented out; the lookup itself still raises if no "results" element exists.
    results_table = driver.find_element(By.CLASS_NAME, "results")
    # rows = results_table.find_elements(By.TAG_NAME, "tr")
    # for row in rows:
    #     cells = row.find_elements(By.TAG_NAME, "td")
    #     data = [cell.text for cell in cells]
    #     print(data)

finally:
    # Runs whether or not the lookups above raised, so the browser is always closed.
    driver.quit()


Event Title: IFSC World Cup Salt Lake City 2025
