In [118]:
import os
import pickle
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver import *
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Constants

In [120]:
# Locator strategy names kept as plain strings.
# NOTE(review): the visible cells use `By.*` instead of these names - confirm they are still needed.
ID = "id"
NAME = "name"
XPATH = "xpath"
LINK_TEXT = "link text"
PARTIAL_LINK_TEXT = "partial link text"
TAG_NAME = "tag name"
CLASS_NAME = "class name"
CSS_SELECTOR = "css selector"

# 1-based position of a cell inside a results row -> name of the output column.
COLUMNS_MAPPING = {
    1: "rank",
    2: "names",
    3: "nationality",
    4: "gender",
    5: "age_category",
    6: "time",
}

# Same positions -> XPath suffix appended to the cell path to reach the node
# whose text is read ("" means the cell itself).
SUFFIXES = {
    1: "",
    2: "/a",
    3: "/span[2]",
    4: "/span",
    5: "",
    6: "",
}

# Links to results pages

In [122]:
# One results URL per race of the 2025 edition.
ETC_LINK = "https://montblanc.utmb.world/results?year=2025&raceUri=35016.hokautmbmont-blancetc.2025"
MCC_LINK = "https://montblanc.utmb.world/results?year=2025&raceUri=11567.hokautmbmont-blancmcc.2025"
OCC_LINK = "https://montblanc.utmb.world/results?year=2025&raceUri=2280.hokautmbmont-blancocc.2025"
TDS_LINK = "https://montblanc.utmb.world/results?year=2025&raceUri=646.hokautmbmont-blanctds.2025"
UTMB_LINK = "https://montblanc.utmb.world/results?year=2025&raceUri=142.hokautmbmont-blancutmb.2025"
CCC_LINK = "https://montblanc.utmb.world/results?year=2025&raceUri=141.hokautmbmont-blancccc.2025"

# Value of the `id` attribute used to anchor the results XPaths.
# NOTE(review): looks like a generated id - confirm it is stable across site deployments.
XPATH_ID = "5fe136fe-4d5c-43b7-9c93-e71986ee837c"

# Processing order -> [race name, results link].
ORDER_DICT = dict(enumerate([
    ["ETC", ETC_LINK],
    ["MCC", MCC_LINK],
    ["OCC", OCC_LINK],
    ["TDS", TDS_LINK],
    ["UTMB", UTMB_LINK],
    ["CCC", CCC_LINK],
]))

# Create browser instance

In [124]:
def create_browser():
    """
    Build and return a Chrome browser object.

    The browser is started with the "--no-sandbox" and
    "disable-notifications" command-line arguments.

    Returns
    -------
    The object returned by `Chrome(options=...)`.
    """
    launch_args = ("--no-sandbox", "disable-notifications")

    browser_options = Options()
    for arg in launch_args:
        browser_options.add_argument(arg)

    return Chrome(options=browser_options)

In [125]:
# Start the Chrome session that the scraping cells below pass to
# load_results_page() and load_data().
chrome = create_browser()

# Load results page

In [127]:
def load_results_page(chrome, link):
    """
    Loads the results page and closes the cookies window.

    Parameters
    ----------
    chrome : browser object returned by create_browser()
    link : str, URL of the results page to open

    Notes
    -----
    Rejecting the cookies is best effort: if the reject button cannot be
    found or clicked, the function returns without raising. Only Selenium
    errors are tolerated; anything else (for example KeyboardInterrupt
    while the cell is running) propagates to the caller.
    """
    chrome.get(link)
    chrome.maximize_window()

    # fixed pause before looking for the cookie banner
    time.sleep(3)
    try:
        chrome.find_element(By.XPATH, """//*[@id="onetrust-reject-all-handler"]""").click()
    except WebDriverException:
        # no banner, or the button is not clickable: nothing to dismiss
        pass

# Load results data

In [129]:
def load_data(chrome, col_mapping, suffixes, general_xpath, data_dict, max_rows=50):
    """
    Reads the currently displayed results table, column by column, and
    appends the cell texts to data_dict.

    Parameters
    ----------
    chrome : browser object used to locate the cells
    col_mapping : dict, position of a cell inside a row -> output column name
    suffixes : dict, same positions -> XPath suffix appended to the cell path
    general_xpath : str, XPath of the element whose `div` children are the rows
    data_dict : dict of lists, extended in place (missing columns are created)
    max_rows : int, maximum number of rows read per call (default 50)

    Returns
    -------
    dict
        The same data_dict object, with the new values appended.

    Notes
    -----
    A column stops at the first row whose cell cannot be located, so a row
    with a missing cell shortens that column only.
    Any error other than NoSuchElementException propagates to the caller.
    """
    # rows are read starting at div[2]
    # NOTE(review): presumably div[1] is the table header - confirm against the page
    first_row = 2

    for col_number in sorted(col_mapping):
        category_name = col_mapping[col_number]
        suffix = suffixes[col_number]
        column_values = []
        for row in range(first_row, first_row + max_rows):
            cell_xpath = f"""{general_xpath}/div[{row}]/div[{col_number}]{suffix}"""
            try:
                cell = chrome.find_element(By.XPATH, cell_xpath)
            except NoSuchElementException:
                # no such row on this page: the column is complete
                break
            column_values.append(cell.get_attribute("textContent"))
        data_dict.setdefault(category_name, []).extend(column_values)
    return data_dict

In [130]:
# XPaths and class name are the same for every race, so build them once.
table_xpath = f""" //*[@id="{XPATH_ID}"]/section/div/div"""
# NOTE(review): assumes the 6th pagination item always holds the last page
# number - confirm for races with only a few pages.
last_page_xpath = f"""//*[@id="{XPATH_ID}"]/section/ul/li[6]/a"""
pagination_item_class = "pagination_paginate_item__xFgYE"

for race_name, page_results_link in ORDER_DICT.values():
    # load page, make sure everything is ready with time.sleep
    load_results_page(chrome, page_results_link)
    time.sleep(5)

    # load results of the first page to a fresh dictionary
    data_dict = load_data(chrome, COLUMNS_MAPPING, SUFFIXES, table_xpath, {})

    max_page = int(chrome.find_element(By.XPATH, last_page_xpath).get_attribute("textContent"))

    for page_num in range(2, max_page + 1):
        print("MOVING TO PAGE NUMBER: ", page_num)

        # click the pagination item whose label is the page we want
        moved = False
        for item in chrome.find_elements(By.CLASS_NAME, pagination_item_class):
            try:
                if int(item.get_attribute("textContent")) != page_num:
                    continue
            except (TypeError, ValueError):
                # item without a numeric label: not a page link
                continue
            try:
                link = item.find_element(By.TAG_NAME, "a")
                # JavaScript click, as in the original cell
                chrome.execute_script("arguments[0].click();", link)
            except WebDriverException:
                # this item could not be clicked, try the remaining ones
                continue
            moved = True
            break

        if not moved:
            # Without this guard the current page would be read again and its
            # rows appended a second time.
            print(f"Could not open page {page_num} of {race_name}; keeping the pages read so far.")
            break

        time.sleep(2)

        data_dict = load_data(chrome, COLUMNS_MAPPING, SUFFIXES, table_xpath, data_dict)

    # convert dictionary to Pandas DataFrame
    data_df = pd.DataFrame.from_dict(data_dict)
    data_df["race"] = race_name
    print(race_name)
    print(data_df.head())
    data_df.to_csv(f"{race_name}.csv")

MOVING TO PAGE NUMBER:  2
MOVING TO PAGE NUMBER:  3
MOVING TO PAGE NUMBER:  4
MOVING TO PAGE NUMBER:  5
MOVING TO PAGE NUMBER:  6
MOVING TO PAGE NUMBER:  7
MOVING TO PAGE NUMBER:  8
MOVING TO PAGE NUMBER:  9
MOVING TO PAGE NUMBER:  10
MOVING TO PAGE NUMBER:  11
MOVING TO PAGE NUMBER:  12
MOVING TO PAGE NUMBER:  13
MOVING TO PAGE NUMBER:  14
MOVING TO PAGE NUMBER:  15
MOVING TO PAGE NUMBER:  16
MOVING TO PAGE NUMBER:  17
MOVING TO PAGE NUMBER:  18
MOVING TO PAGE NUMBER:  19
MOVING TO PAGE NUMBER:  20
MOVING TO PAGE NUMBER:  21
MOVING TO PAGE NUMBER:  22
MOVING TO PAGE NUMBER:  23
MOVING TO PAGE NUMBER:  24
MOVING TO PAGE NUMBER:  25
MOVING TO PAGE NUMBER:  26
MOVING TO PAGE NUMBER:  27
ETC
  rank                    names nationality gender age_category      time race
0    1  Alain SANTAMARIA BLANCO       Spain    Men        20-34  01:21:07  ETC
1    2           Cesare MAESTRI       Italy    Men        20-34  01:22:23  ETC
2    3         Maximilien DRION     Belgium    Men        20-34  