In [1]:
# Required libraries: Selenium for webscraping, and Pandas for collecting and exporting data
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

In [2]:
def fetch_results(table, gender):
    '''
    Scrape every row of one results table into the global dataframe ``df``.

    For each ``<tr>`` of the table, the row is clicked to open the details
    window, the overall finishing place is read from that window, the window
    is closed again, and ``[name, gender, finish_time, place]`` is appended
    to ``df`` as a new row.

    Relies on two module-level names: ``driver`` (the Selenium web driver)
    and ``df`` (the results dataframe).

    Inputs:
    1. table - element ID of the table: "tb_1_1Data" for females or
       "tb_1_2Data" for males
    2. gender - "F" for females or "M" for males; stored unchanged in the
       Gender column

    Returns None; ``df`` is modified in place.
    Raises ValueError if the place text, once "Overall: " is removed, does
    not start with an integer.
    '''
    table_element = driver.find_element(By.ID, table)
    for row in table_element.find_elements(By.TAG_NAME, 'tr'):
        row.click()  # Click row to load details window including finishing place
        overall_text = driver.find_element(
            By.XPATH,
            '/html/body/table/tbody/tr/td/div/div/div[2]/div[3]/div[2]/div/div[9]/div[6]'
        ).text
        # The text has the form "Overall: <place>/<total>". Keep only the part
        # before the slash so parsing does not depend on the total being 405.
        place = int(overall_text.replace('Overall: ', '').split('/')[0])
        driver.find_element(By.XPATH, '/html/body/table/tbody/tr/td/div/img').click()  # Close details window
        cells = row.find_elements(By.TAG_NAME, 'td')
        name = cells[3].text          # fourth cell holds the runner's name
        finish_time = cells[-2].text  # second-to-last cell holds the finish time
        df.loc[len(df.index)] = [name, gender, finish_time, place]

In [3]:
def update_place(x, missing_places=(105, 306)):
    '''
    Adjust a listed finishing place to account for excluded runners.

    Runners in place 105 and 306 have no name/gender data, so they will be
    excluded from the dataframe, and other runners' places are updated
    accordingly: every place is moved up by one for each missing place at or
    before it.

    Input:
    1. x - original Place from df
    2. missing_places - iterable of listed places that are excluded;
       defaults to (105, 306)
    Output: Adjusted Place
    '''
    # Count how many excluded places sit at or ahead of this runner; each one
    # shifts the runner up by a single position.
    skipped = sum(1 for missing in missing_places if missing <= x)
    return x - skipped

In [4]:
# Establish web driver, open webpage, and create dataframe
options = webdriver.ChromeOptions()
# Exclude Chrome's 'enable-logging' switch
options.add_experimental_option('excludeSwitches', ['enable-logging'])

# fetch_results() appends one row per runner to this dataframe
df = pd.DataFrame(columns=['Name', 'Gender', 'Time', 'Place'])

driver = webdriver.Chrome(options=options)
try:
    driver.implicitly_wait(10)
    driver.get('https://my.raceresult.com/182768/results')

    # Dismiss cookies pop up
    driver.find_element(
        By.XPATH,
        '//a[text()="Got it"]'
    ).click()

    # Load all results for female finishers
    driver.find_element(
        By.XPATH,
        '//a[text()="show all 137 participants"]'
    ).click()

    time.sleep(1) # The implicit wait doesn't work here because the table exists,
    # but it needs more time to finish loading

    fetch_results('tb_1_1Data', 'F')

    # Load all results for male finishers
    driver.find_element(
        By.XPATH,
        '//a[text()="show all 266 participants"]'
    ).click()

    time.sleep(1) # Same reason as above: give the table time to finish loading

    fetch_results('tb_1_2Data', 'M')
finally:
    # Always close the browser, even if a lookup or click above fails,
    # so a failed run does not leave a Chrome process behind
    driver.quit()

# Adjust places for missing runners, sort df by place, and export
# Series.map applies update_place element-wise (DataFrame.applymap is deprecated)
df['Place'] = df['Place'].map(update_place)
df = df.sort_values('Place').set_index('Place', drop=True)
df.to_csv('2021boulder.csv')