In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re


In [2]:
URL_5k_p1_2023 = 'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=1'

In [3]:
response = requests.get(URL_5k_p1_2023)
response.raise_for_status()

In [4]:
def get_names(response):
    soup = BeautifulSoup(response.content, "html.parser")
    values = soup.find_all('a', class_ = 'ltw-name', target = '_blank')
    extracted_names = [name.get_text() for name in values]
    names_series = pd.Series(extracted_names)
    return names_series

def get_ages(response, year):
    soup = BeautifulSoup(response.content, "html.parser")
    values = soup.find_all('td' ,class_ = 'd-none d-sm-table-cell')
    extracted_values = [age.get_text() for age in values]
    value_series = pd.Series(extracted_values)
    numeric_age_data = pd.to_numeric(value_series, errors='coerce')
    if (year == 2023):
        correct_age_data = numeric_age_data.iloc[3::5]
    elif(year == 2022):
        correct_age_data = numeric_age_data.iloc[2::5]
    elif(year == 2019):
        correct_age_data = numeric_age_data.iloc[4::8]
    elif (year == 2018):
        correct_age_data = numeric_age_data.iloc[4::8]
    elif (year == 2017 or year == 2016):
        correct_age_data = numeric_age_data.iloc[2::5]
    elif (year == 2015 or year == 2014):
        correct_age_data = numeric_age_data.iloc[2::5]
    elif (year == 2013):
        correct_age_data = numeric_age_data.iloc[2::5]
    
    correct_age_data.reset_index(drop=True, inplace=True)
    correct_age_data = correct_age_data.astype(int)
    return correct_age_data

def get_times(response, year):
    soup = BeautifulSoup(response.content, "html.parser")
    values = soup.find_all('td' ,class_ = 'd-none d-sm-table-cell ltw-time')
    extracted_values = [time.get_text() for time in values]
    value_series = pd.Series(extracted_values)
    if (year == 2015 or year == 2014):
        correct_time_data = value_series.iloc[0::2]
    if (year == 2013):
        correct_time_data = value_series.iloc[0::2]
    else:
        correct_time_data = value_series.iloc[1::2]
    correct_time_data.reset_index(drop=True, inplace=True)
    return correct_time_data

def get_gender(response):
    soup = BeautifulSoup(response.content, "html.parser")
    values = soup.find_all('a', class_ = 'ltw-name')
    extracted_values = [value.get_text() for value in values]
    value_series = pd.Series(extracted_values)
    correct_gender_data = value_series.iloc[2::3]
    correct_gender_data.reset_index(drop=True, inplace=True)
    return correct_gender_data


In [5]:
def generate_dataframe(URL, distance, year):
    response = requests.get(URL)
    response.raise_for_status()
    names = get_names(response)
    ages = get_ages(response, year)
    times = get_times(response, year)
    genders = get_gender(response)
    df = pd.DataFrame({'Distance':distance, 
                       'Year':year,
                       'Name': names, 
                       'Age': ages, 
                       'Time': times, 
                       'Gender': genders 
                       })
    return df
    

In [9]:
url_5K_2023 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=4',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=5',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=6',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=7',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=8',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=1&dt=0&PageNo=9',]
url_10K_2023 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=2&dt=0&PageNo=3',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=2&dt=0&PageNo=4',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=2&dt=0&PageNo=5',]
url_5K_2022 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6103',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=1&dt=0&PageNo=4',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=1&dt=0&PageNo=5',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=1&dt=0&PageNo=6',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=1&dt=0&PageNo=7',]
url_10K_2022 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=2&dt=0&PageNo=3',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=2&dt=0&PageNo=4',]
url_5K_2019 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6072',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=1&dt=0&PageNo=4',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=1&dt=0&PageNo=5',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=1&dt=0&PageNo=6',]
url_10K_2019 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=2&dt=0&PageNo=3',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072&EId=2&dt=0&PageNo=4',]
url_5K_2018 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6047',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=1&dt=0&PageNo=4',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=1&dt=0&PageNo=5',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=1&dt=0&PageNo=6',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=1&dt=0&PageNo=7',]
url_10K_2018 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=2&dt=0&PageNo=3',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=2&dt=0&PageNo=4',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047&EId=2&dt=0&PageNo=5',]
url_5K_2017 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6024',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6024&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6024&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=6024&EId=1&dt=0&PageNo=4',]
url_10K_2017 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=6024&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6024&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=6024&EId=2&dt=0&PageNo=3',]
url_5K_2016 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=188',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=1&dt=0&PageNo=4',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=1&dt=0&PageNo=5',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=1&dt=0&PageNo=6',]
url_10K_2016 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=2&dt=0&PageNo=3',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=188&EId=2&dt=0&PageNo=4',]
url_5K_2015 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=148',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=1&dt=0&PageNo=4',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=1&dt=0&PageNo=5',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=1&dt=0&PageNo=6',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=1&dt=0&PageNo=7',]
url_10K_2015 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=2&dt=0&PageNo=3',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=2&dt=0&PageNo=4',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=2&dt=0&PageNo=5',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=2&dt=0&PageNo=6',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=148&EId=2&dt=0&PageNo=7',]
url_5K_2014 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=92',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=1&dt=0&PageNo=2',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=1&dt=0&PageNo=3',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=1&dt=0&PageNo=4',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=1&dt=0&PageNo=5',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=1&dt=0&PageNo=6',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=1&dt=0&PageNo=7',
               'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=1&dt=0&PageNo=8',]
url_10K_2014 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=2&dt=0&PageNo=2',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=2&dt=0&PageNo=3',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=2&dt=0&PageNo=4',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=2&dt=0&PageNo=5',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=2&dt=0&PageNo=6',
                'https://results.changeofpace.com/results.aspx?CId=16356&RId=92&EId=2&dt=0&PageNo=7',]
url_5K_2013 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=30',]
url_10K_2013 = ['https://results.changeofpace.com/results.aspx?CId=16356&RId=30&EId=2',]

In [10]:
years = [2023, 2022, 2019, 2018, 2017, 2016, 2015, 2014, 2013]
doc_types = ['5K', '10K']
dfs = {f"df_{year}_{doc_type}": pd.DataFrame() for year in years for doc_type in doc_types}
for year in years:
    for doc_type in doc_types:
        for url in globals()[f"url_{doc_type}_{year}"]:
            df = generate_dataframe(url, doc_type, year)
            dfs[f"df_{year}_{doc_type}"] = pd.concat([dfs[f"df_{year}_{doc_type}"], df], ignore_index=True)
all_dfs = pd.concat(dfs.values(),axis=0)

In [11]:
all_dfs.to_csv('all_data.csv', index=False)

In [194]:
URL = 'https://results.changeofpace.com/results.aspx?CId=16356&RId=6121&EId=2&dt=0&PageNo=2'
df = generate_dataframe(URL, '5K', 2023)
df

Unnamed: 0,Distance,Year,Name,Age,Time,Gender
0,5K,2023,Mike Cordano,71,53:00.21,Male
1,5K,2023,Hortensia Cisneros Benftez,39,53:02.35,Female
2,5K,2023,Kevin Barrett,26,53:07.10,Male
3,5K,2023,David Wilson,58,53:09.97,Male
4,5K,2023,Timothy Hendrix,24,53:19.90,Male
5,5K,2023,Alden Mann,16,53:26.25,Male
6,5K,2023,Sukhtej Atwal,35,53:31.96,Female
7,5K,2023,Whitney Worthington,34,53:37.25,Female
8,5K,2023,Phang Lor,34,53:48.84,Male
9,5K,2023,Eason Lor,14,53:51.34,Male


In [120]:
URL = 'https://results.changeofpace.com/results.aspx?CId=16356&RId=6103&EId=2'
df = generate_dataframe(URL, '10K', 2022)
df

Unnamed: 0,Distance,Year,Name,Age,Time,Gender
0,10K,2022,Felipe Garcia,33,40:33.51,Male
1,10K,2022,Nicholas Troppy,26,40:55.78,Male
2,10K,2022,Joseph Huang,36,41:55.93,Male
3,10K,2022,Arie Bialostozky,21,42:27.61,Male
4,10K,2022,Chris Jones,50,42:38.72,Male
5,10K,2022,Jenna Calvert,16,43:43.14,Female
6,10K,2022,Justin Tenney,32,44:20.89,Male
7,10K,2022,Christopher Babel,28,44:47.99,Male
8,10K,2022,Jim Farrar,57,44:48.70,Male
9,10K,2022,Brian Hawley,39,45:03.50,Male


In [121]:
URL = 'https://results.changeofpace.com/results.aspx?CId=16356&RId=6072'
df = generate_dataframe(URL, '5K', 2019)
df.dropna()

Unnamed: 0,Distance,Year,Name,Age,Time,Gender
0,5K,2019,Raymond Rodriguez,57,00:17:46.93,Male
1,5K,2019,Claire Nieuwenhuis,29,00:20:07.98,Female
2,5K,2019,Richard King,49,00:20:15.73,Male
3,5K,2019,Elijah Barao,39,00:20:37.70,Male
4,5K,2019,Emily Castles,15,00:20:37.72,Female
5,5K,2019,Cynthia Morales,27,00:20:41.20,Female
6,5K,2019,Grant Garrison,45,00:20:50.04,Male
7,5K,2019,Martin Luu,36,00:20:58.06,Male
8,5K,2019,Greg Mandler,56,00:21:04.23,Male
9,5K,2019,Marjorie Longo,53,00:21:13.29,Female


In [122]:
URL = 'https://results.changeofpace.com/results.aspx?CId=16356&RId=6047'
df = generate_dataframe(URL, '5K', 2018)
df.dropna()

Unnamed: 0,Distance,Year,Name,Age,Time,Gender
0,5K,2018,Bennett,17,00:16:16.630,Male
1,5K,2018,Nolan,21,00:16:42.217,Male
2,5K,2018,Raymond,56,00:18:37.337,Male
3,5K,2018,Luis,30,00:18:53.710,Male
4,5K,2018,Allie,30,00:19:37.527,Female
5,5K,2018,Allison,16,00:20:50.013,Female
6,5K,2018,Martin,35,00:21:31.007,Male
7,5K,2018,Brieanna,16,00:21:35.847,Female
8,5K,2018,Ted,55,00:21:36.187,Male
9,5K,2018,Luke,25,00:21:37.067,Male


In [128]:
URL = 'https://results.changeofpace.com/results.aspx?CId=16356&RId=6024'
df = generate_dataframe(URL, '5K', 2017)
df.dropna()

Unnamed: 0,Distance,Year,Name,Age,Time,Gender
0,5K,2017,Stephen,47,00:18:54.847,Male
1,5K,2017,Michael,26,00:19:29.603,Male
2,5K,2017,Isaac,23,00:19:43.693,Male
3,5K,2017,Ned,43,00:19:56.990,Male
4,5K,2017,Peter,41,00:20:10.530,Male
5,5K,2017,Jaxin,15,00:20:21.287,Female
6,5K,2017,Luke,14,00:20:42.240,Male
7,5K,2017,Paul,40,00:21:10.407,Male
8,5K,2017,J.R.,51,00:21:36.347,Male
9,5K,2017,Charles,48,00:21:37.480,Male


In [137]:
URL = 'https://results.changeofpace.com/results.aspx?CId=16356&RId=188'
df = generate_dataframe(URL, '5K', 2016)
df.dropna()

Unnamed: 0,Distance,Year,Name,Age,Time,Gender
0,5K,2016,Alexander Garcia,19,15:37.750,Male
1,5K,2016,Osama Tada,36,16:03.810,Male
2,5K,2016,Nolan Gerlach,19,16:52.670,Male
3,5K,2016,Zachary Bonner,27,16:58.617,Male
4,5K,2016,Rikiya Ichihara,15,17:24.063,Male
5,5K,2016,Matt Hellier,32,17:25.017,Male
6,5K,2016,Erick Lengtat,15,17:30.420,Male
7,5K,2016,Kobe Standefer,15,17:32.587,Male
8,5K,2016,Misty (we Need Your Age) Diaz,0,18:26.793,Female
9,5K,2016,Kevin Kamai,37,19:08.750,Male
