### 2. Web Scraping - Berlin Marathon Results 2014-2019

__Project - BSTN Capstone__
<br>__Beth McGregor__

#### Introduction

This notebook contains the code used to web scrape the results of the Berlin Marathon from 2014-2019. The results for each year were stored in JSON format. They were retrieved, processed and saved to a separate CSV file for each year. 

In [63]:
# Import required libraries
import pandas as pd
import math

import html5lib
import lxml
import requests
from time import sleep
from random import randint

#### Understanding the data format and setting up for webscraping:

In [93]:
"""
1) Get the recordsTotal
2) Loop from 0 to recordsTotal 
"""


# Using requests to get the data
# The results table is fed by a DataTables-style JSON endpoint. The query is
# assembled from structured parts so it stays readable and can be edited safely
# (the same request pasted as a single ~3,000 character literal is easy to break).
start = 0

# Data fields requested for table columns 1-9
data_fields = ["placetotal", "forename", "name", "nationality", "start_no_text",
               "club", "sex", "netto", "brutto"]

# (data, name, searchable/orderable flag) for table columns 0-11
column_specs = (
    [("", "details-control", "false")]
    + [(field, "field-" + field, "true") for field in data_fields]
    + [("", "photo", "false"), ("", "certificate", "false")]
)

query = [("eID", "tx_scctiming_results"), ("competition", 1903), ("draw", 4)]
query += [
    (f"columns[{position}]{suffix}", value)
    for position, (data, name, flag) in enumerate(column_specs)
    for suffix, value in (
        ("[data]", data),
        ("[name]", name),
        ("[searchable]", flag),
        ("[orderable]", flag),
        ("[search][value]", ""),
        ("[search][regex]", "false"),
    )
]
query += [
    ("order[0][column]", 8),   # sort on column 8 (netto), ascending
    ("order[0][dir]", "asc"),
    ("start", start),          # offset of the first record to return
    ("length", 1000),          # number of records per request
    ("search[value]", ""),
    ("search[regex]", "false"),
    ("_", 1635589922727),      # cache-buster value copied from the browser request
]

# Keep the fully encoded URL available under its original name
URL = requests.Request("GET", "https://www.bmw-berlin-marathon.com/", params=query).prepare().url

# Fail fast on a hung connection or an HTTP error instead of parsing an error page
content = requests.get(URL, timeout=60)
content.raise_for_status()

In [94]:
# The response body is JSON; decode it into Python objects
json = content.json()

# Total number of result records reported by the endpoint
json["recordsTotal"]

43976

In [95]:
# See what columns are returned and modify them to fit with data web scraped from other races

df = pd.DataFrame(json["data"])

# 'splits' is one comma-separated string per runner; spread it over one column per checkpoint
df[["5km", "10km", "15km", "20km", "HM", "25km", "30km", "35km", "40km"]] = df["splits"].str.split(",", expand=True)
df["full_name"] = df["forename"] + " " + df["name"]

# Discard the fields that are not needed and switch to the column names used for the other races
df = df.drop(
    columns=["splits", "club", "brutto", "5km", "10km", "15km", "20km", "25km", "30km", "35km", "40km", "forename", "name"]
).rename(
    columns={
        "placetotal": "place_overall",
        "start_no_text": "bib_number",
        "netto": "finish_time",
        "sex": "gender",
        "HM": "half_split",
    }
)

# Re-order columns to match dataframes from other races
# (the re-ordered frame is only displayed here; df itself is not reassigned)
df.reindex(columns=["place_overall", "full_name", "nationality", "bib_number", "half_split", "finish_time", "gender"])


Unnamed: 0,place_overall,full_name,nationality,bib_number,half_split,finish_time,gender
0,1,Kenenisa Bekele,ETH,2,01:01:05,02:01:41,M
1,2,Birhanu Legese,ETH,5,01:01:05,02:02:48,M
2,3,Sisay Lemma,ETH,4,01:01:06,02:03:36,M
3,4,Jonathan Korir,KEN,7,01:01:06,02:06:45,M
4,5,Felix Kandie,KEN,6,01:02:20,02:08:07,M
...,...,...,...,...,...,...,...
995,936,Mateusz Siekierski,POL,68375,01:24:59,02:48:08,M
996,937,Andre Dahlkamp,GER,16879,01:24:35,02:48:10,M
997,938,Sylvain Zanotti,FRA,72224,01:23:05,02:48:10,M
998,940,Miguel Cunha,POR,53954,01:25:11,02:48:11,M


#### Retrieve the 2019 Berlin Marathon results stored in json format

In [40]:
# This function will retrieve the 2019 Berlin Marathon Results
def get_berlin_results(y, competition=1903, page_size=1000, session=None):
    """Download every result row of one Berlin Marathon and return a tidy DataFrame.

    The first request reads ``recordsTotal`` from the JSON reply; the remaining
    pages are then fetched ``page_size`` rows at a time, with a random 2-5 second
    pause before each follow-up request.

    Parameters
    ----------
    y : str
        Label written to the ``year`` column of every row.
    competition : int, default 1903
        Value of the ``competition`` query parameter (1903 is the id used by
        the 2019 request).
    page_size : int, default 1000
        Rows requested per call (the ``length`` query parameter).
    session : object, optional
        Anything exposing a ``requests``-compatible
        ``get(url, params=..., timeout=...)`` method, e.g. a
        ``requests.Session``. Defaults to the ``requests`` module.

    Returns
    -------
    pandas.DataFrame
        Columns ``place_overall, full_name, nationality, bib_number,
        half_split, finish_time, gender, year`` with a unique 0..n-1 index.
        An empty frame with those columns is returned when no rows come back.

    Raises
    ------
    requests.HTTPError
        When a page request answers with an HTTP error status (raised by
        ``raise_for_status`` on the response).
    """
    http = requests if session is None else session

    # Data fields requested for table columns 1-9
    data_fields = ["placetotal", "forename", "name", "nationality", "start_no_text",
                   "club", "sex", "netto", "brutto"]
    # (data, name, searchable/orderable flag) for table columns 0-11
    column_specs = (
        [("", "details-control", "false")]
        + [(field, "field-" + field, "true") for field in data_fields]
        + [("", "photo", "false"), ("", "certificate", "false")]
    )
    output_columns = ["place_overall", "full_name", "nationality", "bib_number",
                      "half_split", "finish_time", "gender", "year"]

    def fetch_page(start):
        """Request the rows beginning at offset ``start`` and return the decoded JSON."""
        query = [("eID", "tx_scctiming_results"), ("competition", competition), ("draw", 4)]
        query += [
            (f"columns[{position}]{suffix}", value)
            for position, (data, name, flag) in enumerate(column_specs)
            for suffix, value in (
                ("[data]", data),
                ("[name]", name),
                ("[searchable]", flag),
                ("[orderable]", flag),
                ("[search][value]", ""),
                ("[search][regex]", "false"),
            )
        ]
        query += [
            ("order[0][column]", 8),   # sort on column 8 (netto), ascending
            ("order[0][dir]", "asc"),
            ("start", start),
            ("length", page_size),
            ("search[value]", ""),
            ("search[regex]", "false"),
            ("_", 1635589922727),      # cache-buster value copied from the browser request
        ]
        # Fail fast on a hung connection or an HTTP error instead of parsing an error page
        response = http.get("https://www.bmw-berlin-marathon.com/", params=query, timeout=60)
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(0)
    # get total number of records from json
    records_total = first_page["recordsTotal"]
    pages = [pd.DataFrame(first_page["data"])]

    # Number of follow-up pages; ceil - 1 avoids an empty extra request when
    # records_total is an exact multiple of page_size
    max_page = math.ceil(records_total / page_size) - 1

    # Loop through the pages to retrieve results
    for p in range(1, max_page + 1):
        print("Page: ", p, "/", max_page)
        # pausing the script before each follow-up request
        sleep(randint(2, 5))
        pages.append(pd.DataFrame(fetch_page(p * page_size)["data"]))

    # Combine once (not inside the loop) and give every row a unique index label
    raw_df = pd.concat(pages, ignore_index=True)
    if raw_df.empty:
        return pd.DataFrame(columns=output_columns)

    # Process output into more desirable format. 'splits' is a comma-separated
    # string ordered 5km, 10km, 15km, 20km, HM, ...; only the fifth entry (HM) is kept.
    results_df = pd.DataFrame({
        "place_overall": raw_df["placetotal"],
        "full_name": raw_df["forename"] + " " + raw_df["name"],
        "nationality": raw_df["nationality"],
        "bib_number": raw_df["start_no_text"],
        "half_split": raw_df["splits"].str.split(",").str[4],
        "finish_time": raw_df["netto"],
        "gender": raw_df["sex"],
        "year": y,
    })

    # Column order matches the dataframes from other races
    return results_df[output_columns]


# Scrape the 2019 race; y is passed to get_berlin_results and reused in the file name
y = "2019"
print("Year: ", y)
year_race_results_df = get_berlin_results(y)

# Save the results to a csv file
year_race_results_df.to_csv(path_or_buf=f'Berlin_race_results_{y}.csv')

Year:  2019
Page:  1 / 43
Page:  2 / 43
Page:  3 / 43
Page:  4 / 43
Page:  5 / 43
Page:  6 / 43
Page:  7 / 43
Page:  8 / 43
Page:  9 / 43
Page:  10 / 43
Page:  11 / 43
Page:  12 / 43
Page:  13 / 43
Page:  14 / 43
Page:  15 / 43
Page:  16 / 43
Page:  17 / 43
Page:  18 / 43
Page:  19 / 43
Page:  20 / 43
Page:  21 / 43
Page:  22 / 43
Page:  23 / 43
Page:  24 / 43
Page:  25 / 43
Page:  26 / 43
Page:  27 / 43
Page:  28 / 43
Page:  29 / 43
Page:  30 / 43
Page:  31 / 43
Page:  32 / 43
Page:  33 / 43
Page:  34 / 43
Page:  35 / 43
Page:  36 / 43
Page:  37 / 43
Page:  38 / 43
Page:  39 / 43
Page:  40 / 43
Page:  41 / 43
Page:  42 / 43
Page:  43 / 43


#### Retrieve the 2018 Berlin Marathon results stored in json format

In [41]:
# This will retrieve the 2018 Berlin Marathon Results
# The request uses the same field names as 2019; only the competition id,
# draw counter and cache-buster value differ.

def get_berlin_results(y, competition=38, page_size=1000, session=None):
    """Download every result row of one Berlin Marathon and return a tidy DataFrame.

    The first request reads ``recordsTotal`` from the JSON reply; the remaining
    pages are then fetched ``page_size`` rows at a time, with a random 2-5 second
    pause before each follow-up request.

    Parameters
    ----------
    y : str
        Label written to the ``year`` column of every row.
    competition : int, default 38
        Value of the ``competition`` query parameter (38 is the id used by
        the 2018 request).
    page_size : int, default 1000
        Rows requested per call (the ``length`` query parameter).
    session : object, optional
        Anything exposing a ``requests``-compatible
        ``get(url, params=..., timeout=...)`` method, e.g. a
        ``requests.Session``. Defaults to the ``requests`` module.

    Returns
    -------
    pandas.DataFrame
        Columns ``place_overall, full_name, nationality, bib_number,
        half_split, finish_time, gender, year`` with a unique 0..n-1 index.
        An empty frame with those columns is returned when no rows come back.

    Raises
    ------
    requests.HTTPError
        When a page request answers with an HTTP error status (raised by
        ``raise_for_status`` on the response).
    """
    http = requests if session is None else session

    # Data fields requested for table columns 1-9
    data_fields = ["placetotal", "forename", "name", "nationality", "start_no_text",
                   "club", "sex", "netto", "brutto"]
    # (data, name, searchable/orderable flag) for table columns 0-11
    column_specs = (
        [("", "details-control", "false")]
        + [(field, "field-" + field, "true") for field in data_fields]
        + [("", "photo", "false"), ("", "certificate", "false")]
    )
    output_columns = ["place_overall", "full_name", "nationality", "bib_number",
                      "half_split", "finish_time", "gender", "year"]

    def fetch_page(start):
        """Request the rows beginning at offset ``start`` and return the decoded JSON."""
        query = [("eID", "tx_scctiming_results"), ("competition", competition), ("draw", 1)]
        query += [
            (f"columns[{position}]{suffix}", value)
            for position, (data, name, flag) in enumerate(column_specs)
            for suffix, value in (
                ("[data]", data),
                ("[name]", name),
                ("[searchable]", flag),
                ("[orderable]", flag),
                ("[search][value]", ""),
                ("[search][regex]", "false"),
            )
        ]
        query += [
            ("order[0][column]", 8),   # sort on column 8 (netto), ascending
            ("order[0][dir]", "asc"),
            ("start", start),
            ("length", page_size),
            ("search[value]", ""),
            ("search[regex]", "false"),
            ("_", 1635717715480),      # cache-buster value copied from the browser request
        ]
        # Fail fast on a hung connection or an HTTP error instead of parsing an error page
        response = http.get("https://www.bmw-berlin-marathon.com/", params=query, timeout=60)
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(0)
    records_total = first_page["recordsTotal"]
    pages = [pd.DataFrame(first_page["data"])]

    # Number of follow-up pages; ceil - 1 avoids an empty extra request when
    # records_total is an exact multiple of page_size
    max_page = math.ceil(records_total / page_size) - 1

    for p in range(1, max_page + 1):
        print("Page: ", p, "/", max_page)
        # pausing the script before each follow-up request
        sleep(randint(2, 5))
        pages.append(pd.DataFrame(fetch_page(p * page_size)["data"]))

    # Combine once (not inside the loop) and give every row a unique index label
    raw_df = pd.concat(pages, ignore_index=True)
    if raw_df.empty:
        return pd.DataFrame(columns=output_columns)

    # 'splits' is a comma-separated string ordered 5km, 10km, 15km, 20km, HM, ...;
    # only the fifth entry (HM) is kept.
    results_df = pd.DataFrame({
        "place_overall": raw_df["placetotal"],
        "full_name": raw_df["forename"] + " " + raw_df["name"],
        "nationality": raw_df["nationality"],
        "bib_number": raw_df["start_no_text"],
        "half_split": raw_df["splits"].str.split(",").str[4],
        "finish_time": raw_df["netto"],
        "gender": raw_df["sex"],
        "year": y,
    })

    # Column order matches the dataframes from other races
    return results_df[output_columns]


# Scrape the 2018 race; y is passed to get_berlin_results and reused in the file name
y = "2018"
print("Year: ", y)
year_race_results_df = get_berlin_results(y)

# Save the results to a csv file
year_race_results_df.to_csv(path_or_buf=f'Berlin_race_results_{y}.csv')

Year:  2018
Page:  1 / 40
Page:  2 / 40
Page:  3 / 40
Page:  4 / 40
Page:  5 / 40
Page:  6 / 40
Page:  7 / 40
Page:  8 / 40
Page:  9 / 40
Page:  10 / 40
Page:  11 / 40
Page:  12 / 40
Page:  13 / 40
Page:  14 / 40
Page:  15 / 40
Page:  16 / 40
Page:  17 / 40
Page:  18 / 40
Page:  19 / 40
Page:  20 / 40
Page:  21 / 40
Page:  22 / 40
Page:  23 / 40
Page:  24 / 40
Page:  25 / 40
Page:  26 / 40
Page:  27 / 40
Page:  28 / 40
Page:  29 / 40
Page:  30 / 40
Page:  31 / 40
Page:  32 / 40
Page:  33 / 40
Page:  34 / 40
Page:  35 / 40
Page:  36 / 40
Page:  37 / 40
Page:  38 / 40
Page:  39 / 40
Page:  40 / 40


#### Retrieve the 2017 Berlin Marathon results stored in json format

In [89]:
# This will retrieve the 2017 Berlin Marathon Results
# Same procedure as for the later years, but this competition's request and
# response use German field names (platz, vorname, nachname, nation, startnummer, verein).

def get_berlin_results(y, competition=394, page_size=1000, session=None):
    """Download every result row of one Berlin Marathon and return a tidy DataFrame.

    The first request reads ``recordsTotal`` from the JSON reply; the remaining
    pages are then fetched ``page_size`` rows at a time, with a random 2-5 second
    pause before each follow-up request.

    Parameters
    ----------
    y : str
        Label written to the ``year`` column of every row.
    competition : int, default 394
        Value of the ``competition`` query parameter (394 is the id used by
        the 2017 request).
    page_size : int, default 1000
        Rows requested per call (the ``length`` query parameter).
    session : object, optional
        Anything exposing a ``requests``-compatible
        ``get(url, params=..., timeout=...)`` method, e.g. a
        ``requests.Session``. Defaults to the ``requests`` module.

    Returns
    -------
    pandas.DataFrame
        Columns ``place_overall, full_name, nationality, bib_number,
        half_split, finish_time, gender, year`` with a unique 0..n-1 index.
        An empty frame with those columns is returned when no rows come back.

    Raises
    ------
    requests.HTTPError
        When a page request answers with an HTTP error status (raised by
        ``raise_for_status`` on the response).
    """
    http = requests if session is None else session

    # Data fields requested for table columns 1-9
    data_fields = ["platz", "vorname", "nachname", "nation", "startnummer",
                   "verein", "sex", "netto", "brutto"]
    # (data, name, searchable/orderable flag) for table columns 0-11
    column_specs = (
        [("", "details-control", "false")]
        + [(field, "field-" + field, "true") for field in data_fields]
        + [("", "photo", "false"), ("", "certificate", "false")]
    )
    output_columns = ["place_overall", "full_name", "nationality", "bib_number",
                      "half_split", "finish_time", "gender", "year"]

    def fetch_page(start):
        """Request the rows beginning at offset ``start`` and return the decoded JSON."""
        query = [("eID", "tx_scctiming_results"), ("competition", competition), ("draw", 1)]
        query += [
            (f"columns[{position}]{suffix}", value)
            for position, (data, name, flag) in enumerate(column_specs)
            for suffix, value in (
                ("[data]", data),
                ("[name]", name),
                ("[searchable]", flag),
                ("[orderable]", flag),
                ("[search][value]", ""),
                ("[search][regex]", "false"),
            )
        ]
        query += [
            ("order[0][column]", 8),   # sort on column 8 (netto), ascending
            ("order[0][dir]", "asc"),
            ("start", start),
            ("length", page_size),
            ("search[value]", ""),
            ("search[regex]", "false"),
            ("_", 1635717715485),      # cache-buster value copied from the browser request
        ]
        # Fail fast on a hung connection or an HTTP error instead of parsing an error page
        response = http.get("https://www.bmw-berlin-marathon.com/", params=query, timeout=60)
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(0)
    records_total = first_page["recordsTotal"]
    pages = [pd.DataFrame(first_page["data"])]

    # Number of follow-up pages; ceil - 1 avoids an empty extra request when
    # records_total is an exact multiple of page_size
    max_page = math.ceil(records_total / page_size) - 1

    for p in range(1, max_page + 1):
        print("Page: ", p, "/", max_page)
        # pausing the script before each follow-up request
        sleep(randint(2, 5))
        pages.append(pd.DataFrame(fetch_page(p * page_size)["data"]))

    # Combine once (not inside the loop) and give every row a unique index label
    raw_df = pd.concat(pages, ignore_index=True)
    if raw_df.empty:
        return pd.DataFrame(columns=output_columns)

    # 'splits' is a comma-separated string ordered 5km, 10km, 15km, 20km, HM, ...;
    # only the fifth entry (HM) is kept.
    results_df = pd.DataFrame({
        "place_overall": raw_df["platz"],
        "full_name": raw_df["vorname"] + " " + raw_df["nachname"],
        "nationality": raw_df["nation"],
        "bib_number": raw_df["startnummer"],
        "half_split": raw_df["splits"].str.split(",").str[4],
        "finish_time": raw_df["netto"],
        "gender": raw_df["sex"],
        "year": y,
    })

    # Column order matches the dataframes from other races
    return results_df[output_columns]


# Scrape the 2017 race; y is passed to get_berlin_results and reused in the file name
y = "2017"
print("Year: ", y)
year_race_results_df = get_berlin_results(y)

# Save the results to a csv file
year_race_results_df.to_csv(path_or_buf=f"Berlin_race_results_{y}.csv")

Year:  2017
Page:  1 / 39
Page:  2 / 39
Page:  3 / 39
Page:  4 / 39
Page:  5 / 39
Page:  6 / 39
Page:  7 / 39
Page:  8 / 39
Page:  9 / 39
Page:  10 / 39
Page:  11 / 39
Page:  12 / 39
Page:  13 / 39
Page:  14 / 39
Page:  15 / 39
Page:  16 / 39
Page:  17 / 39
Page:  18 / 39
Page:  19 / 39
Page:  20 / 39
Page:  21 / 39
Page:  22 / 39
Page:  23 / 39
Page:  24 / 39
Page:  25 / 39
Page:  26 / 39
Page:  27 / 39
Page:  28 / 39
Page:  29 / 39
Page:  30 / 39
Page:  31 / 39
Page:  32 / 39
Page:  33 / 39
Page:  34 / 39
Page:  35 / 39
Page:  36 / 39
Page:  37 / 39
Page:  38 / 39
Page:  39 / 39


#### Retrieve the 2016 Berlin Marathon results stored in json format

In [90]:
# This will retrieve the 2016 Berlin Marathon results
# This is identical to the functions above

def get_berlin_results(y, competition="970"):
    """Download one Berlin Marathon result list and return it as a tidy DataFrame.

    The results endpoint is queried in pages of 1000 rows. The first response
    supplies ``recordsTotal``, which determines how many further pages are
    requested.

    Fix over the previous version: the follow-up pages were requested with
    ``competition=394`` while the first page used ``competition=970``, so the
    returned table mixed two different competitions. Every request now uses
    the same competition id.

    Parameters
    ----------
    y : str
        Label stored in the ``year`` column of the returned frame.
    competition : str or int, optional
        Competition id sent to the endpoint. Defaults to "970", the id used by
        the first-page request of the previous version.
        NOTE(review): confirm that 970 is the 2016 race.

    Returns
    -------
    pandas.DataFrame
        Columns ``place_overall``, ``full_name``, ``nationality``,
        ``bib_number``, ``half_split``, ``finish_time``, ``gender`` and
        ``year``. The row index is kept exactly as delivered per page (it
        restarts on every page), as in the previous version.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    base_url = "https://www.bmw-berlin-marathon.com/"
    page_size = 1000

    # (data, name, searchable/orderable flag) of every table column, in request order.
    columns = [
        ("", "details-control", "false"),
        ("platz", "field-platz", "true"),
        ("vorname", "field-vorname", "true"),
        ("nachname", "field-nachname", "true"),
        ("nation", "field-nation", "true"),
        ("startnummer", "field-startnummer", "true"),
        ("verein", "field-verein", "true"),
        ("sex", "field-sex", "true"),
        ("netto", "field-netto", "true"),
        ("brutto", "field-brutto", "true"),
        ("", "photo", "false"),
        ("", "certificate", "false"),
    ]

    def fetch_page(start):
        """Request the page beginning at row ``start`` and return the decoded JSON."""
        query = [
            ("eID", "tx_scctiming_results"),
            ("competition", str(competition)),
            ("draw", "1"),
        ]
        for i, (data, name, flag) in enumerate(columns):
            prefix = f"columns[{i}]"
            query += [
                (f"{prefix}[data]", data),
                (f"{prefix}[name]", name),
                (f"{prefix}[searchable]", flag),
                (f"{prefix}[orderable]", flag),
                (f"{prefix}[search][value]", ""),
                (f"{prefix}[search][regex]", "false"),
            ]
        query += [
            ("order[0][column]", "8"),
            ("order[0][dir]", "asc"),
            ("start", str(start)),
            ("length", str(page_size)),
            ("search[value]", ""),
            ("search[regex]", "false"),
            ("_", "1635717715486"),
        ]
        # requests percent-encodes the bracketed keys, giving the same query string
        # that was previously written out by hand.
        response = requests.get(base_url, params=query, timeout=60)
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(0)
    records_total = int(first_page["recordsTotal"])
    max_page = records_total // page_size

    # Collect the pages in a list and concatenate once instead of growing a frame per page.
    pages = [pd.DataFrame(first_page["data"])]
    for p in range(1, max_page + 1):
        print("Page: ", p, "/", max_page)
        pages.append(pd.DataFrame(fetch_page(p * page_size)["data"]))
        # pausing the script
        sleep(randint(2, 5))
    raw_df = pd.concat(pages)

    # "splits" is a comma-separated string ordered 5km, 10km, 15km, 20km, HM, ...;
    # only the half-marathon value (position 4) is kept.
    splits = raw_df["splits"].str.split(",", expand=True)

    tidy_df = raw_df.assign(
        half_split=splits[4],
        full_name=raw_df["vorname"] + " " + raw_df["nachname"],
    ).rename(
        columns={
            "platz": "place_overall",
            "nation": "nationality",
            "startnummer": "bib_number",
            "netto": "finish_time",
            "sex": "gender",
        }
    )

    # Re-order columns to match dataframes from other races
    ordered_columns = ["place_overall", "full_name", "nationality", "bib_number", "half_split", "finish_time", "gender"]
    return tidy_df[ordered_columns].assign(year=y)


# Scrape the 2016 race and write the resulting table to a per-year CSV file.
y = "2016"

print("Year: ", y)

year_race_results_df = get_berlin_results(y)
year_race_results_df.to_csv(path_or_buf=f"Berlin_race_results_{y}.csv")

Year:  2016
Page:  1 / 35
Page:  2 / 35
Page:  3 / 35
Page:  4 / 35
Page:  5 / 35
Page:  6 / 35
Page:  7 / 35
Page:  8 / 35
Page:  9 / 35
Page:  10 / 35
Page:  11 / 35
Page:  12 / 35
Page:  13 / 35
Page:  14 / 35
Page:  15 / 35
Page:  16 / 35
Page:  17 / 35
Page:  18 / 35
Page:  19 / 35
Page:  20 / 35
Page:  21 / 35
Page:  22 / 35
Page:  23 / 35
Page:  24 / 35
Page:  25 / 35
Page:  26 / 35
Page:  27 / 35
Page:  28 / 35
Page:  29 / 35
Page:  30 / 35
Page:  31 / 35
Page:  32 / 35
Page:  33 / 35
Page:  34 / 35
Page:  35 / 35


#### Retrieve the 2015 Berlin Marathon results stored in JSON format

In [91]:
# Retrieve the 2015 Berlin Marathon results.
# The logic matches the functions above; only the competition id in the request URLs differs.
def get_berlin_results(y, competition="1806"):
    """Download one Berlin Marathon result list and return it as a tidy DataFrame.

    The results endpoint is queried in pages of 1000 rows. The first response
    supplies ``recordsTotal``, which determines how many further pages are
    requested. All requests are built from one query description, so the
    first page and the follow-up pages always address the same competition.

    Parameters
    ----------
    y : str
        Label stored in the ``year`` column of the returned frame.
    competition : str or int, optional
        Competition id sent to the endpoint. Defaults to "1806", the id
        hard-coded in the previous version of this cell.

    Returns
    -------
    pandas.DataFrame
        Columns ``place_overall``, ``full_name``, ``nationality``,
        ``bib_number``, ``half_split``, ``finish_time``, ``gender`` and
        ``year``. The row index is kept exactly as delivered per page (it
        restarts on every page), as in the previous version.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    base_url = "https://www.bmw-berlin-marathon.com/"
    page_size = 1000

    # (data, name, searchable/orderable flag) of every table column, in request order.
    columns = [
        ("", "details-control", "false"),
        ("platz", "field-platz", "true"),
        ("vorname", "field-vorname", "true"),
        ("nachname", "field-nachname", "true"),
        ("nation", "field-nation", "true"),
        ("startnummer", "field-startnummer", "true"),
        ("verein", "field-verein", "true"),
        ("sex", "field-sex", "true"),
        ("netto", "field-netto", "true"),
        ("brutto", "field-brutto", "true"),
        ("", "photo", "false"),
        ("", "certificate", "false"),
    ]

    def fetch_page(start):
        """Request the page beginning at row ``start`` and return the decoded JSON."""
        query = [
            ("eID", "tx_scctiming_results"),
            ("competition", str(competition)),
            ("draw", "1"),
        ]
        for i, (data, name, flag) in enumerate(columns):
            prefix = f"columns[{i}]"
            query += [
                (f"{prefix}[data]", data),
                (f"{prefix}[name]", name),
                (f"{prefix}[searchable]", flag),
                (f"{prefix}[orderable]", flag),
                (f"{prefix}[search][value]", ""),
                (f"{prefix}[search][regex]", "false"),
            ]
        query += [
            ("order[0][column]", "8"),
            ("order[0][dir]", "asc"),
            ("start", str(start)),
            ("length", str(page_size)),
            ("search[value]", ""),
            ("search[regex]", "false"),
            ("_", "1635717715490"),
        ]
        # requests percent-encodes the bracketed keys, giving the same query string
        # that was previously written out by hand.
        response = requests.get(base_url, params=query, timeout=60)
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(0)
    records_total = int(first_page["recordsTotal"])
    max_page = records_total // page_size

    # Collect the pages in a list and concatenate once instead of growing a frame per page.
    pages = [pd.DataFrame(first_page["data"])]
    for p in range(1, max_page + 1):
        print("Page: ", p, "/", max_page)
        pages.append(pd.DataFrame(fetch_page(p * page_size)["data"]))
        # pausing the script
        sleep(randint(2, 5))
    raw_df = pd.concat(pages)

    # "splits" is a comma-separated string ordered 5km, 10km, 15km, 20km, HM, ...;
    # only the half-marathon value (position 4) is kept.
    splits = raw_df["splits"].str.split(",", expand=True)

    tidy_df = raw_df.assign(
        half_split=splits[4],
        full_name=raw_df["vorname"] + " " + raw_df["nachname"],
    ).rename(
        columns={
            "platz": "place_overall",
            "nation": "nationality",
            "startnummer": "bib_number",
            "netto": "finish_time",
            "sex": "gender",
        }
    )

    # Re-order columns to match dataframes from other races
    ordered_columns = ["place_overall", "full_name", "nationality", "bib_number", "half_split", "finish_time", "gender"]
    return tidy_df[ordered_columns].assign(year=y)

# Scrape the 2015 race and write the resulting table to a per-year CSV file.
y = "2015"

print("Year: ", y)

year_race_results_df = get_berlin_results(y)
year_race_results_df.to_csv(path_or_buf=f"Berlin_race_results_{y}.csv")

Year:  2015
Page:  1 / 36
Page:  2 / 36
Page:  3 / 36
Page:  4 / 36
Page:  5 / 36
Page:  6 / 36
Page:  7 / 36
Page:  8 / 36
Page:  9 / 36
Page:  10 / 36
Page:  11 / 36
Page:  12 / 36
Page:  13 / 36
Page:  14 / 36
Page:  15 / 36
Page:  16 / 36
Page:  17 / 36
Page:  18 / 36
Page:  19 / 36
Page:  20 / 36
Page:  21 / 36
Page:  22 / 36
Page:  23 / 36
Page:  24 / 36
Page:  25 / 36
Page:  26 / 36
Page:  27 / 36
Page:  28 / 36
Page:  29 / 36
Page:  30 / 36
Page:  31 / 36
Page:  32 / 36
Page:  33 / 36
Page:  34 / 36
Page:  35 / 36
Page:  36 / 36


#### Retrieve the 2014 Berlin Marathon results stored in JSON format

In [92]:
# Retrieve the 2014 Berlin Marathon results.
# The logic matches the functions above; only the competition id in the request URLs differs.

def get_berlin_results(y, competition="1766"):
    """Download one Berlin Marathon result list and return it as a tidy DataFrame.

    The results endpoint is queried in pages of 1000 rows. The first response
    supplies ``recordsTotal``, which determines how many further pages are
    requested. All requests are built from one query description, so the
    first page and the follow-up pages always address the same competition.

    Parameters
    ----------
    y : str
        Label stored in the ``year`` column of the returned frame.
    competition : str or int, optional
        Competition id sent to the endpoint. Defaults to "1766", the id
        hard-coded in the previous version of this cell.

    Returns
    -------
    pandas.DataFrame
        Columns ``place_overall``, ``full_name``, ``nationality``,
        ``bib_number``, ``half_split``, ``finish_time``, ``gender`` and
        ``year``. The row index is kept exactly as delivered per page (it
        restarts on every page), as in the previous version.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    base_url = "https://www.bmw-berlin-marathon.com/"
    page_size = 1000

    # (data, name, searchable/orderable flag) of every table column, in request order.
    columns = [
        ("", "details-control", "false"),
        ("platz", "field-platz", "true"),
        ("vorname", "field-vorname", "true"),
        ("nachname", "field-nachname", "true"),
        ("nation", "field-nation", "true"),
        ("startnummer", "field-startnummer", "true"),
        ("verein", "field-verein", "true"),
        ("sex", "field-sex", "true"),
        ("netto", "field-netto", "true"),
        ("brutto", "field-brutto", "true"),
        ("", "photo", "false"),
        ("", "certificate", "false"),
    ]

    def fetch_page(start):
        """Request the page beginning at row ``start`` and return the decoded JSON."""
        query = [
            ("eID", "tx_scctiming_results"),
            ("competition", str(competition)),
            ("draw", "1"),
        ]
        for i, (data, name, flag) in enumerate(columns):
            prefix = f"columns[{i}]"
            query += [
                (f"{prefix}[data]", data),
                (f"{prefix}[name]", name),
                (f"{prefix}[searchable]", flag),
                (f"{prefix}[orderable]", flag),
                (f"{prefix}[search][value]", ""),
                (f"{prefix}[search][regex]", "false"),
            ]
        query += [
            ("order[0][column]", "8"),
            ("order[0][dir]", "asc"),
            ("start", str(start)),
            ("length", str(page_size)),
            ("search[value]", ""),
            ("search[regex]", "false"),
            ("_", "1635717715493"),
        ]
        # requests percent-encodes the bracketed keys, giving the same query string
        # that was previously written out by hand.
        response = requests.get(base_url, params=query, timeout=60)
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(0)
    records_total = int(first_page["recordsTotal"])
    max_page = records_total // page_size

    # Collect the pages in a list and concatenate once instead of growing a frame per page.
    pages = [pd.DataFrame(first_page["data"])]
    for p in range(1, max_page + 1):
        print("Page: ", p, "/", max_page)
        pages.append(pd.DataFrame(fetch_page(p * page_size)["data"]))
        # pausing the script
        sleep(randint(2, 5))
    raw_df = pd.concat(pages)

    # "splits" is a comma-separated string ordered 5km, 10km, 15km, 20km, HM, ...;
    # only the half-marathon value (position 4) is kept.
    splits = raw_df["splits"].str.split(",", expand=True)

    tidy_df = raw_df.assign(
        half_split=splits[4],
        full_name=raw_df["vorname"] + " " + raw_df["nachname"],
    ).rename(
        columns={
            "platz": "place_overall",
            "nation": "nationality",
            "startnummer": "bib_number",
            "netto": "finish_time",
            "sex": "gender",
        }
    )

    # Re-order columns to match dataframes from other races
    ordered_columns = ["place_overall", "full_name", "nationality", "bib_number", "half_split", "finish_time", "gender"]
    return tidy_df[ordered_columns].assign(year=y)


# Scrape the 2014 race and write the resulting table to a per-year CSV file.
y = "2014"

print("Year: ", y)

year_race_results_df = get_berlin_results(y)
year_race_results_df.to_csv(path_or_buf=f"Berlin_race_results_{y}.csv")

Year:  2014
Page:  1 / 28
Page:  2 / 28
Page:  3 / 28
Page:  4 / 28
Page:  5 / 28
Page:  6 / 28
Page:  7 / 28
Page:  8 / 28
Page:  9 / 28
Page:  10 / 28
Page:  11 / 28
Page:  12 / 28
Page:  13 / 28
Page:  14 / 28
Page:  15 / 28
Page:  16 / 28
Page:  17 / 28
Page:  18 / 28
Page:  19 / 28
Page:  20 / 28
Page:  21 / 28
Page:  22 / 28
Page:  23 / 28
Page:  24 / 28
Page:  25 / 28
Page:  26 / 28
Page:  27 / 28
Page:  28 / 28
