<h3 style="text-align:center"> Gather <b>Formula 1</b> data from <a href="http://ergast.com/mrd/">Ergast Developer API</a></h3>
<hr>

#### Import Libraries

In [5]:
# - Import Libraries - 
from bs4 import BeautifulSoup
import json
import os
import random
import re
import requests

##### Functions

In [6]:
# - Define Functions -

# -- Display a sample of a dictionnary --
def displayDictSample(dictionary: dict, numSample: int = 5):
    """Display a sample of a dictionnary.
    Args:
        dictionary (dict): the dictionnary to display.
        numSample (int, optional): the number of elements to display. Defaults to 5.
    """    
    key = random.sample(list(dictionary.keys()), k=numSample)
    str_ = f'Sample of the Dictionnary ({numSample} observations):'
    print(f"{str_}\n{'*' * len(str_)} \n")
    for k in sorted(key, reverse=False):
        print(k, dictionary[k])


# -- Update a dictionnary --
def updateDictionnary(inputDict: dict, outputDict: dict, inputKeys: list, outputKeys: list):
    """Update a dictionnary by adding a new key and value based on the existance of the inputKeys within the inputDict.
    Args:
        inputDict (dict): the input dictionnary.
        outputDict (dict): the output dictionnary.
        inputKeys (list): the list of keys to look for in the inputDict.
        outputKeys (list): the list of keys to add in the outputDict depending on the inputKeys.
    """    
    for i_, inputKey in enumerate(inputKeys):
        if inputKey in inputDict.keys():
            outputDict[outputKeys[i_]] = inputDict[inputKey]
    return outputDict


# -- Make a request to the Ergast Developer API --
def makeErgastRequest(requestContent: str, apiLimit: int = 1000, apiOffset: int = 0, apiExport: str = 'json'):
    """Make a request to the Ergast Developer API and return the response.
    Args:
        requestContent (str): the content of the request as a string. It is the content after the Ergast URL.
        apiLimit (int, optional): the number of information to request. Defaults to 1000.
        apiOffset (int, optional): the number of offset to setup. Defaults to 0.
        apiExport (str, optional): the type of exportation (JSON, XML). Defaults to 'json'.
    """    
    setupParameters = {'LIMIT' : apiLimit, 'OFFSET' : apiOffset, 'EXPORT' : apiExport}
    requestURL = f"http://ergast.com/api/f1/{requestContent}.{setupParameters['EXPORT']}?limit={setupParameters['LIMIT']}&offset={setupParameters['OFFSET']}"
    ErgastRequest = requests.get(requestURL, headers={}, data={})
    try:
        if ErgastRequest.status_code == 200:
            return ErgastRequest
        print(f'Request failed with status code {ErgastRequest.status_code}')
    except Exception as e:
        print(f'Error Caught while requesting the Ergast API: {e}')


def exportJSON(dictContent: dict, filePath: str):
    """Export a dictionnary as a JSON file.
    Args:
        dictContent (dict): the dictionary to export.
        filePath (str): the path where to export the file.
    """    
    if not os.path.exists(os.path.dirname(filePath)):
        os.makedirs(os.path.dirname(filePath), exist_ok=True)
    with open(filePath, 'w') as fp:
        json.dump(dictContent, fp, indent=4)

---

#### CORE - API Ergast

In [8]:
# 1° - Get the SEASONS
seasonsReqs = makeErgastRequest('seasons').json()
dictF1 = {seasonContent['season'] : dict() for seasonContent in seasonsReqs['MRData']['SeasonTable']['Seasons']}
# dictF1['CURRENT_SEASON'] = max(list(dictF1.keys()))
displayDictSample(dictF1, 5)

Sample of the Dictionnary (5 observations):
******************************************* 

1963 {}
1973 {}
1986 {}
2000 {}
2013 {}


In [9]:
# 1.bis° - Get the CIRCUITS
circuitsReqs = makeErgastRequest('circuits').json()
circuitsF1 = dict()
for circuitContent in circuitsReqs['MRData']['CircuitTable']['Circuits']:
    circuitsF1[circuitContent['circuitId']] = dict({
        'CIRCUIT_NAME': circuitContent['circuitName'],
        'COUNTRY': circuitContent['Location']['country'],
        'CITY': circuitContent['Location']['locality'],
        'COORDINATES' : (circuitContent['Location']['lat'], circuitContent['Location']['long'])
    })
displayDictSample(circuitsF1, 5)

Sample of the Dictionnary (5 observations):
******************************************* 

hockenheimring {'CIRCUIT_NAME': 'Hockenheimring', 'COUNTRY': 'Germany', 'CITY': 'Hockenheim', 'COORDINATES': ('49.3278', '8.56583')}
montjuic {'CIRCUIT_NAME': 'Montjuïc', 'COUNTRY': 'Spain', 'CITY': 'Barcelona', 'COORDINATES': ('41.3664', '2.15167')}
mugello {'CIRCUIT_NAME': 'Autodromo Internazionale del Mugello', 'COUNTRY': 'Italy', 'CITY': 'Mugello', 'COORDINATES': ('43.9975', '11.3719')}
nurburgring {'CIRCUIT_NAME': 'Nürburgring', 'COUNTRY': 'Germany', 'CITY': 'Nürburg', 'COORDINATES': ('50.3356', '6.9475')}
yas_marina {'CIRCUIT_NAME': 'Yas Marina Circuit', 'COUNTRY': 'UAE', 'CITY': 'Abu Dhabi', 'COORDINATES': ('24.4672', '54.6031')}


In [10]:
# 2.1° - Get the DRIVERS per SEASON
for seasonKey in dictF1.keys():
    driverReqs = makeErgastRequest(f'{seasonKey}/drivers').json()
    dictF1[seasonKey]['DRIVERS'] = dict()
    for driverContent in driverReqs['MRData']['DriverTable']['Drivers']:
        dictDriver = dict()
        dictDriver = updateDictionnary(driverContent, dictDriver, ['permanentNumber', 'code', 'givenName', 'familyName', 'dateOfBirth', 'nationality'], ['PERMANENT_NUMBER', 'CODE', 'FIRST_NAME', 'LAST_NAME', 'DATE_OF_BIRTH', 'NATIONALITY'])
        dictF1[seasonKey]['DRIVERS'][driverContent['driverId']] = dictDriver
displayDictSample(dictF1[random.choice(list(dictF1.keys()))]['DRIVERS'], 5)

Sample of the Dictionnary (5 observations):
******************************************* 

baghetti {'FIRST_NAME': 'Giancarlo', 'LAST_NAME': 'Baghetti', 'DATE_OF_BIRTH': '1934-12-25', 'NATIONALITY': 'Italian'}
ernesto_brambilla {'FIRST_NAME': 'Ernesto', 'LAST_NAME': 'Brambilla', 'DATE_OF_BIRTH': '1934-01-31', 'NATIONALITY': 'Italian'}
gregory {'FIRST_NAME': 'Masten', 'LAST_NAME': 'Gregory', 'DATE_OF_BIRTH': '1932-02-29', 'NATIONALITY': 'American'}
hill {'FIRST_NAME': 'Graham', 'LAST_NAME': 'Hill', 'DATE_OF_BIRTH': '1929-02-15', 'NATIONALITY': 'British'}
phil_hill {'FIRST_NAME': 'Phil', 'LAST_NAME': 'Hill', 'DATE_OF_BIRTH': '1927-04-20', 'NATIONALITY': 'American'}


In [11]:
# 2.2° - Get the CONSTRUCTORS per SEASON
for seasonKey in dictF1.keys():
    constructorReqs = makeErgastRequest(f'{seasonKey}/constructors').json()
    dictF1[seasonKey]['CONSTRUCTORS'] = dict()
    for constructorContent in constructorReqs['MRData']['ConstructorTable']['Constructors']:
        dictConstructor = dict()
        dictConstructor = updateDictionnary(constructorContent, dictConstructor, ['name', 'nationality'], ['NAME', 'NATIONALITY'])
        dictF1[seasonKey]['CONSTRUCTORS'][constructorContent['constructorId']] = dictConstructor
displayDictSample(dictF1[random.choice(list(dictF1.keys()))]['CONSTRUCTORS'], 5)

Sample of the Dictionnary (5 observations):
******************************************* 

ensign {'NAME': 'Ensign', 'NATIONALITY': 'British'}
maki {'NAME': 'Maki', 'NATIONALITY': 'Japanese'}
march {'NAME': 'March', 'NATIONALITY': 'British'}
surtees {'NAME': 'Surtees', 'NATIONALITY': 'British'}
tyrrell {'NAME': 'Tyrrell', 'NATIONALITY': 'British'}


In [12]:
# 2.3° - Get the RACES per SEASON
for seasonKey in dictF1.keys():
    raceReqs = makeErgastRequest(f'{seasonKey}').json()
    dictF1[seasonKey]['RACES'] = dict()
    for raceContent in raceReqs['MRData']['RaceTable']['Races']:
        dictRace = dict()
        dictRace = updateDictionnary(raceContent['Circuit'], dictRace, ['circuitId'], ['CIRCUIT_ID'])
        dictRace['RACE'] = dict()
        dictRace['RACE'] = updateDictionnary(raceContent, dictRace['RACE'], ['raceName', 'date', 'time'], ['NAME', 'DATE', 'TIME'])
        dictRace = updateDictionnary(raceContent, dictRace, ['FirstPractice', 'SecondPractice', 'ThirdPractice', 'Sprint'], ['PRACTICE_1', 'PRACTICE_2', 'PRACTICE_3', 'SPRINT'])
        dictF1[seasonKey]['RACES'][raceContent['round']] = dictRace
displayDictSample(dictF1[random.choice(list(dictF1.keys()))]['RACES'], 5)

Sample of the Dictionnary (5 observations):
******************************************* 

16 {'CIRCUIT_ID': 'shanghai', 'RACE': {'NAME': 'Chinese Grand Prix', 'DATE': '2004-09-26'}}
18 {'CIRCUIT_ID': 'interlagos', 'RACE': {'NAME': 'Brazilian Grand Prix', 'DATE': '2004-10-24'}}
4 {'CIRCUIT_ID': 'imola', 'RACE': {'NAME': 'San Marino Grand Prix', 'DATE': '2004-04-25'}}
6 {'CIRCUIT_ID': 'monaco', 'RACE': {'NAME': 'Monaco Grand Prix', 'DATE': '2004-05-23'}}
8 {'CIRCUIT_ID': 'villeneuve', 'RACE': {'NAME': 'Canadian Grand Prix', 'DATE': '2004-06-13'}}


In [13]:
# 2.4° - Get the RACES RESULTS per SEASON
for seasonKey in dictF1.keys():
    resultReqs = makeErgastRequest(f'{seasonKey}/results').json()
    dictF1[seasonKey]['RESULTS'] = dict()
    for resultContent in resultReqs['MRData']['RaceTable']['Races']:
        dictResult = dict()
        for driverContent in resultContent['Results']:
            dictResult[driverContent['position']] = dict()
            dictResult[driverContent['position']] = updateDictionnary(driverContent['Driver'], dictResult[driverContent['position']], ['driverId'], ['DRIVER_ID'])
            dictResult[driverContent['position']] = updateDictionnary(driverContent['Constructor'], dictResult[driverContent['position']], ['constructorId'], ['CONSTRUCTOR_ID'])
            dictResult[driverContent['position']] = updateDictionnary(driverContent, dictResult[driverContent['position']], ['grid', 'status', 'lap', 'Time', 'FastestLap', 'AverageSpeed'], ['GRID_POSITION', 'STATUS', 'N_LAPS', 'TIME', 'FASTEST_LAP', 'AVERAGE_SPEED'])
            dictF1[seasonKey]['RESULTS'][resultContent['round']] = dictResult
displayDictSample(dictF1[random.choice(list(dictF1.keys()))]['RESULTS'], 5)

Sample of the Dictionnary (5 observations):
******************************************* 

13 {'1': {'DRIVER_ID': 'hamilton', 'CONSTRUCTOR_ID': 'mercedes', 'GRID_POSITION': '1', 'STATUS': 'Finished', 'TIME': {'millis': '4532312', 'time': '1:15:32.312'}, 'FASTEST_LAP': {'rank': '2', 'lap': '50', 'Time': {'time': '1:23.488'}, 'AverageSpeed': {'units': 'kph', 'speed': '249.793'}}}, '2': {'DRIVER_ID': 'bottas', 'CONSTRUCTOR_ID': 'mercedes', 'GRID_POSITION': '4', 'STATUS': 'Finished', 'TIME': {'millis': '4536783', 'time': '+4.471'}, 'FASTEST_LAP': {'rank': '3', 'lap': '53', 'Time': {'time': '1:23.488'}, 'AverageSpeed': {'units': 'kph', 'speed': '249.095'}}}, '3': {'DRIVER_ID': 'vettel', 'CONSTRUCTOR_ID': 'ferrari', 'GRID_POSITION': '6', 'STATUS': 'Finished', 'TIME': {'millis': '4568629', 'time': '+36.317'}, 'FASTEST_LAP': {'rank': '4', 'lap': '51', 'Time': {'time': '1:23.897'}, 'AverageSpeed': {'units': 'kph', 'speed': '248.576'}}}, '4': {'DRIVER_ID': 'ricciardo', 'CONSTRUCTOR_ID': 'red_bull

In [30]:
# 2.5° - Get the RACES RESULTS per LAP per SEASON
for seasonKey in dictF1.keys():
    dictF1[seasonKey]['LAPS'] = dict()
    n_races = len(makeErgastRequest(f'{seasonKey}').json()['MRData']['RaceTable']['Races'])
    for raceNum in range(1, n_races+1):       
        lapReqs = makeErgastRequest(f'{seasonKey}/{raceNum}/laps').json()

        if not lapReqs['MRData']['RaceTable']['Races']:
            dictF1[seasonKey]['LAPS'][raceNum-1] = dict()
            continue
        
        dictLaps = dict()
        for lapsContent in lapReqs['MRData']['RaceTable']['Races'][0]['Laps']:
            dictLaps[lapsContent['number']] = dict()
            for driverContent in lapsContent['Timings']:
                dictLaps[lapsContent['number']][driverContent['driverId']] = dict()
                dictLaps[lapsContent['number']][driverContent['driverId']] = updateDictionnary(driverContent, dictLaps[lapsContent['number']][driverContent['driverId']], ['position', 'time'], ['POSITION', 'TIME'])
        dictF1[seasonKey]['LAPS'][raceNum] = dictLaps
displayDictSample(dictF1[random.choice(list(dictF1.keys()))]['LAPS'], 5)

In [40]:
# 3° - Export the data in a JSON file
pathFile = os.path.join('data', 'formula1-data.json')
exportJSON(dictF1, pathFile)