In [75]:
import fastf1
import os
import time
from datetime import datetime
import pandas as pd
from fastf1.ergast import Ergast
from fastf1.ergast.interface import ErgastRawResponse

In [76]:
# Shared Ergast API client; the data-collection cells below issue their requests through it.
ergast = Ergast()

In [77]:
def get_driver_entry(d, today=None):
    """Build the driver-table row for one Ergast driver record.

    Args:
        d: Mapping describing one driver. The keys read are ``givenName``,
            ``familyName``, ``code``, ``nationality`` and ``dateOfBirth``.
            ``dateOfBirth`` may be a ``"%Y-%m-%d"`` string or any object
            exposing ``year``/``month``/``day`` (e.g. a ``datetime``).
        today: Reference date used for the age calculation. Defaults to
            ``datetime.now()``; pass a fixed value for reproducible output.

    Returns:
        dict with keys ``dID``, ``firstName``, ``lastName``, ``driverTag``,
        ``nationality`` and ``age``. ``dID`` is the current value of the
        module-level ``driverIndex``; the caller is responsible for advancing
        that counter. ``age`` is ``None`` when the record has no date of birth.
    """
    born = d.get('dateOfBirth')
    if isinstance(born, str):
        born = datetime.strptime(born, "%Y-%m-%d")
    if today is None:
        today = datetime.now()

    if born is None:
        age = None
    else:
        # Count completed calendar years. Dividing a day count by 365 ignores
        # leap days and over-reports the age in the days before a birthday.
        birthday_passed = (today.month, today.day) >= (born.month, born.day)
        age = today.year - born.year - (0 if birthday_passed else 1)

    return {
        "dID": driverIndex,
        "firstName": d.get('givenName'),
        "lastName": d.get('familyName'),
        "driverTag": d.get('code'),
        "nationality": d.get('nationality'),
        "age": age,
    }

def get_constructor_entry(c):
    """Build the constructor-table row for one Ergast constructor record.

    Reads ``name`` from ``c`` and pairs it with the current value of the
    module-level ``constructorIndex``, which becomes the row's ``cID``.
    The caller is responsible for advancing that counter.
    """
    team_name = c.get('name')
    return dict(cID=constructorIndex, name=team_name)
     

In [78]:
# Driver table state: next free dID, accumulated rows, and the lookup from
# Ergast's driverId to our own dID.
driverIndex = 0
driverData = []
driverMapping = {} # their driverId => our dID

# Ergast returns only 30 rows per request unless a limit is given, and a
# season's driver list can be longer than that (e.g. the pre-qualifying era).
# Request a page large enough to hold a whole season.
DRIVER_PAGE_SIZE = 100

for season in range(1989,2025):
    drivers = ergast.get_driver_info(season=season, result_type='raw', limit=DRIVER_PAGE_SIZE)
    for d in drivers:
        dID = d.get('driverId')

        if dID not in driverMapping:
            # get_driver_entry reads driverIndex, so append before incrementing.
            driverData.append(get_driver_entry(d))
            driverMapping[dID] = driverIndex
            driverIndex += 1

    # Pause between seasons to stay polite to the API.
    time.sleep(1)

In [79]:
# Constructor table state: next free cID, accumulated rows, and the lookup
# from Ergast's constructorId to our own cID.
constructorIndex = 0
constructorData = []
constructorMapping = {}  # their constructorId => our cID

for season in range(1989, 2025):
    for c in ergast.get_constructor_info(season=season, result_type='raw'):
        constructorId = c.get('constructorId')
        if constructorId in constructorMapping:
            # Already registered in an earlier season.
            continue

        # get_constructor_entry reads constructorIndex, so append before incrementing.
        constructorData.append(get_constructor_entry(c))
        constructorMapping[constructorId] = constructorIndex
        constructorIndex += 1

    # Pause between seasons to stay polite to the API.
    time.sleep(1)

In [80]:
# Race/result table state shared with querier(), checkpoint() and reload().
raceMapping = {}  # (season, circuitName) => rID
raceData, resultData = [], []
index = 0  # next free race ID

In [81]:
def querier(lower, upper):
    """Fetch race results for seasons ``lower`` .. ``upper - 1`` into the tables.

    For every round of every season in the range this appends one row to
    ``raceData`` and one row per classified entry to ``resultData``. Drivers
    and constructors that are not yet in ``driverMapping`` /
    ``constructorMapping`` are registered on the fly. The module-level
    counters ``index``, ``driverIndex`` and ``constructorIndex`` are advanced
    as new rows are created.

    Args:
        lower: First season to fetch (inclusive).
        upper: Season at which to stop (exclusive).
    """
    global index
    global driverIndex
    global constructorIndex
    for season in range(lower, upper):
        num_races = len(ergast.get_race_schedule(season=season))
        for i in range(1, num_races + 1):
            # Ask for more than the 30-row default page so that rounds with a
            # large entry list are not silently truncated.
            results = ergast.get_race_results(season=season, round=i, result_type='raw', limit=100)
            for result in results:
                circuit = result.get("Circuit")
                circuitName = circuit.get("circuitName")
                circuitCountry = circuit.get("Location").get("country")
                # NOTE(review): the race ID is stored under "cID" here while
                # resultData refers to it as "rID" - confirm with the consumers
                # of race.csv before renaming.
                raceData.append({"cID": index, "name": circuitName, "country": circuitCountry, "round": i, "season": season})
                raceMapping[(season, circuitName)] = index

                for r in result.get('Results'):
                    driver = r.get('Driver')
                    driverId = driver.get('driverId')
                    if driverId not in driverMapping:
                        # get_driver_entry reads driverIndex, so append before incrementing.
                        driverData.append(get_driver_entry(driver))
                        driverMapping[driverId] = driverIndex
                        driverIndex += 1
                    dID = driverMapping[driverId]

                    constructor = r.get('Constructor')
                    constructorId = constructor.get('constructorId')
                    if constructorId not in constructorMapping:
                        # Same ordering constraint as for drivers above.
                        constructorData.append(get_constructor_entry(constructor))
                        constructorMapping[constructorId] = constructorIndex
                        constructorIndex += 1
                    cID = constructorMapping[constructorId]

                    startPos = r.get('grid')
                    finishPos = r.get('position')
                    resultData.append({"dID": dID, "cID": cID, "rID": index, "startPos": startPos, "finishPos": finishPos})
                index += 1
            # Pause between rounds to stay polite to the API.
            time.sleep(1)

In [83]:
def checkpoint():
    """Write the four in-memory tables to CSV files under ``../output_csv``.

    ``driverData``, ``constructorData``, ``raceData`` and ``resultData`` are
    each converted to a DataFrame and saved without the pandas index column.
    The output directory is created if it does not exist yet.
    """
    out_dir = '../output_csv'

    # Build every frame before touching the disk so a conversion error does
    # not leave a half-written checkpoint.
    driver_df = pd.DataFrame(driverData)
    constructor_df = pd.DataFrame(constructorData)
    race_df = pd.DataFrame(raceData)
    result_df = pd.DataFrame(resultData)

    # to_csv does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)

    driver_df.to_csv(out_dir + '/drivers.csv', index=False)
    constructor_df.to_csv(out_dir + '/constructor.csv', index=False)
    race_df.to_csv(out_dir + '/race.csv', index=False)
    result_df.to_csv(out_dir + '/result.csv', index=False)

def reload():
    """Restore the four tables from the CSV files written by ``checkpoint``.

    Besides replacing ``driverData``, ``constructorData``, ``raceData`` and
    ``resultData``, this resets the ID counters (``driverIndex``,
    ``constructorIndex``, ``index``) to one past the largest ID found in the
    restored rows, and removes entries from ``driverMapping``,
    ``constructorMapping`` and ``raceMapping`` that point at IDs newer than
    the checkpoint. Without that, a reload after a partially completed
    ``querier`` run would leave counters and mappings ahead of the data.

    The CSV files do not store Ergast's own driver/constructor identifiers,
    so the mappings cannot be rebuilt from disk; they are only pruned when
    they already exist in the session.
    """
    global driverData
    global constructorData
    global raceData
    global resultData
    global index
    global driverIndex
    global constructorIndex

    def next_id(rows, key):
        # One past the largest stored ID; 0 when the table is empty.
        return max((int(row[key]) for row in rows), default=-1) + 1

    # Read everything first so a missing file leaves the session untouched.
    driver_df = pd.read_csv('../output_csv/drivers.csv')
    constructor_df = pd.read_csv('../output_csv/constructor.csv')
    race_df = pd.read_csv('../output_csv/race.csv')
    result_df = pd.read_csv('../output_csv/result.csv')

    driverData = driver_df.to_dict(orient='records')
    constructorData = constructor_df.to_dict(orient='records')
    raceData = race_df.to_dict(orient='records')
    resultData = result_df.to_dict(orient='records')

    driverIndex = next_id(driverData, "dID")
    constructorIndex = next_id(constructorData, "cID")
    # Race rows store their ID under "cID" (see querier).
    index = next_id(raceData, "cID")

    # Drop mapping entries created after the checkpoint so their IDs are not
    # handed out a second time.
    session = globals()
    for mapping_name, first_free in (
        ("driverMapping", driverIndex),
        ("constructorMapping", constructorIndex),
        ("raceMapping", index),
    ):
        mapping = session.get(mapping_name)
        if mapping is None:
            continue
        for stale_key in [k for k, v in mapping.items() if v >= first_free]:
            del mapping[stale_key]

In [82]:
# First batch: seasons 1989 through 1999 (querier's upper bound is exclusive).
querier(1989,2000)

In [85]:
# Save the tables gathered so far to ../output_csv before the next batch.
checkpoint()

In [91]:
# Re-read the saved tables, then continue with seasons 2000 through 2009.
reload()
querier(2000,2010)

In [92]:
# Save the tables gathered so far to ../output_csv before the next batch.
checkpoint()

In [95]:
# Re-read the saved tables, then continue with seasons 2010 through 2019.
reload()
querier(2010,2020)

In [96]:
# Save the tables gathered so far to ../output_csv before the next batch.
checkpoint()

In [98]:
# Re-read the saved tables, then fetch the final batch: seasons 2020 through 2024.
reload()
querier(2020,2025)

In [99]:
# Final save of all four tables to ../output_csv.
checkpoint()