In [1]:
import pandas as pd
# Notebook display settings: render up to 100 rows and do not cap the number
# of columns shown when a DataFrame is displayed.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

import fastf1 as ff1
import numpy as np
import matplotlib as mpl

from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection

# Enable FastF1's cache, stored in ./cache relative to the notebook's working
# directory.
# NOTE(review): depending on the installed FastF1 version the directory may
# have to exist before this call — confirm.
ff1.Cache.enable_cache('./cache') 

In [129]:
# aggregate weather data per race
def agg_weather(weather_data):
    """Collapse the weather samples of one session into a single summary row.

    Parameters
    ----------
    weather_data : pandas.DataFrame
        Must contain the columns ``AirTemp``, ``Humidity``, ``Pressure``,
        ``Rainfall``, ``TrackTemp`` and ``WindSpeed``. Any other column is
        ignored.

    Returns
    -------
    pandas.Series
        Indexed by the six column names above, in that order. Every entry is
        the column mean (missing samples are skipped), except ``Rainfall``,
        which is the column maximum.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Use pandas' own 'mean' reducer (it skips NaN). 'average' is not a pandas
    # aggregation name; it only resolves through a fallback to numpy.average,
    # where a single NaN sample turns the whole result into NaN.
    col_agg = {
        'AirTemp': 'mean',
        'Humidity': 'mean',
        'Pressure': 'mean',
        # max: the largest value seen (True if any sample is True when the
        # column is boolean).
        'Rainfall': 'max',
        'TrackTemp': 'mean',
        'WindSpeed': 'mean',
    }
    # Select exactly the aggregated columns; the dict is the single source of
    # truth for which columns are used.
    weather_agg = weather_data[list(col_agg)].agg(col_agg)
    return weather_agg

# aggregate lap data according to stints (start/pit stop to pit stop/end)
def agg_tyre(lap_data):
    """Summarise tyre usage per stint.

    Parameters
    ----------
    lap_data : pandas.DataFrame
        Per-lap rows containing at least the columns ``Team``, ``Driver``,
        ``Stint``, ``Compound`` and ``TyreLife``.

    Returns
    -------
    pandas.DataFrame
        One row per (Team, Driver, Stint, Compound) combination with the
        columns ``Team``, ``Driver``, ``Stint``, ``Compound`` and
        ``TyreLife``, where ``TyreLife`` is the maximum value observed within
        that group.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    stint_keys = ['Team', 'Driver', 'Stint', 'Compound']
    # No ``axis`` argument here: GroupBy.agg has no such parameter, so it would
    # be treated as an extra keyword for the reducer itself.
    lap_agg = (
        lap_data[stint_keys + ['TyreLife']]
        .groupby(stint_keys)
        .agg({'TyreLife': 'max'})
        .reset_index()
    )
    return lap_agg

In [None]:
%%time
# Scrape race-session data from FastF1 and append it to CSV files:
#   1. fetch the event schedule of each season;
#   2. keep only 'conventional' weekends (excludes testing and sprint formats);
#   3. for each remaining round load the 'Race' session (not practice or
#      qualifying);
#   4. append the processed rows to a CSV under ./data.
# The CSVs are written WITHOUT a header row (and in append mode), so column
# names must be supplied when reading them back, and re-running this cell
# appends the same rows again.
weather_df = pd.DataFrame()  # NOTE(review): not used in this cell — drop if nothing else references it

for year in range(2018,2021):  # seasons 2018, 2019 and 2020 (end is exclusive)
    event_schedule = ff1.get_event_schedule(year)
    event_schedule = event_schedule[event_schedule['EventFormat']=='conventional']
    rounds = event_schedule['RoundNumber']

    for roundnum in rounds:
        print(f"extracting {year} {roundnum}")
        session = ff1.get_session(year,roundnum,'Race')
        session.load()

        # The weather and tyre exports below are kept disabled, exactly as found.

        # # weather
        # weather = session.weather_data
        # weather_row = pd.DataFrame(agg_weather(weather)).T
        # # add identifying columns
        # weather_row['round'] = roundnum
        # weather_row['year'] = year
        # weather_row.to_csv('./data/weather.csv',index=False,header=False,mode='a')

        # # tyres
        # laps = session.laps
        # tyre_row = agg_tyre(laps)
        # tyre_row['round'] = roundnum
        # tyre_row['year'] = year
        # tyre_row.to_csv('./data/tyrelife.csv',index=False,header=False,mode='a')

        # telemetry: one block of rows per driver, taken from that driver's
        # fastest lap of the race
        session_laps = session.laps
        # .unique() keeps order of first appearance, so the rows appended to
        # the CSV come out in the same driver order on every run (a set does
        # not guarantee that).
        unq_drivers = session_laps['Driver'].unique()
        for driver in unq_drivers:
            try:
                fastest_lap = session_laps.pick_driver(driver).pick_fastest()
                # assign() returns a new frame, leaving the session's own
                # telemetry object untouched; the three identifying columns
                # are appended in this order.
                tele = fastest_lap.telemetry.assign(
                    round=roundnum,
                    year=year,
                    driver=driver,
                )
                tele.to_csv('./data/telemetry.csv',index=False,header=False,mode='a')
            except Exception as exc:
                # Best effort: some drivers may not have lap data. Catch
                # Exception (not a bare except) so Ctrl-C / kernel interrupt
                # still stops the scrape, and report what was skipped instead
                # of hiding it.
                print(f"  skipped telemetry for {driver} ({year} round {roundnum}): {exc!r}")


In [142]:
# Look at the first three entries of the 2021 calendar to see which columns
# the schedule provides (e.g. RoundNumber and EventFormat).
event_schedule = ff1.get_event_schedule(year=2021)
event_schedule.head(n=3)

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session2,Session2Date,Session3,Session3Date,Session4,Session4Date,Session5,Session5Date,F1ApiSupport
0,0,Bahrain,Sakhir,FORMULA 1 ARAMCO PRE-SEASON TESTING 2021,2021-03-14,Pre-Season Test,testing,Practice 1,2021-03-12,Practice 2,2021-03-13,Practice 3,2021-03-14,,NaT,,NaT,True
1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2021,2021-03-28,Bahrain Grand Prix,conventional,Practice 1,2021-03-26,Practice 2,2021-03-26,Practice 3,2021-03-27,Qualifying,2021-03-27,Race,2021-03-28,True
2,2,Italy,Imola,FORMULA 1 PIRELLI GRAN PREMIO DEL MADE IN ITAL...,2021-04-18,Emilia Romagna Grand Prix,conventional,Practice 1,2021-04-16,Practice 2,2021-04-16,Practice 3,2021-04-17,Qualifying,2021-04-17,Race,2021-04-18,True


In [3]:
# read ergast data
# All tables come from the same folder of the TidyTuesday repository
# (2021-09-07 data set); keep the location in one place instead of repeating
# it for every table.
ERGAST_BASE_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-09-07'


def read_ergast_table(table_name):
    """Download ``<table_name>.csv`` from ``ERGAST_BASE_URL`` into a DataFrame.

    Parameters
    ----------
    table_name : str
        File name without the ``.csv`` extension, e.g. ``'circuits'``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, read with pandas' default ``read_csv`` options.
    """
    return pd.read_csv(f'{ERGAST_BASE_URL}/{table_name}.csv')


circuits = read_ergast_table('circuits')
constructor_results = read_ergast_table('constructor_results')
constructor_standings = read_ergast_table('constructor_standings')
constructors = read_ergast_table('constructors')
driver_standings = read_ergast_table('driver_standings')
drivers = read_ergast_table('drivers')
lap_times = read_ergast_table('lap_times')
pit_stops = read_ergast_table('pit_stops')
qualifying = read_ergast_table('qualifying')
races = read_ergast_table('races')
results = read_ergast_table('results')
seasons = read_ergast_table('seasons')
status = read_ergast_table('status')