In [1]:
import os

import fastf1 as ff1
import matplotlib as mpl
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection

# Display settings: show up to 100 rows and never truncate columns when a frame is rendered.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

# enable_cache expects the cache directory to already exist, so create it first;
# this lets the notebook run from a fresh checkout (Restart Kernel -> Run All).
os.makedirs('./cache', exist_ok=True)
ff1.Cache.enable_cache('./cache')

In [129]:
# aggregate weather data per race
def agg_weather(weather_data):
    """Collapse a table of weather samples into one aggregated value per column.

    Parameters
    ----------
    weather_data : pandas.DataFrame
        Must contain the columns 'AirTemp', 'Humidity', 'Pressure', 'Rainfall',
        'TrackTemp' and 'WindSpeed'. Any other columns are ignored.

    Returns
    -------
    pandas.Series
        Indexed by column name, in the order listed above. Every column is
        averaged except 'Rainfall', which takes its maximum (i.e. whether any
        sample reported rain, assuming it is a boolean flag).
    """
    # 'mean' is the pandas aggregation name and skips missing samples. The previous
    # 'average' only resolved through the numpy fallback (np.average), which returns
    # NaN as soon as a single sample is missing.
    col_agg = {
        'AirTemp': 'mean',
        'Humidity': 'mean',
        'Pressure': 'mean',
        'Rainfall': 'max',
        'TrackTemp': 'mean',
        'WindSpeed': 'mean',
    }
    # Select exactly the columns named in the spec so the two can never drift apart.
    return weather_data[list(col_agg)].agg(col_agg)

# aggregate lap data according to stints (start/pit stop to pit stop/end)
def agg_tyre(lap_data):
    """Summarise lap rows into one row per (Team, Driver, Stint, Compound).

    Parameters
    ----------
    lap_data : pandas.DataFrame
        Must contain the columns 'Team', 'Driver', 'Stint', 'Compound' and
        'TyreLife'. Any other columns are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns 'Team', 'Driver', 'Stint', 'Compound', 'TyreLife', where
        'TyreLife' is the maximum value seen within the group. Rows are ordered
        by the group keys and carry a fresh integer index.
    """
    group_keys = ['Team', 'Driver', 'Stint', 'Compound']
    # No `axis` argument here: it is not a parameter of the group-by agg and would
    # only be passed along as an extra keyword.
    return (
        lap_data[group_keys + ['TyreLife']]
        .groupby(group_keys)
        .agg({'TyreLife': 'max'})
        .reset_index()
    )

In [None]:
%%time
# to scrape
# - get the rounds of each season from the event schedule
# - keep only 'conventional' race weekends (drops the other event formats, e.g. testing and sprint)
# - for each round, load the race session (not practice or qualifying)
# - process the session and append the result to a CSV
# NOTE: the CSVs are appended to (mode='a') and written without a header row, so
#   * column names must be supplied when reading them back, and
#   * re-running this cell adds the same rows again unless the files are removed first.
weather_df = pd.DataFrame()  # not used in this cell; kept in case other cells reference it

for year in range(2018,2021):
    event_schedule = ff1.get_event_schedule(year)
    event_schedule = event_schedule[event_schedule['EventFormat']=='conventional']
    rounds = event_schedule['RoundNumber']

    for roundnum in rounds:
        print("extracting " + str(year) + " " + str(roundnum))
        session = ff1.get_session(year,roundnum,'Race')
        session.load()

        # # weather
        # weather = session.weather_data
        # weather_row = pd.DataFrame(agg_weather(weather)).T
        # # add identifying columns
        # weather_row['round'] = roundnum
        # weather_row['year'] = year
        # weather_row.to_csv('./data/weather.csv',index=False,header=False,mode='a')

        # # tyres
        # laps = session.laps
        # tyre_row = agg_tyre(laps)
        # tyre_row['round'] = roundnum
        # tyre_row['year'] = year
        # tyre_row.to_csv('./data/tyrelife.csv',index=False,header=False,mode='a')

        # telemetry of each driver's fastest lap
        # unique() keeps first-appearance order, so the row order in the CSV is the
        # same on every run (iterating a set of strings is not).
        unq_drivers = session.laps['Driver'].dropna().unique()
        for driver in unq_drivers:
            try:
                tele = session.laps.pick_driver(driver).pick_fastest().telemetry
                # add identifying columns
                tele['round'] = roundnum
                tele['year'] = year
                tele['driver'] = driver
                tele.to_csv('./data/telemetry.csv',index=False,header=False,mode='a')
            except Exception as exc:
                # some drivers may not have lap data: skip them, but report why so that
                # real problems (e.g. a missing ./data directory) do not go unnoticed.
                # Catching Exception rather than everything keeps the cell interruptible.
                print(f"skipped telemetry for {driver} ({year} round {roundnum}): {exc!r}")


In [61]:
# Inspect the lap table of whichever `session` is currently bound in the kernel
# (presumably the last race loaded by the scraping loop - verify after a fresh run).
session.laps

Unnamed: 0,Time,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,IsPersonalBest,Compound,TyreLife,FreshTyre,LapStartTime,Team,Driver,TrackStatus,IsAccurate,LapStartDate
0,0 days 00:35:12.618000,33,NaT,1,1,0 days 00:00:06.256000,NaT,NaT,0 days 00:00:43.234000,0 days 00:00:40.388000,NaT,0 days 00:34:32.321000,0 days 00:35:12.800000,281.0,282.0,218.0,292.0,False,MEDIUM,4.0,False,0 days 00:33:28.601000,Red Bull Racing,VER,1,False,2020-12-13 13:13:28.798
1,0 days 00:36:54.910000,33,0 days 00:01:42.292000,2,1,NaT,NaT,0 days 00:00:18.014000,0 days 00:00:43.728000,0 days 00:00:40.550000,0 days 00:35:30.632000,0 days 00:36:14.360000,0 days 00:36:54.910000,270.0,273.0,218.0,286.0,False,MEDIUM,5.0,False,0 days 00:35:12.618000,Red Bull Racing,VER,1,True,2020-12-13 13:15:12.815
2,0 days 00:38:37.117000,33,0 days 00:01:42.207000,3,1,NaT,NaT,0 days 00:00:17.980000,0 days 00:00:43.865000,0 days 00:00:40.362000,0 days 00:37:12.890000,0 days 00:37:56.755000,0 days 00:38:37.117000,,270.0,218.0,279.0,False,MEDIUM,6.0,False,0 days 00:36:54.910000,Red Bull Racing,VER,1,True,2020-12-13 13:16:55.107
3,0 days 00:40:19.567000,33,0 days 00:01:42.450000,4,1,NaT,NaT,0 days 00:00:18.028000,0 days 00:00:43.938000,0 days 00:00:40.484000,0 days 00:38:55.145000,0 days 00:39:39.083000,0 days 00:40:19.567000,265.0,268.0,217.0,277.0,False,MEDIUM,7.0,False,0 days 00:38:37.117000,Red Bull Racing,VER,1,True,2020-12-13 13:18:37.314
4,0 days 00:42:02.219000,33,0 days 00:01:42.652000,5,1,NaT,NaT,0 days 00:00:18.009000,0 days 00:00:44.044000,0 days 00:00:40.599000,0 days 00:40:37.576000,0 days 00:41:21.620000,0 days 00:42:02.219000,271.0,271.0,217.0,278.0,False,MEDIUM,8.0,False,0 days 00:40:19.567000,Red Bull Racing,VER,1,True,2020-12-13 13:20:19.764
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1038,0 days 00:40:45.417000,11,0 days 00:01:44.762000,4,1,NaT,NaT,0 days 00:00:18.497000,0 days 00:00:43.515000,0 days 00:00:42.750000,0 days 00:39:19.152000,0 days 00:40:02.667000,0 days 00:40:45.417000,,295.0,213.0,305.0,False,HARD,4.0,True,0 days 00:39:00.655000,Racing Point,PER,1,True,2020-12-13 13:19:00.852
1039,0 days 00:42:29.203000,11,0 days 00:01:43.786000,5,1,NaT,NaT,0 days 00:00:18.299000,0 days 00:00:43.788000,0 days 00:00:41.699000,0 days 00:41:03.716000,0 days 00:41:47.504000,0 days 00:42:29.203000,279.0,286.0,213.0,323.0,False,HARD,5.0,True,0 days 00:40:45.417000,Racing Point,PER,1,True,2020-12-13 13:20:45.614
1040,0 days 00:44:12.466000,11,0 days 00:01:43.263000,6,1,NaT,NaT,0 days 00:00:18.092000,0 days 00:00:43.597000,0 days 00:00:41.574000,0 days 00:42:47.295000,0 days 00:43:30.892000,0 days 00:44:12.466000,,286.0,211.0,324.0,True,HARD,6.0,True,0 days 00:42:29.203000,Racing Point,PER,1,True,2020-12-13 13:22:29.400
1041,0 days 00:45:57.589000,11,0 days 00:01:45.123000,7,1,NaT,NaT,0 days 00:00:18.064000,0 days 00:00:44.150000,0 days 00:00:42.909000,0 days 00:44:30.530000,0 days 00:45:14.680000,0 days 00:45:57.589000,277.0,267.0,214.0,304.0,False,HARD,7.0,True,0 days 00:44:12.466000,Racing Point,PER,1,True,2020-12-13 13:24:12.663


In [142]:
# Peek at the first rows of the 2021 event schedule to see its columns and event formats.
# NOTE(review): this rebinds `event_schedule`, the same name the scraping loop assigns per season.
event_schedule = ff1.get_event_schedule(2021)
event_schedule.head(3)

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session2,Session2Date,Session3,Session3Date,Session4,Session4Date,Session5,Session5Date,F1ApiSupport
0,0,Bahrain,Sakhir,FORMULA 1 ARAMCO PRE-SEASON TESTING 2021,2021-03-14,Pre-Season Test,testing,Practice 1,2021-03-12,Practice 2,2021-03-13,Practice 3,2021-03-14,,NaT,,NaT,True
1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2021,2021-03-28,Bahrain Grand Prix,conventional,Practice 1,2021-03-26,Practice 2,2021-03-26,Practice 3,2021-03-27,Qualifying,2021-03-27,Race,2021-03-28,True
2,2,Italy,Imola,FORMULA 1 PIRELLI GRAN PREMIO DEL MADE IN ITAL...,2021-04-18,Emilia Romagna Grand Prix,conventional,Practice 1,2021-04-16,Practice 2,2021-04-16,Practice 3,2021-04-17,Qualifying,2021-04-17,Race,2021-04-18,True


In [3]:
# read ergast data
# Every table is a CSV in the same TidyTuesday (2021-09-07) folder, so the shared URL
# prefix is written once and each table is fetched by its file stem.
ERGAST_BASE_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-09-07'


def read_ergast(table_name):
    """Download `<ERGAST_BASE_URL>/<table_name>.csv` and return it as a DataFrame.

    Parameters
    ----------
    table_name : str
        File stem of the table, e.g. 'circuits' or 'lap_times'.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, read with pandas' default options.
    """
    return pd.read_csv(f'{ERGAST_BASE_URL}/{table_name}.csv')


circuits = read_ergast('circuits')
constructor_results = read_ergast('constructor_results')
constructor_standings = read_ergast('constructor_standings')
constructors = read_ergast('constructors')
driver_standings = read_ergast('driver_standings')
drivers = read_ergast('drivers')
lap_times = read_ergast('lap_times')
pit_stops = read_ergast('pit_stops')
qualifying = read_ergast('qualifying')
races = read_ergast('races')
results = read_ergast('results')
seasons = read_ergast('seasons')
status = read_ergast('status')