## Understanding what features are present in a dataframe from F1

In [1]:
import fastf1
import pandas as pd
import plotly.express as px
import os

cache_path = "../f1_cache"
# Check if the cache directory exists
if not os.path.exists(cache_path):
    os.makedirs(cache_path)

# Enable FastF1 caching
fastf1.Cache.enable_cache(cache_path)



In [2]:
session_2024 = fastf1.get_session(2024, "Australian", "R")
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']


In [3]:
session_2024.laps.info()

<class 'fastf1.core.Laps'>
RangeIndex: 998 entries, 0 to 997
Data columns (total 31 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                998 non-null    timedelta64[ns]
 1   Driver              998 non-null    object         
 2   DriverNumber        998 non-null    object         
 3   LapTime             995 non-null    timedelta64[ns]
 4   LapNumber           998 non-null    float64        
 5   Stint               998 non-null    float64        
 6   PitOutTime          37 non-null     timedelta64[ns]
 7   PitInTime           37 non-null     timedelta64[ns]
 8   Sector1Time         977 non-null    timedelta64[ns]
 9   Sector2Time         996 non-null    timedelta64[ns]
 10  Sector3Time         995 non-null    timedelta64[ns]
 11  Sector1SessionTime  972 non-null    timedelta64[ns]
 12  Sector2SessionTime  996 non-null    timedelta64[ns]
 13  Sector3SessionTime  995 non-null    timedelta6

In [4]:
session_2024.laps.TyreLife.head()

0    1.0
1    2.0
2    3.0
3    4.0
4    1.0
Name: TyreLife, dtype: float64

In [5]:
session_2024.laps.FreshTyre.plot(
    backend="plotly",
    kind="histogram",
    title="Fresh Tyre Life Distribution",
    histnorm="percent",
)

In [6]:
session_2024.laps.TyreLife.plot(
    backend="plotly",
    kind="histogram",
    title="Tyre Life Distribution",
    histnorm="percent",
)

In [7]:
session_2024.laps.TyreLife.describe()

count    998.000000
mean      11.741483
std        7.538820
min        1.000000
25%        5.000000
50%       11.000000
75%       17.000000
max       37.000000
Name: TyreLife, dtype: float64

In [8]:
session_2024.laps.Compound.plot(
    backend="plotly",
    kind="histogram",
    title="Compound Distribution",
    color="value",
)

In [9]:
session_2024.laps.PitInTime.plot(
    backend="plotly",
    kind="histogram",
    title="Pit In Time Distribution",
    histnorm="percent",
).show()
session_2024.laps.PitOutTime.plot( 
    backend="plotly",
    kind="histogram",
    title="Pit Out Time Distribution",
    histnorm="percent",
)

In [10]:
# now we decide which features we need
# 1.
time_features = [
    "LapTime",
    "Sector1Time",
    "Sector2Time",
    "Sector3Time",
]
# 2. 
tyre_features = [
    "TyreLife",
    "Compound",
]
# 3.
lap_features = [
    "PitInTime",
    "PitOutTime",
]
# 4. 
driver_features = [
    "Driver",
    "Team",
]
laps_2024 = session_2024.laps[time_features + tyre_features + lap_features + driver_features].copy()
laps_2024.dropna(subset=["LapTime"], inplace=True)
display(laps_2024.shape)
laps_2024.head()

(995, 10)

Unnamed: 0,LapTime,Sector1Time,Sector2Time,Sector3Time,TyreLife,Compound,PitInTime,PitOutTime,Driver,Team
0,0 days 00:01:27.458000,NaT,0 days 00:00:18.462000,0 days 00:00:35.518000,1.0,MEDIUM,NaT,NaT,VER,Red Bull Racing
1,0 days 00:01:24.099000,0 days 00:00:29.623000,0 days 00:00:18.375000,0 days 00:00:36.101000,2.0,MEDIUM,NaT,NaT,VER,Red Bull Racing
2,0 days 00:01:23.115000,0 days 00:00:28.793000,0 days 00:00:18.573000,0 days 00:00:35.749000,3.0,MEDIUM,NaT,NaT,VER,Red Bull Racing
4,0 days 00:01:37.304000,NaT,0 days 00:00:18.854000,0 days 00:00:38.564000,1.0,MEDIUM,NaT,NaT,GAS,Alpine
5,0 days 00:01:24.649000,0 days 00:00:29.905000,0 days 00:00:18.241000,0 days 00:00:36.503000,2.0,MEDIUM,NaT,NaT,GAS,Alpine


In [11]:
laps_2024.isnull().sum()

LapTime          0
Sector1Time     19
Sector2Time      0
Sector3Time      0
TyreLife         0
Compound         0
PitInTime      959
PitOutTime     958
Driver           0
Team             0
dtype: int64

In [12]:
weather_features = [
    "AirTemp",
    "TrackTemp",
    "Humidity",
    "Pressure",
    "WindSpeed",
    "WindDirection",
    "Rainfall",
]
weather_2024 = session_2024.weather_data[weather_features].copy()
display(weather_2024.shape)
weather_2024.describe()

(144, 7)

Unnamed: 0,AirTemp,TrackTemp,Humidity,Pressure,WindSpeed,WindDirection
count,144.0,144.0,144.0,144.0,144.0,144.0
mean,20.622222,38.402083,44.451389,1020.601389,0.953472,154.784722
std,0.852429,0.689428,1.854233,0.551665,0.445005,93.273653
min,19.0,36.7,42.0,1019.8,0.0,0.0
25%,19.975,37.9,43.0,1020.2,0.7,102.0
50%,20.7,38.55,44.0,1020.65,0.9,151.5
75%,21.325,39.0,46.0,1020.9,1.2,204.0
max,22.0,39.6,49.0,1021.7,2.3,357.0


In [13]:
laps_2024.groupby("Driver")[["Sector1Time", "Sector2Time", "Sector3Time"]].mean()

Unnamed: 0_level_0,Sector1Time,Sector2Time,Sector3Time
Driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ALB,0 days 00:00:29.373892857,0 days 00:00:18.528859649,0 days 00:00:36.702017543
ALO,0 days 00:00:29.153140350,0 days 00:00:18.559741379,0 days 00:00:36.743517241
BOT,0 days 00:00:29.563875,0 days 00:00:18.825824561,0 days 00:00:36.882105263
GAS,0 days 00:00:29.606285714,0 days 00:00:18.633157894,0 days 00:00:36.892421052
HAM,0 days 00:00:29.553357142,0 days 00:00:18.181066666,0 days 00:00:36.928800
HUL,0 days 00:00:29.372894736,0 days 00:00:18.809293103,0 days 00:00:36.664034482
LEC,0 days 00:00:28.882859649,0 days 00:00:18.227741379,0 days 00:00:36.033534482
MAG,0 days 00:00:29.324035714,0 days 00:00:18.435385964,0 days 00:00:36.825140350
NOR,0 days 00:00:28.914263157,0 days 00:00:18.281689655,0 days 00:00:36.017982758
OCO,0 days 00:00:29.603321428,0 days 00:00:18.692157894,0 days 00:00:37.163649122


In [14]:
laps_2024.groupby("Driver")["LapTime"].mean()

Driver
ALB   0 days 00:01:24.769333333
ALO   0 days 00:01:24.617844827
BOT   0 days 00:01:25.424017543
GAS   0 days 00:01:25.312210526
HAM      0 days 00:01:25.228400
HUL   0 days 00:01:25.024068965
LEC   0 days 00:01:23.262224137
MAG   0 days 00:01:24.753052631
NOR   0 days 00:01:23.323224137
OCO   0 days 00:01:25.628456140
PER   0 days 00:01:24.192275862
PIA   0 days 00:01:23.838155172
RIC   0 days 00:01:24.883350877
RUS   0 days 00:01:23.655964285
SAI   0 days 00:01:23.221431034
STR   0 days 00:01:24.828706896
TSU   0 days 00:01:24.869724137
VER   0 days 00:01:24.890666666
ZHO   0 days 00:01:25.458526315
Name: LapTime, dtype: timedelta64[ns]

In [15]:
sector_sum = laps_2024.groupby("Driver")[["Sector1Time", "Sector2Time", "Sector3Time"]].mean().sum(axis=1)
display(sector_sum)
laps_2024.groupby("Driver")["LapTime"].mean() - sector_sum

Driver
ALB   0 days 00:01:24.604770049
ALO   0 days 00:01:24.456398970
BOT   0 days 00:01:25.271804824
GAS   0 days 00:01:25.131864660
HAM   0 days 00:01:24.663223808
HUL   0 days 00:01:24.846222321
LEC   0 days 00:01:23.144135510
MAG   0 days 00:01:24.584562028
NOR   0 days 00:01:23.213935570
OCO   0 days 00:01:25.459128444
PER   0 days 00:01:24.059169691
PIA   0 days 00:01:23.721081064
RIC   0 days 00:01:24.694672931
RUS   0 days 00:01:23.517051623
SAI   0 days 00:01:23.127332122
STR   0 days 00:01:24.690517240
TSU   0 days 00:01:24.724402600
VER   0 days 00:01:23.467333333
ZHO   0 days 00:01:25.214549184
dtype: timedelta64[ns]

Driver
ALB   0 days 00:00:00.164563284
ALO   0 days 00:00:00.161445857
BOT   0 days 00:00:00.152212719
GAS   0 days 00:00:00.180345866
HAM   0 days 00:00:00.565176192
HUL   0 days 00:00:00.177846644
LEC   0 days 00:00:00.118088627
MAG   0 days 00:00:00.168490603
NOR   0 days 00:00:00.109288567
OCO   0 days 00:00:00.169327696
PER   0 days 00:00:00.133106171
PIA   0 days 00:00:00.117074108
RIC   0 days 00:00:00.188677946
RUS   0 days 00:00:00.138912662
SAI   0 days 00:00:00.094098912
STR   0 days 00:00:00.138189656
TSU   0 days 00:00:00.145321537
VER   0 days 00:00:01.423333333
ZHO   0 days 00:00:00.243977131
dtype: timedelta64[ns]