In [1]:
#| default_exp dataframe

# Refs

https://docs.fastf1.dev

https://www.linkedin.com/pulse/what-would-likely-median-pit-stop-time-upcoming-miami-marc-llad%C3%B3-xsrmf/

https://pandas.pydata.org/docs/reference/api/

https://www.geeksforgeeks.org/python/how-to-drop-rows-that-contain-a-specific-string-in-pandas/

https://stackoverflow.com/questions/28679930/how-to-drop-rows-from-pandas-data-frame-that-contains-a-particular-string-in-a-p

https://python.plainenglish.io/analyzing-the-2025-brazilian-grand-prix-with-python-and-fastf1-c0fee26b1eb4

In [2]:


!pip install fastf1



# Predicting Pitstop times - Regression

In [1]:
# --- 1. CONFIGURATION AND DATA LOADING ---
session = fastf1.get_session(2023, 'Great Britain','Race')
session.load(laps=True, weather=True, telemetry=True) 

NameError: name 'fastf1' is not defined

In [None]:
laps_df = session.laps.copy()

In [None]:
# Ensure the DataFrame is sorted by LapNumber and Time for reliable shifting
laps_df.sort_values(by=['LapNumber', 'Time'], inplace=True)

# --- 2. IDENTIFY AND MERGE PIT STOP DATA ---

# A. Identify the In-Lap (The lap where we get all our features)
# The In-Lap is the row with a recorded PitInTime
pit_in_laps = laps_df[laps_df['PitInTime'].notna()].copy()

# B. Prepare the Out-Time (The target Y value)
# The PitOutTime is on the *next* lap (by sequence, not index) for each driver.
# We use shift(-1) within a groupby('Driver') to look ahead one row.
laps_df['Next_PitOutTime'] = laps_df.groupby('Driver')['PitOutTime'].shift(-1)

# C. Merge the required PitOutTime back onto the In-Laps
# The Next_PitOutTime column now holds the PitOutTime from the subsequent row (the Out-Lap).
# Filter to keep only the In-Laps that successfully pulled an Out-Time.
final_df = laps_df[laps_df['PitInTime'].notna()].copy()
final_df = final_df[final_df['Next_PitOutTime'].notna()].copy()

print(f"Successfully isolated {len(final_df)} valid pit stop events.")

# --- 3. CALCULATE TARGET VARIABLE (Y) ---

# Y is now accurately calculated using the two timestamps from adjacent rows.
final_df['PitLaneDuration_Y'] = (
    final_df['Next_PitOutTime'] - final_df['PitInTime']
).dt.total_seconds()

In [None]:
final_df.drop(labels=["DriverNumber", "LapTime", "Sector1Time", "Sector2Time", "Sector3Time",
                     "Sector1SessionTime", "Sector2SessionTime", "Sector3SessionTime", 'SpeedI1',
                     'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest', 'LapStartTime','LapStartDate',
                     'Deleted', 'DeletedReason', 'FastF1Generated', 'IsAccurate', "TrackStatus"], axis=1,
            inplace=True)

In [None]:
# Filter out stops that are non-representative outliers (e.g., stops for a minute due to damage)
final_df = final_df[final_df['PitLaneDuration_Y'].between(15, 60)].copy()

In [None]:
# https://www.linkedin.com/pulse/what-would-likely-median-pit-stop-time-upcoming-miami-marc-llad%C3%B3-xsrmf/
# Pick laps with pit stops
session.laps.pick_box_laps()

# https://python.plainenglish.io/analyzing-the-2025-brazilian-grand-prix-with-python-and-fastf1-c0fee26b1eb4
# For each driver calculate pit stop time
session.laps.pick_drivers(1)

# Order by driver and lap number
pit_stop_laps.sort_values(by=['Driver','LapNumber'])

In [None]:
final_df

In [None]:
len(final_df.index)

In [None]:
session.laps

In [None]:
# Get data

import fastf1
import pandas as pd

# 1. Configuration (Example)
YEAR, GP, SESSION_TYPE = 2024, 'Canada', 'Race'

# 2. Load the Session Data
session = fastf1.get_session(YEAR, GP, SESSION_TYPE)
session.load(laps=True, telemetry=True)

In [None]:
session.laps

In [None]:
print(session.laps.columns)

In [None]:
# Clean data
laps_df = session.laps

# Drop unecessary labels
laps_df.drop(labels=["LapTime", "Sector1Time", "Sector2Time", "Sector3Time",
                     "Sector1SessionTime", "Sector2SessionTime", "Sector3SessionTime", 'SpeedI1',
                     'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest', 'LapStartTime','LapStartDate',
                     'Deleted', 'DeletedReason', 'FastF1Generated', 'IsAccurate'], axis=1,
            inplace=True)

In [None]:
laps_df

In [None]:
# Keep laps with pit times
pit_stop_laps = laps_df.dropna(subset=["PitInTime", "PitOutTime"], how="all")
pit_stop_laps

In [None]:
pit_stop_laps.loc[pit_stop_final.Driver == "VER"]

In [None]:
# Example pit stop time
from datetime import timedelta
pit_in_time = pit_stop_laps.loc[pit_stop_final.Driver == "VER"].iloc[0, 4]
pit_out_time = pit_stop_laps.loc[pit_stop_final.Driver == "VER"].iloc[1, 3]


print(pit_in_time)
print(pit_out_time)
# type(pit_in_time)
# type(pit_out_time)

print(f"Total pit time for VER: {pit_out_time - pit_in_time}")

In [None]:
pit_stop_laps.loc[182]

In [None]:
pit_stop_laps.loc[183]

In [None]:
pit_stop_laps.loc[191]

In [None]:
"""
1st iteration
1. len(d) == 0

2. same driver; log more info
- log new compound
- log pit time

3. driver is retired

"""

In [None]:
# Create a new dataframe - UNRELIABLE
d = [{}]
prev_index = -1
prev_driver = ""

for i, r in pit_stop_laps.iterrows():
    curr_driver = r["Driver"]
    
    # Log first driver
    if prev_index == -1 and len(prev_driver) == 0:
        d.append({
            "LapNumber": r["LapNumber"],
            "TyreLife": r["TyreLife"],
            "Position": r["Position"],
            "CumulativeStops": r["Stint"] - 1,
            "OldCompound": r["Compound"],
        })
        
        prev_index = i
        prev_driver = r["Driver"]
        
    # Log info for same driver
    elif (prev_index - i == 1) and (prev_driver == curr_driver):
        d[-1]["NewCompound"] = r["Compound"]
        d[-1]["TotalPitTime"] = r["PitOutTime"] - d[-1]["PitInTime"]

        prev_index = i
        prev_driver = r["Driver"]
        
    # Previous driver Retired -> Remove previous pitstop entry
    elif (prev_index - i != 1) and (prev_driver != curr_driver):
        d.pop()
        previous_retired = True
    
df = pd.DataFrame({})

In [None]:
pit_stop_laps.groupby('Driver').cumcount()

In [None]:
pit_in_stop_laps

In [None]:
pit_out_stop_laps

In [None]:
pit_stop_laps = laps_df.loc[laps_df['PitInTime'].notna() | laps_df['PitOutTime'].notna()].copy()

In [None]:
pit_stop_laps

In [None]:
lean_pit_stop_laps = pit_stop_laps.drop(labels=["DriverNumber", 'Sector1Time', 'Sector2Time', 'Sector3Time',
       'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime','FastF1Generated', 'IsAccurate'], axis=1)



In [None]:
lean_pit_stop_laps

---

# Create F1 PitStop dataframe

In [88]:
import fastf1
import pandas as pd
from datetime import datetime
import sys

In [89]:
session = fastf1.get_session(2024, "United States", 'R')
session.load()

DEBUG:fastf1.fastf1.events:Traceback for failure in FastF1 schedule
Traceback (most recent call last):
  File "/home/master/.pyenv/versions/miniconda3-latest/envs/ml/lib/python3.10/site-packages/fastf1/logger.py", line 151, in __wrapped
    return func(*args, **kwargs)
  File "/home/master/.pyenv/versions/miniconda3-latest/envs/ml/lib/python3.10/site-packages/fastf1/events.py", line 599, in _get_schedule_ff1
    response = Cache.requests_get(
  File "/home/master/.pyenv/versions/miniconda3-latest/envs/ml/lib/python3.10/site-packages/fastf1/req.py", line 303, in requests_get
    return cls._cached_request('GET', url, **kwargs)
  File "/home/master/.pyenv/versions/miniconda3-latest/envs/ml/lib/python3.10/site-packages/fastf1/req.py", line 347, in _cached_request
    response = func(url, **kwargs)
  File "/home/master/.pyenv/versions/miniconda3-latest/envs/ml/lib/python3.10/site-packages/requests_cache/session.py", line 127, in get
    return self.request('GET', url, params=params, **kwar

ValueError: Failed to load any schedule data.

In [14]:
session_laps = session.laps
session_laps.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate
0,0 days 00:58:52.888000,VER,1,0 days 00:01:41.657000,1.0,1.0,NaT,NaT,NaT,0 days 00:00:40.291000,...,True,Red Bull Racing,0 days 00:57:10.953000,2024-10-20 19:03:48.551,12,2.0,False,,False,False
1,0 days 01:00:32.879000,VER,1,0 days 00:01:39.991000,2.0,1.0,NaT,NaT,0 days 00:00:26.845000,0 days 00:00:39.996000,...,True,Red Bull Racing,0 days 00:58:52.888000,2024-10-20 19:05:30.486,1,2.0,False,,False,True
2,0 days 01:02:35.728000,VER,1,0 days 00:02:02.849000,3.0,1.0,NaT,NaT,0 days 00:00:26.980000,0 days 00:00:46.144000,...,True,Red Bull Racing,0 days 01:00:32.879000,2024-10-20 19:07:10.477,124,2.0,False,,False,False
3,0 days 01:05:07.267000,VER,1,NaT,4.0,1.0,NaT,NaT,0 days 00:00:40.222000,0 days 00:00:58.115000,...,True,Red Bull Racing,0 days 01:02:35.728000,2024-10-20 19:09:13.326,4,2.0,False,,False,False
4,0 days 01:07:54.813000,VER,1,NaT,5.0,1.0,NaT,NaT,0 days 00:00:40.279000,0 days 00:01:01.864000,...,True,Red Bull Racing,0 days 01:05:07.267000,2024-10-20 19:11:44.865,41,2.0,False,,False,False


In [15]:
session_laps.describe()

Unnamed: 0,Time,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,TyreLife,LapStartTime,LapStartDate,Position
count,1059,1031,1059.0,1059.0,24,23,1038,1058,1058,1036,1058,1058,949.0,1058.0,1035.0,1058.0,1059.0,1059,1058,1058.0
mean,0 days 01:47:05.935165250,0 days 00:01:41.956361784,28.267233,1.613787,0 days 01:43:56.015375,0 days 01:45:32.315391304,0 days 00:00:27.824805394,0 days 00:00:41.038068052,0 days 00:00:34.381650283,0 days 01:46:47.508765444,0 days 01:46:34.254538752,0 days 01:47:08.637552930,210.143309,177.766541,199.4,301.103025,14.595845,0 days 01:45:22.623875354,2024-10-20 19:52:02.852567040,9.969754
min,0 days 00:58:51.218000,0 days 00:01:37.330000,1.0,1.0,0 days 00:57:20.820000,0 days 01:03:02.808000,0 days 00:00:26.007000,0 days 00:00:38.874000,0 days 00:00:32.007000,0 days 00:59:18.384000,0 days 00:58:18.467000,0 days 00:58:51.373000,117.0,75.0,70.0,97.0,1.0,0 days 00:57:10.953000,2024-10-20 19:03:48.551000,1.0
25%,0 days 01:23:22.471000,0 days 00:01:39.299500,14.0,1.0,0 days 01:30:22.322000,0 days 01:32:17.366500,0 days 00:00:26.622750,0 days 00:00:39.740500,0 days 00:00:32.865250,0 days 01:23:45.737000,0 days 01:22:49.679250,0 days 01:23:23.518250,206.0,178.0,201.0,302.0,7.0,0 days 01:21:40.315500,2024-10-20 19:28:18.962000128,5.0
50%,0 days 01:47:14.006000,0 days 00:01:40.338000,28.0,2.0,0 days 01:44:45.366000,0 days 01:45:04.130000,0 days 00:00:26.964000,0 days 00:00:40.089000,0 days 00:00:33.249000,0 days 01:47:14.588000,0 days 01:46:41.084500,0 days 01:47:15.100500,212.0,181.0,202.0,305.0,14.0,0 days 01:45:31.074000,2024-10-20 19:52:08.970499840,10.0
75%,0 days 02:10:46.786500,0 days 00:01:41.524500,42.0,2.0,0 days 01:57:00.646250,0 days 01:57:31.558500,0 days 00:00:27.329000,0 days 00:00:40.461000,0 days 00:00:33.772000,0 days 02:09:47.988250,0 days 02:10:14.665750,0 days 02:10:47.581000,218.0,183.0,204.0,309.0,21.0,0 days 02:09:07.458000,2024-10-20 20:15:45.867500032,15.0
max,0 days 02:33:46.386000,0 days 00:02:29.840000,56.0,3.0,0 days 02:26:21.090000,0 days 02:25:54.902000,0 days 00:01:00.911000,0 days 00:01:03.897000,0 days 00:01:05.772000,0 days 02:32:34.024000,0 days 02:33:13.541000,0 days 02:33:46.468000,232.0,195.0,210.0,344.0,40.0,0 days 02:32:07.617000,2024-10-20 20:38:45.215000,20.0
std,0 days 00:27:17.171726441,0 days 00:00:07.281668870,16.095103,0.598548,0 days 00:20:14.717935445,0 days 00:18:03.447312827,0 days 00:00:03.759700730,0 days 00:00:04.201113797,0 days 00:00:04.838745313,0 days 00:26:44.491842620,0 days 00:27:17.695200543,0 days 00:27:15.680980492,13.598931,13.619138,16.612386,30.307378,9.187864,0 days 00:27:21.874356061,,5.479732


In [8]:
session_laps.info()

<class 'fastf1.core.Laps'>
RangeIndex: 1135 entries, 0 to 1134
Data columns (total 31 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                1135 non-null   timedelta64[ns]
 1   Driver              1135 non-null   object         
 2   DriverNumber        1135 non-null   object         
 3   LapTime             1097 non-null   timedelta64[ns]
 4   LapNumber           1135 non-null   float64        
 5   Stint               1135 non-null   float64        
 6   PitOutTime          35 non-null     timedelta64[ns]
 7   PitInTime           36 non-null     timedelta64[ns]
 8   Sector1Time         1098 non-null   timedelta64[ns]
 9   Sector2Time         1133 non-null   timedelta64[ns]
 10  Sector3Time         1133 non-null   timedelta64[ns]
 11  Sector1SessionTime  1096 non-null   timedelta64[ns]
 12  Sector2SessionTime  1133 non-null   timedelta64[ns]
 13  Sector3SessionTime  1133 non-null   timedelt

In [9]:
# Pick pit stop laps
# Sort by Driver and LapNumber - to extract Pit Stop times
pit_stop_laps = session_laps.pick_box_laps(which='both').sort_values(by=['Driver','LapNumber'])
pit_stop_laps.reset_index(inplace=True)

In [10]:
# Only calculate the PitStopTime if the previous row Driver is the same as current row
pit_stop_laps.loc[pit_stop_laps['Driver'].shift(-1) == pit_stop_laps['Driver'], 'PitStopTime'] = pit_stop_laps['PitOutTime'].shift(-1) - pit_stop_laps['PitInTime']

In [11]:
# Drop rows where "PitStopTime" is nan
pit_stop_laps = pit_stop_laps.dropna(subset=['PitStopTime'])
pit_stop_laps.reset_index(inplace=True)
pit_stop_laps.loc[:, ["Driver", "Position", "LapNumber", "PitStopTime", "TrackStatus"]]

Unnamed: 0,Driver,Position,LapNumber,PitStopTime,TrackStatus
0,ALO,8.0,27.0,0 days 00:00:25.130000,126
1,ALO,9.0,32.0,0 days 00:23:42.914000,45
2,BEA,11.0,27.0,0 days 00:00:35.907000,126
3,BEA,15.0,32.0,0 days 00:23:44.776000,45
4,BOT,12.0,32.0,0 days 00:23:42.111000,45
5,COL,15.0,26.0,0 days 00:00:25.294000,1
6,GAS,3.0,32.0,0 days 00:23:27.544000,45
7,HAM,12.0,27.0,0 days 00:00:25.254000,126
8,HAM,11.0,32.0,0 days 00:23:45.721000,45
9,HUL,10.0,25.0,0 days 00:00:25.515000,1


In [12]:
print(f"Entries with red flags: {len(pit_stop_laps[pit_stop_laps.TrackStatus.str.contains('5')])}")

Entries with red flags: 17


In [13]:
# Drop PitStopTimes where TrackStatus has 5 in it
# 5 == Red flag
print(pit_stop_laps.TrackStatus.iloc[0])
type(pit_stop_laps.TrackStatus.iloc[0])

126


str

In [14]:
pit_stop_laps[~pit_stop_laps.TrackStatus.str.contains("5")]

Unnamed: 0,level_0,index,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,...,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,PitStopTime
0,0,233,0 days 01:50:41.754000,ALO,14,0 days 00:01:38.089000,27.0,1.0,NaT,0 days 01:50:38.911000,...,Aston Martin,0 days 01:49:03.665000,2024-11-03 16:27:06.411,126,8.0,False,,False,False,0 days 00:00:25.130000
2,4,846,0 days 01:50:53.532000,BEA,50,0 days 00:01:38.431000,27.0,1.0,NaT,0 days 01:50:50.609000,...,Haas F1 Team,0 days 01:49:15.101000,2024-11-03 16:27:17.847,126,11.0,False,,False,False,0 days 00:00:35.907000
5,10,746,0 days 01:49:24.019000,COL,43,0 days 00:01:29.885000,26.0,1.0,NaT,0 days 01:49:21.143000,...,Williams,0 days 01:47:54.134000,2024-11-03 16:25:56.880,1,15.0,False,,False,False,0 days 00:00:25.294000
7,15,777,0 days 01:50:55.929000,HAM,44,0 days 00:01:40.067000,27.0,1.0,NaT,0 days 01:50:53.060000,...,Mercedes,0 days 01:49:15.862000,2024-11-03 16:27:18.608,126,12.0,False,,False,False,0 days 00:00:25.254000
9,19,508,0 days 01:47:45.175000,HUL,27,0 days 00:01:30.410000,25.0,1.0,NaT,0 days 01:47:42.044000,...,Haas F1 Team,0 days 01:46:14.765000,2024-11-03 16:24:17.511,1,10.0,False,,False,False,0 days 00:00:25.515000
10,21,510,0 days 01:53:22.150000,HUL,27,NaT,27.0,2.0,NaT,0 days 01:53:19.090000,...,Haas F1 Team,0 days 01:49:31.818000,2024-11-03 16:27:34.564,1267,18.0,False,,False,False,0 days 00:00:59.099000
11,23,512,0 days 01:58:18.344000,HUL,27,0 days 00:02:07.542000,29.0,3.0,NaT,0 days 01:58:15.350000,...,Haas F1 Team,0 days 01:56:10.802000,2024-11-03 16:34:13.548,4,18.0,False,,False,False,0 days 00:00:28.730000
12,25,541,0 days 01:52:36.179000,LAW,30,0 days 00:01:53.045000,28.0,1.0,NaT,0 days 01:52:33.272000,...,RB,0 days 01:50:43.134000,2024-11-03 16:28:45.880,671,7.0,False,,False,False,0 days 00:00:28.240000
14,29,299,0 days 01:46:07.105000,LEC,16,0 days 00:01:28.997000,24.0,1.0,NaT,0 days 01:46:04.229000,...,Ferrari,0 days 01:44:38.108000,2024-11-03 16:22:40.854,1,6.0,False,,False,False,0 days 00:00:24.600000
16,33,679,0 days 01:52:02.076000,NOR,4,0 days 00:01:55.212000,28.0,1.0,NaT,0 days 01:51:59.156000,...,McLaren,0 days 01:50:06.864000,2024-11-03 16:28:09.610,2671,2.0,False,,False,False,0 days 00:00:24.816000


In [15]:
pit_stop_laps = pit_stop_laps[~pit_stop_laps.TrackStatus.str.contains("5")]
pit_stop_laps

Unnamed: 0,level_0,index,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,...,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,PitStopTime
0,0,233,0 days 01:50:41.754000,ALO,14,0 days 00:01:38.089000,27.0,1.0,NaT,0 days 01:50:38.911000,...,Aston Martin,0 days 01:49:03.665000,2024-11-03 16:27:06.411,126,8.0,False,,False,False,0 days 00:00:25.130000
2,4,846,0 days 01:50:53.532000,BEA,50,0 days 00:01:38.431000,27.0,1.0,NaT,0 days 01:50:50.609000,...,Haas F1 Team,0 days 01:49:15.101000,2024-11-03 16:27:17.847,126,11.0,False,,False,False,0 days 00:00:35.907000
5,10,746,0 days 01:49:24.019000,COL,43,0 days 00:01:29.885000,26.0,1.0,NaT,0 days 01:49:21.143000,...,Williams,0 days 01:47:54.134000,2024-11-03 16:25:56.880,1,15.0,False,,False,False,0 days 00:00:25.294000
7,15,777,0 days 01:50:55.929000,HAM,44,0 days 00:01:40.067000,27.0,1.0,NaT,0 days 01:50:53.060000,...,Mercedes,0 days 01:49:15.862000,2024-11-03 16:27:18.608,126,12.0,False,,False,False,0 days 00:00:25.254000
9,19,508,0 days 01:47:45.175000,HUL,27,0 days 00:01:30.410000,25.0,1.0,NaT,0 days 01:47:42.044000,...,Haas F1 Team,0 days 01:46:14.765000,2024-11-03 16:24:17.511,1,10.0,False,,False,False,0 days 00:00:25.515000
10,21,510,0 days 01:53:22.150000,HUL,27,NaT,27.0,2.0,NaT,0 days 01:53:19.090000,...,Haas F1 Team,0 days 01:49:31.818000,2024-11-03 16:27:34.564,1267,18.0,False,,False,False,0 days 00:00:59.099000
11,23,512,0 days 01:58:18.344000,HUL,27,0 days 00:02:07.542000,29.0,3.0,NaT,0 days 01:58:15.350000,...,Haas F1 Team,0 days 01:56:10.802000,2024-11-03 16:34:13.548,4,18.0,False,,False,False,0 days 00:00:28.730000
12,25,541,0 days 01:52:36.179000,LAW,30,0 days 00:01:53.045000,28.0,1.0,NaT,0 days 01:52:33.272000,...,RB,0 days 01:50:43.134000,2024-11-03 16:28:45.880,671,7.0,False,,False,False,0 days 00:00:28.240000
14,29,299,0 days 01:46:07.105000,LEC,16,0 days 00:01:28.997000,24.0,1.0,NaT,0 days 01:46:04.229000,...,Ferrari,0 days 01:44:38.108000,2024-11-03 16:22:40.854,1,6.0,False,,False,False,0 days 00:00:24.600000
16,33,679,0 days 01:52:02.076000,NOR,4,0 days 00:01:55.212000,28.0,1.0,NaT,0 days 01:51:59.156000,...,McLaren,0 days 01:50:06.864000,2024-11-03 16:28:09.610,2671,2.0,False,,False,False,0 days 00:00:24.816000


In [6]:
#|export
def create_pit_stop_times_dataframe_for_a_race(session_laps_df):
    
    """
    Creates DataFrame with PitStopTimes column from a race session.
    INPUT: Race session from fastF1
    OUTPUT: DataFrame with PitStopTime column calculated.
    """
    
    # Pick laps with PitTimeIn and PitTImeOut
    pit_stop_laps = session_laps_df.pick_box_laps(which='both').sort_values(by=['Driver','LapNumber'])
    pit_stop_laps.reset_index(inplace=True)
    
    # Create new column and calculate PitStopTIme
    pit_stop_laps.loc[pit_stop_laps['Driver'].shift(-1) == pit_stop_laps['Driver'], 'PitStopTime'] = pit_stop_laps['PitOutTime'].shift(-1) - pit_stop_laps['PitInTime']
    pit_stop_laps = pit_stop_laps.dropna(subset=['PitStopTime'])

    return pit_stop_laps

In [17]:
pit_stops_df = create_pit_stop_times_dataframe(session.laps)
pit_stops_df

Unnamed: 0,index,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,...,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,PitStopTime
0,233,0 days 01:50:41.754000,ALO,14,0 days 00:01:38.089000,27.0,1.0,NaT,0 days 01:50:38.911000,0 days 00:00:21.926000,...,Aston Martin,0 days 01:49:03.665000,2024-11-03 16:27:06.411,126,8.0,False,,False,False,0 days 00:00:25.130000
2,238,0 days 02:01:04.831000,ALO,14,NaT,32.0,2.0,NaT,0 days 02:00:58.391000,0 days 00:00:32,...,Aston Martin,0 days 01:58:23.789000,2024-11-03 16:36:26.535,45,9.0,False,,False,False,0 days 00:23:42.914000
4,846,0 days 01:50:53.532000,BEA,50,0 days 00:01:38.431000,27.0,1.0,NaT,0 days 01:50:50.609000,0 days 00:00:22.089000,...,Haas F1 Team,0 days 01:49:15.101000,2024-11-03 16:27:17.847,126,11.0,False,,False,False,0 days 00:00:35.907000
6,851,0 days 02:01:28.072000,BEA,50,NaT,32.0,2.0,NaT,0 days 02:01:13.837000,0 days 00:00:31.026000,...,Haas F1 Team,0 days 01:58:49.215000,2024-11-03 16:36:51.961,45,15.0,False,,False,False,0 days 00:23:44.776000
8,1028,0 days 02:01:16.115000,BOT,77,NaT,32.0,1.0,NaT,0 days 02:01:08.368000,0 days 00:00:29.529000,...,Kick Sauber,0 days 01:58:44.240000,2024-11-03 16:36:46.986,45,12.0,False,,False,False,0 days 00:23:42.111000
10,746,0 days 01:49:24.019000,COL,43,0 days 00:01:29.885000,26.0,1.0,NaT,0 days 01:49:21.143000,0 days 00:00:22.045000,...,Williams,0 days 01:47:54.134000,2024-11-03 16:25:56.880,1,15.0,False,,False,False,0 days 00:00:25.294000
13,100,0 days 02:00:47.666000,GAS,10,NaT,32.0,1.0,NaT,0 days 02:00:42.932000,0 days 00:00:35.541000,...,Alpine,0 days 01:58:06.705000,2024-11-03 16:36:09.451,45,3.0,False,,False,False,0 days 00:23:27.544000
15,777,0 days 01:50:55.929000,HAM,44,0 days 00:01:40.067000,27.0,1.0,NaT,0 days 01:50:53.060000,0 days 00:00:21.914000,...,Mercedes,0 days 01:49:15.862000,2024-11-03 16:27:18.608,126,12.0,False,,False,False,0 days 00:00:25.254000
17,782,0 days 02:01:10.205000,HAM,44,NaT,32.0,2.0,NaT,0 days 02:01:02.996000,0 days 00:00:32.651000,...,Mercedes,0 days 01:58:29.118000,2024-11-03 16:36:31.864,45,11.0,False,,False,False,0 days 00:23:45.721000
19,508,0 days 01:47:45.175000,HUL,27,0 days 00:01:30.410000,25.0,1.0,NaT,0 days 01:47:42.044000,0 days 00:00:21.555000,...,Haas F1 Team,0 days 01:46:14.765000,2024-11-03 16:24:17.511,1,10.0,False,,False,False,0 days 00:00:25.515000


In [7]:
#|export
def drop_red_flags(pit_stop_laps):
    """
    Drop entries which contain TrackStatus 5
    """
    
    # Drop entries with Red flags
    pit_stop_laps = pit_stop_laps[~pit_stop_laps.TrackStatus.str.contains("5")]
    
    return pit_stop_laps

In [19]:
pit_stops_df = drop_red_flags(pit_stops_df)
pit_stops_df

Unnamed: 0,index,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,...,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,PitStopTime
0,233,0 days 01:50:41.754000,ALO,14,0 days 00:01:38.089000,27.0,1.0,NaT,0 days 01:50:38.911000,0 days 00:00:21.926000,...,Aston Martin,0 days 01:49:03.665000,2024-11-03 16:27:06.411,126,8.0,False,,False,False,0 days 00:00:25.130000
4,846,0 days 01:50:53.532000,BEA,50,0 days 00:01:38.431000,27.0,1.0,NaT,0 days 01:50:50.609000,0 days 00:00:22.089000,...,Haas F1 Team,0 days 01:49:15.101000,2024-11-03 16:27:17.847,126,11.0,False,,False,False,0 days 00:00:35.907000
10,746,0 days 01:49:24.019000,COL,43,0 days 00:01:29.885000,26.0,1.0,NaT,0 days 01:49:21.143000,0 days 00:00:22.045000,...,Williams,0 days 01:47:54.134000,2024-11-03 16:25:56.880,1,15.0,False,,False,False,0 days 00:00:25.294000
15,777,0 days 01:50:55.929000,HAM,44,0 days 00:01:40.067000,27.0,1.0,NaT,0 days 01:50:53.060000,0 days 00:00:21.914000,...,Mercedes,0 days 01:49:15.862000,2024-11-03 16:27:18.608,126,12.0,False,,False,False,0 days 00:00:25.254000
19,508,0 days 01:47:45.175000,HUL,27,0 days 00:01:30.410000,25.0,1.0,NaT,0 days 01:47:42.044000,0 days 00:00:21.555000,...,Haas F1 Team,0 days 01:46:14.765000,2024-11-03 16:24:17.511,1,10.0,False,,False,False,0 days 00:00:25.515000
21,510,0 days 01:53:22.150000,HUL,27,NaT,27.0,2.0,NaT,0 days 01:53:19.090000,NaT,...,Haas F1 Team,0 days 01:49:31.818000,2024-11-03 16:27:34.564,1267,18.0,False,,False,False,0 days 00:00:59.099000
23,512,0 days 01:58:18.344000,HUL,27,0 days 00:02:07.542000,29.0,3.0,NaT,0 days 01:58:15.350000,0 days 00:00:34.579000,...,Haas F1 Team,0 days 01:56:10.802000,2024-11-03 16:34:13.548,4,18.0,False,,False,False,0 days 00:00:28.730000
25,541,0 days 01:52:36.179000,LAW,30,0 days 00:01:53.045000,28.0,1.0,NaT,0 days 01:52:33.272000,0 days 00:00:28.493000,...,RB,0 days 01:50:43.134000,2024-11-03 16:28:45.880,671,7.0,False,,False,False,0 days 00:00:28.240000
29,299,0 days 01:46:07.105000,LEC,16,0 days 00:01:28.997000,24.0,1.0,NaT,0 days 01:46:04.229000,0 days 00:00:21.672000,...,Ferrari,0 days 01:44:38.108000,2024-11-03 16:22:40.854,1,6.0,False,,False,False,0 days 00:00:24.600000
33,679,0 days 01:52:02.076000,NOR,4,0 days 00:01:55.212000,28.0,1.0,NaT,0 days 01:51:59.156000,0 days 00:00:27.821000,...,McLaren,0 days 01:50:06.864000,2024-11-03 16:28:09.610,2671,2.0,False,,False,False,0 days 00:00:24.816000


In [22]:
# Can we remove outliers using mean?
pit_stops_df["PitStopTime"].mean()

Timedelta('0 days 00:00:28.532058823')

---

# Extract races per year (from a season)

Create  a dataframe which will carry all pit stops for all races for the given years.

In [32]:
#|export
import fastf1
import pandas as pd
import sys

import logging
logger = logging.getLogger(__name__)
# logging.basicConfig(level=logging.DEBUG)

# Setup cache
from pathlib import Path
cache_dir = Path.home() / '.cache' / 'fastf1' # Standard Linux/macOS default
cache_dir.mkdir(parents=True, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)
logger.info(f"FastF1 cache enabled at: {cache_dir}")

In [3]:
# Extract GP events per year
year = 2024
event_schedule = fastf1.get_event_schedule(year)
event_schedule.head()



Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,...,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
0,0,Bahrain,Sakhir,FORMULA 1 ARAMCO PRE-SEASON TESTING 2024,2024-02-23,Pre-Season Testing,testing,Practice 1,2024-02-21 10:00:00+03:00,2024-02-21 07:00:00,...,Practice 3,2024-02-23 10:00:00+03:00,2024-02-23 07:00:00,,NaT,NaT,,NaT,NaT,True
1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True
2,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,2024-03-09,Saudi Arabian Grand Prix,conventional,Practice 1,2024-03-07 16:30:00+03:00,2024-03-07 13:30:00,...,Practice 3,2024-03-08 16:30:00+03:00,2024-03-08 13:30:00,Qualifying,2024-03-08 20:00:00+03:00,2024-03-08 17:00:00,Race,2024-03-09 20:00:00+03:00,2024-03-09 17:00:00,True
3,3,Australia,Melbourne,FORMULA 1 ROLEX AUSTRALIAN GRAND PRIX 2024,2024-03-24,Australian Grand Prix,conventional,Practice 1,2024-03-22 12:30:00+11:00,2024-03-22 01:30:00,...,Practice 3,2024-03-23 12:30:00+11:00,2024-03-23 01:30:00,Qualifying,2024-03-23 16:00:00+11:00,2024-03-23 05:00:00,Race,2024-03-24 15:00:00+11:00,2024-03-24 04:00:00,True
4,4,Japan,Suzuka,FORMULA 1 MSC CRUISES JAPANESE GRAND PRIX 2024,2024-04-07,Japanese Grand Prix,conventional,Practice 1,2024-04-05 11:30:00+09:00,2024-04-05 02:30:00,...,Practice 3,2024-04-06 11:30:00+09:00,2024-04-06 02:30:00,Qualifying,2024-04-06 15:00:00+09:00,2024-04-06 06:00:00,Race,2024-04-07 14:00:00+09:00,2024-04-07 05:00:00,True


In [4]:
# Get race event codes
season_race_codes = event_schedule.iloc[1:, 0]

In [5]:
season_race_codes

1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
Name: RoundNumber, dtype: int64

Return all pit stops through the season in one dataframe

In [6]:
# session = fastf1.get_session(year, season_race_codes, 'R')
pit_stop_df = pd.DataFrame()
year = 2024
for race_code in season_race_codes:
    print(f"Race code: {race_code}")
    session = fastf1.get_session(year, race_code, 'R')
    session.load()
    session_pit_stops_df = create_pit_stop_times_dataframe(session.laps)
    session_pit_stops_df = drop_red_flags(session_pit_stops_df)
    pit_stop_df = pd.concat([pit_stop_df, session_pit_stops_df])


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Race code: 1


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']


NameError: name 'create_pit_stop_times_dataframe' is not defined

In [49]:
len(pit_stop_df)

773

In [52]:
pit_stop_df

Unnamed: 0,index,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,...,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,PitStopTime
0,808,0 days 01:24:48.907000,ALB,23,0 days 00:01:41.171000,15.0,1.0,NaT,0 days 01:24:47.139000,0 days 00:00:31.510000,...,Williams,0 days 01:23:07.736000,2024-03-02 15:26:50.167,1,5.0,False,,False,False,0 days 00:00:24.779000
2,829,0 days 01:59:29.747000,ALB,23,0 days 00:01:40.333000,36.0,2.0,NaT,0 days 01:59:27.973000,0 days 00:00:31.337000,...,Williams,0 days 01:57:49.414000,2024-03-02 16:01:31.845,1,10.0,False,,False,False,0 days 00:00:24.682000
4,470,0 days 01:24:43.800000,ALO,14,0 days 00:01:41.101000,15.0,1.0,NaT,0 days 01:24:42.030000,0 days 00:00:31.480000,...,Aston Martin,0 days 01:23:02.699000,2024-03-02 15:26:45.130,1,2.0,False,,False,False,0 days 00:00:24.862000
6,496,0 days 02:07:18.049000,ALO,14,0 days 00:01:40.261000,41.0,2.0,NaT,0 days 02:07:16.321000,0 days 00:00:31.046000,...,Aston Martin,0 days 02:05:37.788000,2024-03-02 16:09:20.219,1,9.0,False,,False,False,0 days 00:00:25.061000
8,1029,0 days 01:20:02.264000,BOT,77,0 days 00:01:43.145000,12.0,1.0,NaT,0 days 01:20:00.495000,0 days 00:00:31.822000,...,Kick Sauber,0 days 01:18:19.119000,2024-03-02 15:22:01.550,1,14.0,False,,False,False,0 days 00:00:26.566000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46,264,0 days 01:46:02.637000,STR,18,0 days 00:01:32.228000,32.0,2.0,NaT,0 days 01:46:01.618000,0 days 00:00:18.014000,...,Aston Martin,0 days 01:44:30.409000,2024-12-08 13:51:17.626,1,12.0,False,,False,False,0 days 00:00:22.010000
48,368,0 days 01:30:54.499000,TSU,22,0 days 00:01:32.597000,22.0,1.0,NaT,0 days 01:30:53.420000,0 days 00:00:18.237000,...,RB,0 days 01:29:21.902000,2024-12-08 13:36:09.119,1,10.0,False,,False,False,0 days 00:00:21.809000
50,28,0 days 01:41:00.942000,VER,1,0 days 00:01:31.857000,29.0,1.0,NaT,0 days 01:40:59.944000,0 days 00:00:17.920000,...,Red Bull Racing,0 days 01:39:29.085000,2024-12-08 13:46:16.302,1,3.0,False,,False,False,0 days 00:00:32.739000
52,472,0 days 01:15:40.354000,ZHO,24,0 days 00:01:32.863000,12.0,1.0,NaT,0 days 01:15:39.370000,0 days 00:00:18.315000,...,Kick Sauber,0 days 01:14:07.491000,2024-12-08 13:20:54.708,1,12.0,False,,False,False,0 days 00:00:22.978000


In [20]:
#|export
# Function to create pit stop times for race session in a year

def create_pit_stop_times_dataframe_for_a_season(year: int, season_race_codes: pd.Series) -> pd.DataFrame:
    """
    Create a concatenated dataframe which contains the pit stop times from all races in a year.
    INPUT:
        - year: [int]
        - season_race_code: [int] code to be used by get_session() to get the race laps.
    """
    pit_stop_df = pd.DataFrame()
    for race_code in season_race_codes:
        session = fastf1.get_session(year, race_code, 'R')
#         session.load(laps=True, telemetry=True, weather=True, messages=False, livedata=False)
        session.load()
        race_name = session.event.EventName
        race_country = session.event.Location
        logger.info(f"Race code: {race_code}")
        logger.info(f"Race Name: {race_name}")
        logger.info(f"Location: {race_country}")
        logger.info("---------------------------")
        session_pit_stops_df = create_pit_stop_times_dataframe_for_a_race(session.laps)
        session_pit_stops_df = drop_red_flags(session_pit_stops_df)
        pit_stop_df = pd.concat([pit_stop_df, session_pit_stops_df])
    return pit_stop_df

In [59]:
ptst_df = create_pit_stop_times_dataframe_for_a_season(2024, season_race_codes)

Race code: 1


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Saudi Arabian Grand Prix

Race Name: Bahrain Grand Prix
Location: Sakhir
Race code: 2


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Saudi Arabian Grand Prix
Location: Jeddah
Race code: 3


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Australian Grand Prix
Location: Melbourne
Race code: 4


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Japanese Grand Prix
Location: Suzuka
Race code: 5


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Chinese Grand Prix
Location: Shanghai
Race code: 6


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '11', '55', '44', '22', '63', '14', '31', '27', '10', '81', '24', '3', '77', '18', '23', '20', '2']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Miami Grand Prix
Location: Miami
Race code: 7


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '16', '81', '55', '44', '63', '11', '18', '22', '27', '20', '3', '31', '24', '10', '2', '77', '14', '23']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Emilia Romagna Grand Prix
Location: Imola
Race code: 8


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '14', '3', '77', '18', '2', '24', '31', '11', '27', '20']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Monaco Grand Prix
Location: Monaco
Race code: 9


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '44', '81', '14', '18', '3', '10', '31', '27', '20', '77', '22', '24', '55', '23', '11', '16', '2']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Canadian Grand Prix
Location: Montréal
Race code: 10


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '63', '16', '55', '81', '11', '10', '31', '27', '14', '24', '18', '3', '77', '20', '23', '22', '2']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Spanish Grand Prix
Location: Barcelona
Race code: 11


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '81', '55', '44', '1', '27', '11', '20', '3', '10', '16', '31', '18', '22', '23', '77', '24', '14', '2', '4']
core           INFO 	Loading data for British Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Austrian Grand Prix
Location: Spielberg
Race code: 12


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '1', '4', '81', '55', '27', '18', '14', '23', '22', '2', '20', '3', '16', '77', '31', '11', '24', '63', '10']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: British Grand Prix
Location: Silverstone
Race code: 13


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '44', '16', '1', '55', '11', '63', '22', '18', '14', '3', '27', '23', '20', '77', '2', '31', '24', '10']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Hungarian Grand Prix
Location: Budapest
Race code: 14


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '81', '16', '1', '4', '55', '11', '14', '31', '3', '18', '23', '10', '20', '77', '22', '2', '27', '24', '63']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Belgian Grand Prix
Location: Spa-Francorchamps
Race code: 15


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '81', '55', '11', '63', '44', '10', '14', '27', '3', '18', '23', '31', '2', '22', '20', '77', '24']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Dutch Grand Prix
Location: Zandvoort
Race code: 16


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '55', '44', '1', '63', '11', '23', '20', '14', '43', '3', '31', '10', '77', '27', '24', '18', '22']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Italian Grand Prix
Location: Monza
Race code: 17


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '16', '63', '4', '1', '14', '23', '43', '44', '50', '27', '10', '3', '24', '31', '77', '11', '55', '18', '22']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Azerbaijan Grand Prix
Location: Baku
Race code: 18


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']
core           INFO 	Loading data for United States Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Singapore Grand Prix
Location: Marina Bay
Race code: 19


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '1', '4', '81', '63', '11', '27', '30', '43', '20', '10', '14', '22', '18', '23', '77', '31', '24', '44']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: United States Grand Prix
Location: Austin
Race code: 20


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '16', '44', '63', '1', '20', '81', '27', '10', '18', '43', '31', '77', '24', '30', '11', '14', '23', '22']
core           INFO 	Loading data for São Paulo Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Mexico City Grand Prix
Location: Mexico City
Race code: 21


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '31', '10', '63', '16', '4', '22', '81', '30', '44', '11', '50', '77', '14', '24', '55', '43', '23', '18', '27']
core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: São Paulo Grand Prix
Location: São Paulo
Race code: 22


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '44', '55', '16', '1', '4', '81', '27', '22', '11', '14', '20', '24', '43', '18', '30', '31', '77', '23', '10']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Las Vegas Grand Prix
Location: Las Vegas
Race code: 23


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22', '30', '23', '27', '11', '18', '43', '31']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race Name: Qatar Grand Prix
Location: Lusail
Race code: 24


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']


Race Name: Abu Dhabi Grand Prix
Location: Yas Island


In [62]:
# Make sure this matches above dataframe
ptst_df.head()
len(ptst_df)

773

We need to fetch the race codes for a seasonm

In [4]:
#|export
# Function to extract race codes for a range of years

def fetch_season_race_codes(year: int) ->pd.Series:
    """
    Returns the race codes for a season given the year.
    """
    event_schedule = fastf1.get_event_schedule(year)
    season_race_codes = event_schedule.iloc[1:, 0]
    return season_race_codes

Let's build the big wrapper to generate the jumbo dataframe which will contains only pit stops times for all races in a year range.

In [5]:
#|export

def build_pit_stop_data_for_year_range(start_year: int, end_year: int) -> pd.DataFrame:
    """
    Builds a complete feature matrix of F1 pit stop times by iterating over 
    a specified range of years.

    INPUT:
        start_year (int): The first year (inclusive) to process.
        end_year (int): The last year (inclusive) to process.

    OUTPUT:
        pd.DataFrame: A single DataFrame containing all collected and processed 
                      pit stop records.
    """
    
    all_years_data = []
    
    # Loop from start_year up to and including end_year
    for year in range(start_year, end_year + 1):
        try:
            # Get race codes for the season
            logger.info(f"Fetching season race codes for {year}")
            season_race_codes = fetch_season_race_codes(year)
            logger.info("====> Done <====")
            
            # Get all pit stops for that year
            logger.info(f"Fetching all pit stop times for {year}")
            season_df = create_pit_stop_times_dataframe_for_a_season(year, season_race_codes)
            logger.info("====> Done <====")

            all_years_data.append(season_df)
            print(f"Successfully processed {year}. Total pit stops collected: {len(season_df)}")
        except Exception as e:
            print(f"ERROR processing year {year}: {e}")
            # Continue to the next year even if one fails
            continue

    # Concatenate all yearly DataFrames into one jumbo dataset
    if all_years_data:
        jumbo_df = pd.concat(all_years_data, ignore_index=True)
        print("\n--- Data Collection Complete ---")
        print(f"Total rows collected ({start_year}-{end_year}): {len(jumbo_df)}")
        return jumbo_df
    else:
        print("\nNo data collected.")
        return pd.DataFrame()

In [10]:
jumbo_df = build_pit_stop_data_for_year_range(2024, 2024)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Saudi Arabian Grand Prix

Race code: 1
Race Name: Bahrain Grand Prix
Location: Sakhir
---------------------------


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_d

Race code: 2
Race Name: Saudi Arabian Grand Prix
Location: Jeddah
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Race code: 3
Race Name: Australian Grand Prix
Location: Melbourne
---------------------------


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data

Race code: 4
Race Name: Japanese Grand Prix
Location: Suzuka
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 5
Race Name: Chinese Grand Prix
Location: Shanghai
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '11', '55', '44', '22', '63', '14', '31', '27', '10', '81', '24', '3', '77', '18', '23', '20', '2']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 6
Race Name: Miami Grand Prix
Location: Miami
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '16', '81', '55', '44', '63', '11', '18', '22', '27', '20', '3', '31', '24', '10', '2', '77', '14', '23']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 7
Race Name: Emilia Romagna Grand Prix
Location: Imola
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '14', '3', '77', '18', '2', '24', '31', '11', '27', '20']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 8
Race Name: Monaco Grand Prix
Location: Monaco
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '44', '81', '14', '18', '3', '10', '31', '27', '20', '77', '22', '24', '55', '23', '11', '16', '2']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 9
Race Name: Canadian Grand Prix
Location: Montréal
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '63', '16', '55', '81', '11', '10', '31', '27', '14', '24', '18', '3', '77', '20', '23', '22', '2']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 10
Race Name: Spanish Grand Prix
Location: Barcelona
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '81', '55', '44', '1', '27', '11', '20', '3', '10', '16', '31', '18', '22', '23', '77', '24', '14', '2', '4']
core           INFO 	Loading data for British Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 11
Race Name: Austrian Grand Prix
Location: Spielberg
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '1', '4', '81', '55', '27', '18', '14', '23', '22', '2', '20', '3', '16', '77', '31', '11', '24', '63', '10']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 12
Race Name: British Grand Prix
Location: Silverstone
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '44', '16', '1', '55', '11', '63', '22', '18', '14', '3', '27', '23', '20', '77', '2', '31', '24', '10']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 13
Race Name: Hungarian Grand Prix
Location: Budapest
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '81', '16', '1', '4', '55', '11', '14', '31', '3', '18', '23', '10', '20', '77', '22', '2', '27', '24', '63']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 14
Race Name: Belgian Grand Prix
Location: Spa-Francorchamps
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '81', '55', '11', '63', '44', '10', '14', '27', '3', '18', '23', '31', '2', '22', '20', '77', '24']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 15
Race Name: Dutch Grand Prix
Location: Zandvoort
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '55', '44', '1', '63', '11', '23', '20', '14', '43', '3', '31', '10', '77', '27', '24', '18', '22']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 16
Race Name: Italian Grand Prix
Location: Monza
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '16', '63', '4', '1', '14', '23', '43', '44', '50', '27', '10', '3', '24', '31', '77', '11', '55', '18', '22']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 17
Race Name: Azerbaijan Grand Prix
Location: Baku
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']
core           INFO 	Loading data for United States Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 18
Race Name: Singapore Grand Prix
Location: Marina Bay
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '1', '4', '81', '63', '11', '27', '30', '43', '20', '10', '14', '22', '18', '23', '77', '31', '24', '44']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 19
Race Name: United States Grand Prix
Location: Austin
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '16', '44', '63', '1', '20', '81', '27', '10', '18', '43', '31', '77', '24', '30', '11', '14', '23', '22']
core           INFO 	Loading data for São Paulo Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 20
Race Name: Mexico City Grand Prix
Location: Mexico City
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '31', '10', '63', '16', '4', '22', '81', '30', '44', '11', '50', '77', '14', '24', '55', '43', '23', '18', '27']
core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 21
Race Name: São Paulo Grand Prix
Location: São Paulo
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '44', '55', '16', '1', '4', '81', '27', '22', '11', '14', '20', '24', '43', '18', '30', '31', '77', '23', '10']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 22
Race Name: Las Vegas Grand Prix
Location: Las Vegas
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22', '30', '23', '27', '11', '18', '43', '31']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Race code: 23
Race Name: Qatar Grand Prix
Location: Lusail
---------------------------


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']


Race code: 24
Race Name: Abu Dhabi Grand Prix
Location: Yas Island
---------------------------
Successfully processed 2024. Total pit stops collected: 773

--- Data Collection Complete ---
Total rows collected (2024-2024): 773


In [14]:
jumbo_df.shape

(773, 33)

In [17]:
fastf1.Cache.get_cache_info()
fastf1.Cache.set_enabled()


In [21]:
#| hide
import nbdev; nbdev.nbdev_export()

In [33]:
start_year = 2024
end_year = 2024
all_years_data = []
season_df = pd.DataFrame()
for year in range(start_year, end_year + 1):
    try:
        # Get race codes for the season
        print(f"Fetching season race codes for {year}")
        season_race_codes = fetch_season_race_codes(year)
        logger.info("====> Done <====\n")

        # Get all pit stops for that year
        print(f"Fetching all pit stop times for {year}")
        season_df = create_pit_stop_times_dataframe_for_a_season(year, season_race_codes)
        print("====> Done <====\n")

        all_years_data.append(season_df)
        print(f"Successfully processed {year}. Total pit stops collected: {len(season_df)}")
    except Exception as e:
        print(f"ERROR processing year {year}: {e}")
        # Continue to the next year even if one fails
        continue


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Fetching season race codes for 2024
Fetching all pit stop times for 2024


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req 

req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '63', '16', '55', '81', '11', '10', '31', '27', '14', '24', '18', '3', '77', '20', '23', '22', '2']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_dat

core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']
core           INFO 	Loading data for United States Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req     

====> Done <====

Successfully processed 2024. Total pit stops collected: 773


In [34]:
jumbo_df = pd.concat(all_years_data, ignore_index=True)

In [36]:
jumbo_df.shape

(773, 33)

In [14]:
jumbo_df

Unnamed: 0,index,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,...,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,PitStopTime
0,1004,0 days 00:57:34.254000,ALO,14,0 days 00:01:40.308000,11.0,1.0,NaT,0 days 00:57:32.560000,0 days 00:00:30.890000,...,Alpine,0 days 00:55:53.946000,2021-03-28 15:25:53.956,1,9.0,False,,False,False,0 days 00:00:24.377000
1,1022,0 days 01:27:15.888000,ALO,14,0 days 00:01:40.108000,29.0,2.0,NaT,0 days 01:27:14.206000,0 days 00:00:31.265000,...,Alpine,0 days 01:25:35.780000,2021-03-28 15:55:35.790,1,12.0,False,,False,False,0 days 00:00:24.771000
2,127,0 days 01:05:26.391000,BOT,77,0 days 00:01:39.534000,16.0,1.0,NaT,0 days 01:05:24.696000,0 days 00:00:30.967000,...,Mercedes,0 days 01:03:46.857000,2021-03-28 15:33:46.867,1,2.0,False,,False,False,0 days 00:00:24.244000
3,141,0 days 01:28:01.018000,BOT,77,0 days 00:01:38.028000,30.0,2.0,NaT,0 days 01:27:59.311000,0 days 00:00:30.631000,...,Mercedes,0 days 01:26:22.990000,2021-03-28 15:56:23.000,1,2.0,False,,False,False,0 days 00:00:32.937000
4,165,0 days 02:06:23.439000,BOT,77,0 days 00:01:37.802000,54.0,3.0,NaT,0 days 02:06:21.738000,0 days 00:00:30.429000,...,Mercedes,0 days 02:04:45.637000,2021-03-28 16:34:45.647,1,3.0,False,,False,False,0 days 00:00:24.586000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
677,338,0 days 02:23:03.403000,TSU,22,0 days 00:01:55.178000,53.0,2.0,NaT,0 days 02:23:02.376000,0 days 00:00:22.955000,...,AlphaTauri,0 days 02:21:08.225000,2021-12-12 14:22:08.235,4,6.0,False,,False,False,0 days 00:00:21.866000
678,471,0 days 01:21:44.189000,VER,33,0 days 00:01:31.515000,13.0,1.0,NaT,0 days 01:21:43.157000,0 days 00:00:18.213000,...,Red Bull Racing,0 days 01:20:12.674000,2021-12-12 13:21:12.684,1,2.0,False,,False,False,0 days 00:00:21.179000
679,494,0 days 01:56:07.952000,VER,33,0 days 00:01:43.598000,36.0,2.0,NaT,0 days 01:56:06.936000,0 days 00:00:17.689000,...,Red Bull Racing,0 days 01:54:24.354000,2021-12-12 13:55:24.364,16,2.0,False,,False,False,0 days 00:00:21.865000
680,511,0 days 02:21:38.652000,VER,33,0 days 00:01:42.072000,53.0,3.0,NaT,0 days 02:21:37.624000,0 days 00:00:17.530000,...,Red Bull Racing,0 days 02:19:56.580000,2021-12-12 14:20:56.590,124,2.0,False,,False,False,0 days 00:00:21.458000
