<a href="https://colab.research.google.com/github/ekosgei/f1-pitstop-strategy-data-wrangling/blob/main/f1_pitstop_strategy_data_wrangling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [161]:
!pip install fastf1 pandas numpy matplotlib



In [173]:
import fastf1
import pandas as pd
# Session to analyse. Change these three values to build the dataset for a
# different race; every later cell only depends on `race`.
SEASON = 2025
GRAND_PRIX = 'Belgian Grand Prix'
SESSION_TYPE = 'R'  # 'R' selects the race session

race = fastf1.get_session(SEASON, GRAND_PRIX, SESSION_TYPE)
# Fetch the session data (or read it from the local cache when present).
race.load()

core           INFO 	Loading data for Belgian Grand Prix - Race [v3.7.0]
INFO:fastf1.fastf1.core:Loading data for Belgian Grand Prix - Race [v3.7.0]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_d

Goal: Create a dataset that makes it easier to analyse the effectiveness of the undercut strategy used by Formula One race teams.


In [174]:
# Build one row per pit stop: the lap on which a driver entered the pits for a
# tyre change, joined with that driver's position and stint on the lap before.

laps = race.laps

# Per-lap lookup of driver, lap number, position and stint. Later cells join
# against this table as well, so it is kept as its own variable.
lap_positions = laps[['Driver', 'LapNumber', 'Position', 'Stint']]

pit_in_laps = (
    laps
    .pick_box_laps(which="in")
    # Drop in-laps without a lap time; these are treated as formation laps,
    # i.e. laps driven before racing has started.
    .dropna(subset=['LapTime'])
)

pit_stops = (
    # Keep only the columns that are integral to the analysis.
    pit_in_laps[['Team', 'Driver', 'LapNumber', 'Position', 'Stint']]
    # assign() returns a new frame, so the helper column is never written into
    # a column slice of the laps data (plain item assignment on that slice is
    # what triggers pandas' SettingWithCopyWarning).
    .assign(Previous_Lap=lambda stops: stops['LapNumber'] - 1)
    # Look up each driver's own row for the lap before the in-lap.
    .merge(
        lap_positions,
        left_on=['Driver', 'Previous_Lap'],
        right_on=['Driver', 'LapNumber'],
        how='left',
        suffixes=('_pit', '_before'),
    )
    # The in-lap's own position/stint and the duplicated join key are not needed.
    .drop(columns=['Position_pit', 'LapNumber_before', 'Stint_pit'])
    .rename(columns={
        'LapNumber_pit': 'PitLap',
        'Previous_Lap': 'LapBeforePit',
        'Position_before': 'PositionBeforePit',
        'Stint_before': 'StintBeforePit',
    })
)

pit_stops.head()

Unnamed: 0,Team,Driver,PitLap,LapBeforePit,PositionBeforePit,StintBeforePit
0,McLaren,PIA,12.0,11.0,1.0,1.0
1,McLaren,NOR,13.0,12.0,1.0,1.0
2,Ferrari,LEC,12.0,11.0,3.0,1.0
3,Red Bull Racing,VER,12.0,11.0,4.0,1.0
4,Mercedes,RUS,12.0,11.0,5.0,1.0


In [175]:
# Label every stop with the driver's overall strategy, e.g. "1-stop":
# the number of pit laps recorded for that driver followed by "-stop".
stops_per_driver = pit_stops.groupby('Driver')['PitLap'].transform('count')
pit_stops['Pitstop_Strategy'] = stops_per_driver.astype(str) + "-stop"
pit_stops.head()

Unnamed: 0,Team,Driver,PitLap,LapBeforePit,PositionBeforePit,StintBeforePit,Pitstop_Strategy
0,McLaren,PIA,12.0,11.0,1.0,1.0,1-stop
1,McLaren,NOR,13.0,12.0,1.0,1.0,1-stop
2,Ferrari,LEC,12.0,11.0,3.0,1.0,1-stop
3,Red Bull Racing,VER,12.0,11.0,4.0,1.0,1-stop
4,Mercedes,RUS,12.0,11.0,5.0,1.0,1-stop


In [176]:
# Introduce the rival driver: the driver running one position ahead of the
# target driver on the lap before the target pits (when the decision to pit is made).
pit_stops['Rival_Position'] = pit_stops['PositionBeforePit'] - 1

# pandas merge matches NaN keys with each other. Without this filter, a stop
# whose PositionBeforePit is unknown (NaN Rival_Position) would be paired with
# every car that has no recorded position on that lap, producing spurious or
# duplicated rows. Only laps with a known position can identify a rival.
ranked_laps = lap_positions.dropna(subset=['Position'])

pit_stops = pit_stops.merge(
    ranked_laps,
    left_on=['LapBeforePit', 'Rival_Position'],
    right_on=['LapNumber', 'Position'],
    how='left',
    suffixes=('', '_rival')
)
# The join keys from the lookup side duplicate LapBeforePit / Rival_Position.
pit_stops = pit_stops.drop(columns=['LapNumber', 'Position'])
pit_stops = pit_stops.rename(columns={
    # pit_stops has no 'Stint' column of its own, so the lookup's 'Stint'
    # arrives unsuffixed and is the rival's stint on that lap.
    'Stint': 'Rival_Stint',
    'Driver': 'Target_Driver',
    'Driver_rival': 'Rival_Driver',
    'Team': 'Target_Team',
    'PitLap': 'Target_PitLap',
})

# Stops made from the lead have Rival_Position 0 and therefore no rival; the
# left join keeps them with NaN in the rival columns.
pit_stops.head()

Unnamed: 0,Target_Team,Target_Driver,Target_PitLap,LapBeforePit,PositionBeforePit,StintBeforePit,Pitstop_Strategy,Rival_Position,Rival_Driver,Rival_Stint
0,McLaren,PIA,12.0,11.0,1.0,1.0,1-stop,0.0,,
1,McLaren,NOR,13.0,12.0,1.0,1.0,1-stop,0.0,,
2,Ferrari,LEC,12.0,11.0,3.0,1.0,1-stop,2.0,NOR,1.0
3,Red Bull Racing,VER,12.0,11.0,4.0,1.0,1-stop,3.0,LEC,1.0
4,Mercedes,RUS,12.0,11.0,5.0,1.0,1-stop,4.0,VER,1.0


In [177]:
# Introduce the rival's pit lap: self-join the pit-stop table so that each row
# also carries the rival's own stop for the stint recorded in Rival_Stint.
unused_rival_columns = [
    'Target_Driver_rival',
    'LapBeforePit_rival',
    'PositionBeforePit_rival',
    'StintBeforePit_rival',
    'Rival_Position_rival',
    'Rival_Driver_rival',
    'Rival_Stint_rival',
]

pit_stops_with_rival = (
    pit_stops
    .merge(
        pit_stops,
        left_on=['Rival_Driver', 'Rival_Stint'],
        right_on=['Target_Driver', 'StintBeforePit'],
        how='left',
        suffixes=("", '_rival'),
    )
    # Everything from the rival's side except team, pit lap and strategy is dropped.
    .drop(columns=unused_rival_columns)
    .rename(columns={
        'Target_Team_rival': 'Rival_Team',
        'Target_PitLap_rival': 'Rival_PitLap',
        'Pitstop_Strategy_rival': 'Rival_PitStrategy',
        'Pitstop_Strategy': 'Target_PitStrategy',
    })
)
pit_stops_with_rival.head()

Unnamed: 0,Target_Team,Target_Driver,Target_PitLap,LapBeforePit,PositionBeforePit,StintBeforePit,Target_PitStrategy,Rival_Position,Rival_Driver,Rival_Stint,Rival_Team,Rival_PitLap,Rival_PitStrategy
0,McLaren,PIA,12.0,11.0,1.0,1.0,1-stop,0.0,,,,,
1,McLaren,NOR,13.0,12.0,1.0,1.0,1-stop,0.0,,,,,
2,Ferrari,LEC,12.0,11.0,3.0,1.0,1-stop,2.0,NOR,1.0,McLaren,13.0,1-stop
3,Red Bull Racing,VER,12.0,11.0,4.0,1.0,1-stop,3.0,LEC,1.0,Ferrari,12.0,1-stop
4,Mercedes,RUS,12.0,11.0,5.0,1.0,1-stop,4.0,VER,1.0,Red Bull Racing,12.0,1-stop


In [178]:
# Keep only target/rival pairs that ran the same number of stops. Rows without
# a rival have no rival strategy, so the comparison is False and they drop out.
same_strategy = pit_stops_with_rival['Target_PitStrategy'].eq(
    pit_stops_with_rival['Rival_PitStrategy']
)
pit_stops_with_rival = pit_stops_with_rival[same_strategy]
pit_stops_with_rival.head()

Unnamed: 0,Target_Team,Target_Driver,Target_PitLap,LapBeforePit,PositionBeforePit,StintBeforePit,Target_PitStrategy,Rival_Position,Rival_Driver,Rival_Stint,Rival_Team,Rival_PitLap,Rival_PitStrategy
2,Ferrari,LEC,12.0,11.0,3.0,1.0,1-stop,2.0,NOR,1.0,McLaren,13.0,1-stop
3,Red Bull Racing,VER,12.0,11.0,4.0,1.0,1-stop,3.0,LEC,1.0,Ferrari,12.0,1-stop
4,Mercedes,RUS,12.0,11.0,5.0,1.0,1-stop,4.0,VER,1.0,Red Bull Racing,12.0,1-stop
5,Williams,ALB,12.0,11.0,6.0,1.0,1-stop,5.0,RUS,1.0,Mercedes,12.0,1-stop
6,Ferrari,HAM,11.0,10.0,13.0,1.0,1-stop,12.0,BEA,1.0,Haas F1 Team,12.0,1-stop


I restricted the analysis to drivers with the same pit-stop count (e.g., 1-stop vs 1-stop) to isolate the effect of pit timing. This made merging easier and ensures that comparisons are made within the same strategic context, keeping the undercut analysis clean. Comparing drivers with different total stops could mix in strategy differences, such as deliberately running a different number of stops from an opponent to gain an advantage.

In [179]:
# Assumption: the out-lap is the lap right after the pit in-lap
# (OutLap = PitInLap + 1).
# assign() builds a new frame instead of writing into the boolean-filtered
# frame from the strategy filter, which avoids pandas' SettingWithCopyWarning
# and makes this cell safe to re-run.
pit_stops_with_rival = pit_stops_with_rival.assign(
    Rival_OutLap=pit_stops_with_rival['Rival_PitLap'] + 1
)
pit_stops_with_rival.head()


Unnamed: 0,Target_Team,Target_Driver,Target_PitLap,LapBeforePit,PositionBeforePit,StintBeforePit,Target_PitStrategy,Rival_Position,Rival_Driver,Rival_Stint,Rival_Team,Rival_PitLap,Rival_PitStrategy,Rival_OutLap
2,Ferrari,LEC,12.0,11.0,3.0,1.0,1-stop,2.0,NOR,1.0,McLaren,13.0,1-stop,14.0
3,Red Bull Racing,VER,12.0,11.0,4.0,1.0,1-stop,3.0,LEC,1.0,Ferrari,12.0,1-stop,13.0
4,Mercedes,RUS,12.0,11.0,5.0,1.0,1-stop,4.0,VER,1.0,Red Bull Racing,12.0,1-stop,13.0
5,Williams,ALB,12.0,11.0,6.0,1.0,1-stop,5.0,RUS,1.0,Mercedes,12.0,1-stop,13.0
6,Ferrari,HAM,11.0,10.0,13.0,1.0,1-stop,12.0,BEA,1.0,Haas F1 Team,12.0,1-stop,13.0


In [180]:
# Target driver's position and stint on the rival's out-lap.
pit_stops_with_rival = (
    pit_stops_with_rival
    .merge(
        lap_positions,
        left_on=['Target_Driver', 'Rival_OutLap'],
        right_on=['Driver', 'LapNumber'],
        how='left',
    )
    # The lookup's join keys duplicate Target_Driver / Rival_OutLap.
    .drop(columns=['LapNumber', 'Driver'])
    .rename(columns={
        'Position': 'Rival_OutLap_TD_Pos',
        'Stint': 'Rival_OutLap_TD_Stint',
    })
)

pit_stops_with_rival.head()

Unnamed: 0,Target_Team,Target_Driver,Target_PitLap,LapBeforePit,PositionBeforePit,StintBeforePit,Target_PitStrategy,Rival_Position,Rival_Driver,Rival_Stint,Rival_Team,Rival_PitLap,Rival_PitStrategy,Rival_OutLap,Rival_OutLap_TD_Pos,Rival_OutLap_TD_Stint
0,Ferrari,LEC,12.0,11.0,3.0,1.0,1-stop,2.0,NOR,1.0,McLaren,13.0,1-stop,14.0,3.0,2.0
1,Red Bull Racing,VER,12.0,11.0,4.0,1.0,1-stop,3.0,LEC,1.0,Ferrari,12.0,1-stop,13.0,6.0,2.0
2,Mercedes,RUS,12.0,11.0,5.0,1.0,1-stop,4.0,VER,1.0,Red Bull Racing,12.0,1-stop,13.0,7.0,2.0
3,Williams,ALB,12.0,11.0,6.0,1.0,1-stop,5.0,RUS,1.0,Mercedes,12.0,1-stop,13.0,8.0,2.0
4,Ferrari,HAM,11.0,10.0,13.0,1.0,1-stop,12.0,BEA,1.0,Haas F1 Team,12.0,1-stop,13.0,9.0,2.0


In [181]:
# Rival driver's position and stint on the rival's own out-lap.
pit_stops_with_rival = (
    pit_stops_with_rival
    .merge(
        lap_positions,
        left_on=['Rival_Driver', 'Rival_OutLap'],
        right_on=['Driver', 'LapNumber'],
        how='left',
    )
    # The lookup's join keys duplicate Rival_Driver / Rival_OutLap.
    .drop(columns=['LapNumber', 'Driver'])
    .rename(columns={
        'Position': 'Rival_OutLap_RD_Pos',
        'Stint': 'Rival_OutLap_RD_Stint',
    })
)

pit_stops_with_rival.head()

Unnamed: 0,Target_Team,Target_Driver,Target_PitLap,LapBeforePit,PositionBeforePit,StintBeforePit,Target_PitStrategy,Rival_Position,Rival_Driver,Rival_Stint,Rival_Team,Rival_PitLap,Rival_PitStrategy,Rival_OutLap,Rival_OutLap_TD_Pos,Rival_OutLap_TD_Stint,Rival_OutLap_RD_Pos,Rival_OutLap_RD_Stint
0,Ferrari,LEC,12.0,11.0,3.0,1.0,1-stop,2.0,NOR,1.0,McLaren,13.0,1-stop,14.0,3.0,2.0,2.0,2.0
1,Red Bull Racing,VER,12.0,11.0,4.0,1.0,1-stop,3.0,LEC,1.0,Ferrari,12.0,1-stop,13.0,6.0,2.0,5.0,2.0
2,Mercedes,RUS,12.0,11.0,5.0,1.0,1-stop,4.0,VER,1.0,Red Bull Racing,12.0,1-stop,13.0,7.0,2.0,6.0,2.0
3,Williams,ALB,12.0,11.0,6.0,1.0,1-stop,5.0,RUS,1.0,Mercedes,12.0,1-stop,13.0,8.0,2.0,7.0,2.0
4,Ferrari,HAM,11.0,10.0,13.0,1.0,1-stop,12.0,BEA,1.0,Haas F1 Team,12.0,1-stop,13.0,9.0,2.0,15.0,2.0


The final dataset will support analysis of the effectiveness of the undercut strategy across different race tracks. Any race can be selected as input in cell 2. The dataset includes several key pieces of information:
1. The opponent of each driver at every pit stop **(Rival_Driver)**

2. The next pit out lap of the opponent **(Rival_OutLap)**

3. The position of the driver after the opponent exits the pit lane **(Rival_OutLap_TD_Pos)**

4. The position of the opponent after exiting the pit lane **(Rival_OutLap_RD_Pos)**

In [182]:
# Optional export of the final dataset to CSV. Left commented out so that
# running the notebook does not write a file; uncomment the line to save it.
#pit_stops_with_rival.to_csv("undercut_analysis_Belgian2025.csv", index=False)
