In [2]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [3]:
fastf1.Cache.enable_cache('f1_cache')

In [4]:
session_2024 = fastf1.get_session(2024,3, "R")
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No

In [5]:
session_2024.laps.columns

Index(['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'Stint',
       'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
       'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
       'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest',
       'Compound', 'TyreLife', 'FreshTyre', 'Team', 'LapStartTime',
       'LapStartDate', 'TrackStatus', 'Position', 'Deleted', 'DeletedReason',
       'FastF1Generated', 'IsAccurate'],
      dtype='object')

In [6]:
pd.set_option('display.max_columns', None)
session_2024.laps.head(10)

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,IsPersonalBest,Compound,TyreLife,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate
0,0 days 00:59:56.944000,VER,1,0 days 00:01:27.458000,1.0,1.0,NaT,NaT,NaT,0 days 00:00:18.462000,0 days 00:00:35.518000,NaT,0 days 00:59:21.516000,0 days 00:59:57.152000,278.0,285.0,283.0,238.0,False,MEDIUM,1.0,True,Red Bull Racing,0 days 00:58:29.232000,2024-03-24 04:03:12.430,1,1.0,False,,False,False
1,0 days 01:01:21.043000,VER,1,0 days 00:01:24.099000,2.0,1.0,NaT,NaT,0 days 00:00:29.623000,0 days 00:00:18.375000,0 days 00:00:36.101000,0 days 01:00:26.657000,0 days 01:00:45.032000,0 days 01:01:21.133000,273.0,297.0,298.0,281.0,True,MEDIUM,2.0,True,Red Bull Racing,0 days 00:59:56.944000,2024-03-24 04:04:40.142,1,2.0,False,,False,True
2,0 days 01:02:44.158000,VER,1,0 days 00:01:23.115000,3.0,1.0,NaT,NaT,0 days 00:00:28.793000,0 days 00:00:18.573000,0 days 00:00:35.749000,0 days 01:01:49.926000,0 days 01:02:08.499000,0 days 01:02:44.248000,275.0,314.0,295.0,305.0,True,MEDIUM,3.0,True,Red Bull Racing,0 days 01:01:21.043000,2024-03-24 04:06:04.241,1,2.0,False,,False,True
3,0 days 01:04:24.523000,VER,1,NaT,4.0,1.0,NaT,0 days 01:04:24.613000,0 days 00:00:28.756000,0 days 00:00:18.353000,NaT,0 days 01:03:13.063000,0 days 01:03:31.408000,NaT,270.0,308.0,,302.0,False,MEDIUM,4.0,True,Red Bull Racing,0 days 01:02:44.158000,2024-03-24 04:07:27.356,1,19.0,False,,False,False
4,0 days 01:00:06.790000,GAS,10,0 days 00:01:37.304000,1.0,1.0,NaT,NaT,NaT,0 days 00:00:18.854000,0 days 00:00:38.564000,NaT,0 days 00:59:28.406000,0 days 01:00:06.942000,267.0,316.0,287.0,223.0,False,MEDIUM,1.0,True,Alpine,0 days 00:58:29.232000,2024-03-24 04:03:12.430,1,17.0,False,,False,False
5,0 days 01:01:31.439000,GAS,10,0 days 00:01:24.649000,2.0,1.0,NaT,NaT,0 days 00:00:29.905000,0 days 00:00:18.241000,0 days 00:00:36.503000,0 days 01:00:36.747000,0 days 01:00:54.988000,0 days 01:01:31.491000,270.0,325.0,304.0,287.0,True,MEDIUM,2.0,True,Alpine,0 days 01:00:06.790000,2024-03-24 04:04:49.988,1,17.0,False,,False,True
6,0 days 01:02:55.290000,GAS,10,0 days 00:01:23.851000,3.0,1.0,NaT,NaT,0 days 00:00:29.379000,0 days 00:00:18.148000,0 days 00:00:36.324000,0 days 01:02:00.870000,0 days 01:02:19.018000,0 days 01:02:55.342000,271.0,322.0,304.0,292.0,True,MEDIUM,3.0,True,Alpine,0 days 01:01:31.439000,2024-03-24 04:06:14.637,1,17.0,False,,False,True
7,0 days 01:04:18.990000,GAS,10,0 days 00:01:23.700000,4.0,1.0,NaT,NaT,0 days 00:00:29.050000,0 days 00:00:18.116000,0 days 00:00:36.534000,0 days 01:03:24.392000,0 days 01:03:42.508000,0 days 01:04:19.042000,272.0,320.0,304.0,305.0,True,MEDIUM,4.0,True,Alpine,0 days 01:02:55.290000,2024-03-24 04:07:38.488,1,16.0,False,,False,True
8,0 days 01:05:43.516000,GAS,10,0 days 00:01:24.526000,5.0,1.0,NaT,NaT,0 days 00:00:29.693000,0 days 00:00:17.983000,0 days 00:00:36.850000,0 days 01:04:48.735000,0 days 01:05:06.718000,0 days 01:05:43.568000,273.0,334.0,302.0,,False,MEDIUM,5.0,True,Alpine,0 days 01:04:18.990000,2024-03-24 04:09:02.188,1,16.0,False,,False,True
9,0 days 01:07:07.428000,GAS,10,0 days 00:01:23.912000,6.0,1.0,NaT,NaT,0 days 00:00:29.368000,0 days 00:00:18.107000,0 days 00:00:36.437000,0 days 01:06:12.936000,0 days 01:06:31.043000,0 days 01:07:07.480000,272.0,320.0,301.0,308.0,False,MEDIUM,6.0,True,Alpine,0 days 01:05:43.516000,2024-03-24 04:10:26.714,1,15.0,False,,False,True


In [7]:
lap_2024 = session_2024.laps[["Driver", "LapTime"]].copy()
lap_2024.dropna(subset=["LapTime"],inplace=True)
lap_2024["LapTime (s)"] = lap_2024["LapTime"].dt.total_seconds()

In [8]:
lap_2024

Unnamed: 0,Driver,LapTime,LapTime (s)
0,VER,0 days 00:01:27.458000,87.458
1,VER,0 days 00:01:24.099000,84.099
2,VER,0 days 00:01:23.115000,83.115
4,GAS,0 days 00:01:37.304000,97.304
5,GAS,0 days 00:01:24.649000,84.649
...,...,...,...
993,PIA,0 days 00:01:20.199000,80.199
994,PIA,0 days 00:01:20.754000,80.754
995,PIA,0 days 00:01:20.357000,80.357
996,PIA,0 days 00:01:25.255000,85.255


In [9]:
qualifying_2025 = pd.DataFrame(
    {"Driver":["Lando Norris","Oscar Piastri","Max Verstappen","George Russell", "Yuki Tsunoda","Alexander Albon","Charles Leclerc","Lewis Hamilton","Pierre Gasly","Carlos Sainz","Lance Stroll", "Fernando Alonso"],
    "QualifyingTime (s)" : [75.096, 75.080, 75.481, 75.546, 75.670, 75.737, 75.755, 75.973, 75.980, 76.062, 76.4, 76.5]}
)
qualifying_2025

Unnamed: 0,Driver,QualifyingTime (s)
0,Lando Norris,75.096
1,Oscar Piastri,75.08
2,Max Verstappen,75.481
3,George Russell,75.546
4,Yuki Tsunoda,75.67
5,Alexander Albon,75.737
6,Charles Leclerc,75.755
7,Lewis Hamilton,75.973
8,Pierre Gasly,75.98
9,Carlos Sainz,76.062


In [10]:
driver_mapping = {
    "Lando Norris" :"NOR","Oscar Piastri" : "PIA" ,"Max Verstappen" : "VER","George Russell" : "RUS", "Yuki Tsunoda" : "TSU","Alexander Albon" : "ALB","Charles Leclerc" : "LEC","Lewis Hamilton" : "HAM" ,"Pierre Gasly" : "GAS","Carlos Sainz" : "SAI","Lance Stroll" : "STR", "Fernando Alonso" : "ALO"
}

In [11]:
qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

In [12]:
print(qualifying_2025)
print(lap_2024)

             Driver  QualifyingTime (s) DriverCode
0      Lando Norris              75.096        NOR
1     Oscar Piastri              75.080        PIA
2    Max Verstappen              75.481        VER
3    George Russell              75.546        RUS
4      Yuki Tsunoda              75.670        TSU
5   Alexander Albon              75.737        ALB
6   Charles Leclerc              75.755        LEC
7    Lewis Hamilton              75.973        HAM
8      Pierre Gasly              75.980        GAS
9      Carlos Sainz              76.062        SAI
10     Lance Stroll              76.400        STR
11  Fernando Alonso              76.500        ALO
    Driver                LapTime  LapTime (s)
0      VER 0 days 00:01:27.458000       87.458
1      VER 0 days 00:01:24.099000       84.099
2      VER 0 days 00:01:23.115000       83.115
4      GAS 0 days 00:01:37.304000       97.304
5      GAS 0 days 00:01:24.649000       84.649
..     ...                    ...          ...
993    P

In [13]:
# merging 2025 Qualifying Data with Race Data
merged_data = qualifying_2025.merge(lap_2024, left_on="DriverCode", right_on="Driver")

In [14]:
merged_data

Unnamed: 0,Driver_x,QualifyingTime (s),DriverCode,Driver_y,LapTime,LapTime (s)
0,Lando Norris,75.096,NOR,NOR,0 days 00:01:29.784000,89.784
1,Lando Norris,75.096,NOR,NOR,0 days 00:01:23.183000,83.183
2,Lando Norris,75.096,NOR,NOR,0 days 00:01:22.656000,82.656
3,Lando Norris,75.096,NOR,NOR,0 days 00:01:22.609000,82.609
4,Lando Norris,75.096,NOR,NOR,0 days 00:01:22.685000,82.685
...,...,...,...,...,...,...
589,Fernando Alonso,76.500,ALO,ALO,0 days 00:01:21.035000,81.035
590,Fernando Alonso,76.500,ALO,ALO,0 days 00:01:20.952000,80.952
591,Fernando Alonso,76.500,ALO,ALO,0 days 00:01:20.752000,80.752
592,Fernando Alonso,76.500,ALO,ALO,0 days 00:01:35.183000,95.183


In [15]:
qualifying_2025.describe()

Unnamed: 0,QualifyingTime (s)
count,12.0
mean,75.773333
std,0.444729
min,75.08
25%,75.52975
50%,75.746
75%,76.0005
max,76.5


In [16]:
X = merged_data[["QualifyingTime (s)"]]
y = merged_data["LapTime (s)"]

In [17]:
if X.shape[0] == 0:
    raise ValueError("Dataset is empty after preprocessing.")

In [18]:
# Train Gradient Boosting Model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=39)
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=39)
model.fit(X_train,y_train)

In [19]:
predicted_lap_times = model.predict(qualifying_2025[["QualifyingTime (s)"]])
predicted_lap_times

array([82.71563371, 84.33022293, 85.15165839, 83.88827513, 84.41731517,
       84.64223084, 83.08071116, 86.08678911, 85.54205824, 83.6237112 ,
       84.68956389, 85.11263008])

In [20]:
qualifying_2025["PredictedRaceTime (s)"] = predicted_lap_times

In [21]:
qualifying_2025 = qualifying_2025.sort_values(by="PredictedRaceTime (s)")

In [22]:
print(qualifying_2025[["Driver", "PredictedRaceTime (s)"]])

             Driver  PredictedRaceTime (s)
0      Lando Norris              82.715634
6   Charles Leclerc              83.080711
9      Carlos Sainz              83.623711
3    George Russell              83.888275
1     Oscar Piastri              84.330223
4      Yuki Tsunoda              84.417315
5   Alexander Albon              84.642231
10     Lance Stroll              84.689564
11  Fernando Alonso              85.112630
2    Max Verstappen              85.151658
8      Pierre Gasly              85.542058
7    Lewis Hamilton              86.086789


In [23]:
y_pred = model.predict(X_test)
print(f"\r\n Model Error (MAE) : {mean_absolute_error(y_test,y_pred):.2f} seconds")


 Model Error (MAE) : 3.37 seconds
