In [1]:
pip install fastf1

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import fastf1
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [3]:
fastf1.Cache.enable_cache('fastf1_cache')

In [4]:
session_2024 = fastf1.get_session(2024, 3, 'R')
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']


### Extracting the required features

In [5]:
laps_24 = pd.DataFrame(session_2024.laps[['Driver','LapTime']].copy())
laps_24.head()

Unnamed: 0,Driver,LapTime
0,VER,0 days 00:01:27.458000
1,VER,0 days 00:01:24.099000
2,VER,0 days 00:01:23.115000
3,VER,NaT
4,GAS,0 days 00:01:37.304000


### Removing all rows with NaN LapTimes

In [6]:
laps_24.dropna(subset=['LapTime'], inplace=True)
laps_24.head()

Unnamed: 0,Driver,LapTime
0,VER,0 days 00:01:27.458000
1,VER,0 days 00:01:24.099000
2,VER,0 days 00:01:23.115000
4,GAS,0 days 00:01:37.304000
5,GAS,0 days 00:01:24.649000


### Converting LapTime datatype into integers (seconds)  

In [7]:
laps_24['LapTime(s)'] = laps_24['LapTime'].dt.total_seconds()
laps_24.head()

Unnamed: 0,Driver,LapTime,LapTime(s)
0,VER,0 days 00:01:27.458000,87.458
1,VER,0 days 00:01:24.099000,84.099
2,VER,0 days 00:01:23.115000,83.115
4,GAS,0 days 00:01:37.304000,97.304
5,GAS,0 days 00:01:24.649000,84.649


### Creating a new DataFrame that stores the simulated qualifying times of each driver

In [8]:
qualifying_25 = pd.DataFrame({
    "Driver" : ['Lando Norris', 'Oscar Piastri', 'Max, Verstappen', 'George Russel', 'Yuki Tsunoda', 'Alexander Albon', 'Charles Leclerc','Lewis Hamilton', 'Pierre Gasly', 'Carlos Sainz', 'Fernando Alonso', 'Lance Stroll'],
    "QualifyingTime (s)": [75.096, 75.180, 75.481, 75.546, 75.670, 75.737, 75.755, 75.973, 75.980, 76.062, 76.4, 76.5]
})

driver_mapping = {
    "Lando Norris": "NOR", "Oscar Piastri": "PIA", "Max Verstappen": "VER", "George Russell": "RUS",
    "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB", "Charles Leclerc": "LEC", "Lewis Hamilton": "HAM",
    "Pierre Gasly": "GAS", "Carlos Sainz": "SAI", "Lance Stroll": "STR", "Fernando Alonso": "ALO"
}

### Mapping each driver with their code

In [9]:
qualifying_25['DriverCode'] = qualifying_25['Driver'].map(driver_mapping)

### Merging the two datasets

In [10]:
merged_data = qualifying_25.merge(laps_24, left_on ='DriverCode', right_on='Driver')

### Determining the independent and target variable

In [11]:
X = merged_data[['QualifyingTime (s)']]
y = merged_data['LapTime(s)']

### Handling empty dataset error

In [12]:
if X.shape[0] == 0:
    raise ValueError("Dataset is empty after preprocessing. Check the data source!")

### Splitting into training and testing set

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Training the model

In [20]:
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

### Predicting new LapTime values for the qualifying dataset (future race)

In [15]:
predicted_lap_time = model.predict(qualifying_25[['QualifyingTime (s)']])
qualifying_25['Predicted_RaceTime (s)'] = predicted_lap_time

In [16]:
qualifying_25.head()

Unnamed: 0,Driver,QualifyingTime (s),DriverCode,Predicted_RaceTime (s)
0,Lando Norris,75.096,NOR,83.251367
1,Oscar Piastri,75.18,PIA,84.476367
2,"Max, Verstappen",75.481,,83.810061
3,George Russel,75.546,,83.810061
4,Yuki Tsunoda,75.67,TSU,83.810061


In [17]:
qualifying_25 = qualifying_25.sort_values(by = 'Predicted_RaceTime (s)')

In [18]:
print("\nPredicted 2025 GP Winner\n")
print(qualifying_25[["Driver", "Predicted_RaceTime (s)"]])


Predicted 2025 GP Winner

             Driver  Predicted_RaceTime (s)
6   Charles Leclerc               83.208047
0      Lando Norris               83.251367
9      Carlos Sainz               83.468523
2   Max, Verstappen               83.810061
3     George Russel               83.810061
4      Yuki Tsunoda               83.810061
10  Fernando Alonso               83.902102
11     Lance Stroll               84.015183
1     Oscar Piastri               84.476367
8      Pierre Gasly               84.989801
5   Alexander Albon               85.075962
7    Lewis Hamilton               85.384640


### Evaluating the model

In [21]:
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Model Error (MAE) : {mae:.2f} seconds ")

Model Error (MAE) : 3.87 seconds 
