In [1]:
import os

import fastf1
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [2]:
# Enable FastF1's on-disk cache so repeated runs reuse already-downloaded data.
# enable_cache() rejects a path that does not exist yet, so create the directory
# first; otherwise this cell fails on a fresh checkout.
CACHE_DIR = "f1_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
fastf1.Cache.enable_cache(CACHE_DIR)

In [3]:
# Fetch the race ("R") session of round 3 of the 2024 season -- the Australian
# Grand Prix, as the load log confirms -- and pull in its data.
session_2024 = fastf1.get_session(year=2024, gp=3, identifier="R")
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']


In [4]:
# Sanity check: print the loaded session's summary line to confirm the event.
print(session_2024)
# Further ad-hoc inspection calls, kept disabled:
# print(session_2024.laps.head())
# print(session_2024.drivers)
# print(session_2024.results)
# print(dir(session_2024))

2024 Season Round 3: Australian Grand Prix - Race


In [5]:
# Keep driver + lap time for every lap that has a recorded time, and add the
# lap time expressed as float seconds for use as the regression target.
laps_2024 = (
    session_2024.laps[["Driver", "LapTime"]]
    .dropna(subset=["LapTime"])
    .assign(**{"LapTime (s)": lambda laps: laps["LapTime"].dt.total_seconds()})
)

In [6]:
# Hand-entered 2025 qualifying table: one (driver full name, time in seconds)
# row per driver, listed from fastest to slowest.
qualifying_2025 = pd.DataFrame(
    [
        ("Lando Norris", 75.096),
        ("Oscar Piastri", 75.180),
        ("Max Verstappen", 75.481),
        ("George Russell", 75.546),
        ("Yuki Tsunoda", 75.670),
        ("Alexander Albon", 75.737),
        ("Charles Leclerc", 75.755),
        ("Lewis Hamilton", 75.973),
        ("Pierre Gasly", 75.980),
        ("Carlos Sainz", 76.062),
        ("Fernando Alonso", 76.4),
        ("Lance Stroll", 76.5),
    ],
    columns=["Driver", "QualifyingTime (s)"],
)

In [7]:
# Translate full driver names into the three-letter codes that appear in the
# FastF1 laps "Driver" column, so the two tables can be joined on that code.
driver_mapping = {
    "Lando Norris": "NOR", "Oscar Piastri": "PIA", "Max Verstappen": "VER", "George Russell": "RUS",
    "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB", "Charles Leclerc": "LEC", "Lewis Hamilton": "HAM",
    "Pierre Gasly": "GAS", "Carlos Sainz": "SAI", "Lance Stroll": "STR", "Fernando Alonso": "ALO"
}

qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

# Series.map() yields NaN for any name missing from the dict. Such a row would
# match no laps in the inner merge on "DriverCode", so fail loudly here instead
# of letting a driver silently drop out of the training data.
unmapped_drivers = qualifying_2025.loc[qualifying_2025["DriverCode"].isna(), "Driver"].tolist()
if unmapped_drivers:
    raise ValueError(f"No FastF1 driver code defined for: {unmapped_drivers}")


In [8]:
# Dump both input tables (qualifying first, then laps) for a visual check
print(qualifying_2025, laps_2024, sep="\n")

             Driver  QualifyingTime (s) DriverCode
0      Lando Norris              75.096        NOR
1     Oscar Piastri              75.180        PIA
2    Max Verstappen              75.481        VER
3    George Russell              75.546        RUS
4      Yuki Tsunoda              75.670        TSU
5   Alexander Albon              75.737        ALB
6   Charles Leclerc              75.755        LEC
7    Lewis Hamilton              75.973        HAM
8      Pierre Gasly              75.980        GAS
9      Carlos Sainz              76.062        SAI
10  Fernando Alonso              76.400        ALO
11     Lance Stroll              76.500        STR
    Driver                LapTime  LapTime (s)
0      VER 0 days 00:01:27.458000       87.458
1      VER 0 days 00:01:24.099000       84.099
2      VER 0 days 00:01:23.115000       83.115
4      GAS 0 days 00:01:37.304000       97.304
5      GAS 0 days 00:01:24.649000       84.649
..     ...                    ...          ...
993    P

In [9]:
# Inner-join the 2025 qualifying rows onto the 2024 laps via the three-letter
# code, giving one row per matching lap. Both inputs carry a "Driver" column,
# so pandas applies its default _x / _y suffixes to them in the result.
merged_data = pd.merge(
    qualifying_2025,
    laps_2024,
    how="inner",
    left_on="DriverCode",
    right_on="Driver",
)

In [10]:
# Single-feature setup: qualifying time is the only input, lap time the target
feature_columns = ["QualifyingTime (s)"]
X = merged_data[feature_columns]
y = merged_data["LapTime (s)"]

# Stop early when the merge left no rows to train on
if len(X) == 0:
    raise ValueError("Dataset is empty after preprocessing. Check data sources!")

In [11]:
# Hold out 20% of the rows (fixed seed) and fit a gradient boosting regressor
# on the remaining 80%.
split_parts = train_test_split(X, y, test_size=0.2, random_state=39)
X_train, X_test, y_train, y_test = split_parts
model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=39,
)
model.fit(X_train, y_train)

In [12]:
# Run the fitted model on each driver's 2025 qualifying time and store the
# result as a new column on the qualifying table.
quali_features = qualifying_2025[["QualifyingTime (s)"]]
predicted_lap_times = model.predict(quali_features)
qualifying_2025["PredictedRaceTime (s)"] = predicted_lap_times

In [13]:
# Display the qualifying table, now including the "PredictedRaceTime (s)" column
qualifying_2025

Unnamed: 0,Driver,QualifyingTime (s),DriverCode,PredictedRaceTime (s)
0,Lando Norris,75.096,NOR,82.712267
1,Oscar Piastri,75.18,PIA,84.330049
2,Max Verstappen,75.481,VER,85.187728
3,George Russell,75.546,RUS,83.88691
4,Yuki Tsunoda,75.67,TSU,84.417801
5,Alexander Albon,75.737,ALB,84.644262
6,Charles Leclerc,75.755,LEC,83.079012
7,Lewis Hamilton,75.973,HAM,86.098211
8,Pierre Gasly,75.98,GAS,85.5414
9,Carlos Sainz,76.062,SAI,83.621623


In [14]:
# Order the table from lowest to highest predicted time (original index kept)
qualifying_2025 = qualifying_2025.sort_values("PredictedRaceTime (s)", ascending=True)

In [15]:
# Display the table after sorting by predicted time
qualifying_2025

Unnamed: 0,Driver,QualifyingTime (s),DriverCode,PredictedRaceTime (s)
0,Lando Norris,75.096,NOR,82.712267
6,Charles Leclerc,75.755,LEC,83.079012
9,Carlos Sainz,76.062,SAI,83.621623
10,Fernando Alonso,76.4,ALO,83.871856
3,George Russell,75.546,RUS,83.88691
1,Oscar Piastri,75.18,PIA,84.330049
4,Yuki Tsunoda,75.67,TSU,84.417801
5,Alexander Albon,75.737,ALB,84.644262
2,Max Verstappen,75.481,VER,85.187728
11,Lance Stroll,76.5,STR,85.287535


In [18]:
# Print a banner followed by the driver / predicted-time columns of the table
print("\n🏁 Predicted 2025 Australian GP Winner 🏁\n")
result_columns = ["Driver", "PredictedRaceTime (s)"]
print(qualifying_2025[result_columns])


🏁 Predicted 2025 Australian GP Winner 🏁

             Driver  PredictedRaceTime (s)
0      Lando Norris              82.712267
6   Charles Leclerc              83.079012
9      Carlos Sainz              83.621623
10  Fernando Alonso              83.871856
3    George Russell              83.886910
1     Oscar Piastri              84.330049
4      Yuki Tsunoda              84.417801
5   Alexander Albon              84.644262
2    Max Verstappen              85.187728
11     Lance Stroll              85.287535
8      Pierre Gasly              85.541400
7    Lewis Hamilton              86.098211


In [19]:
# Score the model on the held-out split: mean absolute error in seconds
y_pred = model.predict(X_test)
mae_seconds = mean_absolute_error(y_test, y_pred)
print(f"\n🔍 Model Error (MAE): {mae_seconds:.2f} seconds")


🔍 Model Error (MAE): 3.47 seconds
