In [1]:
import fastf1
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [2]:
belgian_gp_rounds = {
    2021: 12,
    2022: 14,
    2023: 13,
    2024: 13
}
all_data = []

In [4]:
for year, rd in belgian_gp_rounds.items():
    try:
        print(f"\nLoading Belgian GP{year} (Round {rd})...")
        session = fastf1.get_session(year, rd, "R")
        session.load()

        #Get lap data
        laps = session.laps[["Driver", "Team", "LapTime"]].copy()
        laps.dropna(subset=["LapTime"], inplace=True)
        laps["LapTime (s)"] = laps["LapTime"].dt.total_seconds()

        #Average lap time per driver
        avg_lap_times = laps.groupby(["Driver", "Team"])["LapTime (s)"].mean().reset_index()

        #Finishing position from official results
        results = session.results[["Abbreviation", "Position"]]
        avg_lap_times = avg_lap_times.merge(results, left_on="Driver", right_on="Abbreviation")
        avg_lap_times["Year"] = year

        final_data = avg_lap_times[["Year", "Driver", "Team", "LapTime (s)", "Position"]]
        all_data.append(final_data)

    except Exception as e:
        print(f"Failed to fetch details of {year}:{e}")





Loading Belgian GP2021 (Round 12)...


core           INFO 	Loading data for Belgian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['33', '63', '44', '3', '5', '10', '31', '16', '6', '55', '14', '77', '99', '4', '22', '47', '9', '7', '11', '18']



Loading Belgian GP2022 (Round 14)...


core           INFO 	Loading data for Belgian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '63', '14', '16', '31', '5', '10', '23', '18', '4', '22', '24', '3', '20', '47', '6', '77', '44']



Loading Belgian GP2023 (Round 13)...


core           INFO 	Loading data for Dutch Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '10', '11', '55', '44', '4', '23', '81', '31', '18', '27', '40', '77', '22', '20', '63', '24', '16', '2']



Loading Belgian GP2024 (Round 13)...


core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '44', '16', '1', '55', '11', '63', '22', '18', '14', '3', '27', '23', '20', '77', '2', '31', '24', '10']


In [5]:
combined_df = pd.concat(all_data, ignore_index=True) #combining into 1 dataset

In [6]:
#Preparing data for model training
combined_df["DriverCode"] = combined_df["Driver"].astype("category").cat.codes
combined_df["TeamCode"] = combined_df["Team"].astype("category").cat.codes

In [7]:
# Features and target for prediction
X = combined_df[["LapTime (s)", "DriverCode", "TeamCode"]]
y = combined_df["Position"]

In [8]:
#Training the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

In [9]:
latest_gp = combined_df[combined_df["Year"] == 2024].copy()

In [10]:
#Encoding drivers and teams the same way
latest_gp["DriverCode"] = latest_gp["Driver"].astype("category").cat.codes
latest_gp["TeamCode"] = latest_gp["Team"].astype("category").cat.codes

X_future = latest_gp[["LapTime (s)", "DriverCode", "TeamCode"]]

In [11]:
#Predicting the positions
latest_gp["PredictedPosition"] = model.predict(X_future)
latest_gp = latest_gp.sort_values(by="PredictedPosition")

In [12]:
#Finally displaying the predictions
print("\n🏁 Predicted 2025 Belgian GP Finishing Order 🏁\n")
print(latest_gp[["Driver", "Team", "PredictedPosition"]])


🏁 Predicted 2025 Belgian GP Finishing Order 🏁

   Driver             Team  PredictedPosition
63    HAM         Mercedes           3.397808
65    LEC          Ferrari           3.814727
77    VER  Red Bull Racing           4.860672
73    SAI          Ferrari           4.996988
70    PIA          McLaren           6.357639
69    PER  Red Bull Racing           6.740006
72    RUS         Mercedes           6.797592
67    NOR          McLaren           7.637770
76    TSU               RB           9.060127
75    STR     Aston Martin          11.868110
60    ALO     Aston Martin          12.458636
71    RIC               RB          12.905581
64    HUL     Haas F1 Team          13.086761
59    ALB         Williams          13.627021
74    SAR         Williams          16.334129
61    BOT      Kick Sauber          16.419346
66    MAG     Haas F1 Team          16.877101
78    ZHO      Kick Sauber          18.253159
68    OCO           Alpine          18.551858
62    GAS           Alpine      

In [13]:
#Evaluatijng model performance
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"\n📊 Model MAE on validation data: {mae:.2f} position points")


📊 Model MAE on validation data: 2.02 position points
