In [17]:
import fastf1 
import pandas as pd 
from sklearn.metrics import mean_absolute_error 
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

In [138]:
session_2024 = fastf1.get_session(2024, 'Japanese', 'R')
session_2024.load()

core           INFO 	Loading data for Japanese Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']


In [165]:
qualifying_2025 = fastf1.get_session(2025, 'Japanese', 'Q')
qualifying_2025.load()  

core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '6', '44', '23', '87', '10', '55', '14', '30', '22', '27', '5', '31', '7', '18']


In [166]:
qualifying_2025.session_info

{'Meeting': {'Key': 1256,
  'Name': 'Japanese Grand Prix',
  'OfficialName': 'FORMULA 1 LENOVO JAPANESE GRAND PRIX 2025 ',
  'Location': 'Suzuka',
  'Number': 3,
  'Country': {'Key': 4, 'Code': 'JPN', 'Name': 'Japan'},
  'Circuit': {'Key': 46, 'ShortName': 'Suzuka'}},
 'ArchiveStatus': {'Status': 'Generating'},
 'Key': 10002,
 'Type': 'Qualifying',
 'Name': 'Qualifying',
 'StartDate': datetime.datetime(2025, 4, 5, 15, 0),
 'EndDate': datetime.datetime(2025, 4, 5, 16, 0),
 'GmtOffset': datetime.timedelta(seconds=32400),
 'Path': '2025/2025-04-06_Japanese_Grand_Prix/2025-04-05_Qualifying/'}

In [167]:
laps_2024 = session_2024.laps[["Driver",'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']].copy()

for col in ['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']:
    laps_2024[f"{col} (s)"] = laps_2024[col].dt.total_seconds()

laps_2024.dropna(inplace=True)
laps_2024

Unnamed: 0,Driver,LapTime,Sector1Time,Sector2Time,Sector3Time,LapTime (s),Sector1Time (s),Sector2Time (s),Sector3Time (s)
3,VER,0 days 00:01:36.472000,0 days 00:00:34.772000,0 days 00:00:43.042000,0 days 00:00:18.658000,96.472,34.772,43.042,18.658
4,VER,0 days 00:01:36.437000,0 days 00:00:34.690000,0 days 00:00:43.035000,0 days 00:00:18.712000,96.437,34.690,43.035,18.712
5,VER,0 days 00:01:36.855000,0 days 00:00:34.965000,0 days 00:00:43.196000,0 days 00:00:18.694000,96.855,34.965,43.196,18.694
6,VER,0 days 00:01:36.970000,0 days 00:00:34.987000,0 days 00:00:43.300000,0 days 00:00:18.683000,96.970,34.987,43.300,18.683
7,VER,0 days 00:01:37.329000,0 days 00:00:35.112000,0 days 00:00:43.412000,0 days 00:00:18.805000,97.329,35.112,43.412,18.805
...,...,...,...,...,...,...,...,...,...
900,ZHO,0 days 00:01:58.402000,0 days 00:00:56.403000,0 days 00:00:43.265000,0 days 00:00:18.734000,118.402,56.403,43.265,18.734
901,ZHO,0 days 00:01:37.160000,0 days 00:00:35.194000,0 days 00:00:43.257000,0 days 00:00:18.709000,97.160,35.194,43.257,18.709
902,ZHO,0 days 00:01:37.500000,0 days 00:00:35.186000,0 days 00:00:43.455000,0 days 00:00:18.859000,97.500,35.186,43.455,18.859
903,ZHO,0 days 00:01:41.117000,0 days 00:00:35.334000,0 days 00:00:43.688000,0 days 00:00:22.095000,101.117,35.334,43.688,22.095


In [159]:
filtered_rows = laps_2024[laps_2024["Driver"].isin(["GAS", "ANT"])]
filtered_rows

Unnamed: 0,Driver,LapTime,Sector1Time,Sector2Time,Sector3Time,LapTime (s),Sector1Time (s),Sector2Time (s),Sector3Time (s)
791,GAS,0 days 00:02:18.362000,0 days 00:01:14.063000,0 days 00:00:45.103000,0 days 00:00:19.196000,138.362,74.063,45.103,19.196
792,GAS,0 days 00:01:39.925000,0 days 00:00:36.680000,0 days 00:00:44.410000,0 days 00:00:18.835000,99.925,36.68,44.41,18.835
793,GAS,0 days 00:01:39.473000,0 days 00:00:36.122000,0 days 00:00:44.509000,0 days 00:00:18.842000,99.473,36.122,44.509,18.842
794,GAS,0 days 00:01:40.257000,0 days 00:00:36.667000,0 days 00:00:44.516000,0 days 00:00:19.074000,100.257,36.667,44.516,19.074
795,GAS,0 days 00:01:40.248000,0 days 00:00:36.461000,0 days 00:00:44.726000,0 days 00:00:19.061000,100.248,36.461,44.726,19.061
796,GAS,0 days 00:01:41.363000,0 days 00:00:37.525000,0 days 00:00:44.759000,0 days 00:00:19.079000,101.363,37.525,44.759,19.079
797,GAS,0 days 00:01:40.780000,0 days 00:00:36.840000,0 days 00:00:44.704000,0 days 00:00:19.236000,100.78,36.84,44.704,19.236
798,GAS,0 days 00:01:41.117000,0 days 00:00:36.846000,0 days 00:00:44.878000,0 days 00:00:19.393000,101.117,36.846,44.878,19.393
799,GAS,0 days 00:01:41.628000,0 days 00:00:37.365000,0 days 00:00:45.056000,0 days 00:00:19.207000,101.628,37.365,45.056,19.207
800,GAS,0 days 00:01:41.107000,0 days 00:00:36.798000,0 days 00:00:45.061000,0 days 00:00:19.248000,101.107,36.798,45.061,19.248


In [168]:
sector_times_2024 = laps_2024.groupby("Driver")[["Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)"]].mean().reset_index()
sector_times_2024


Unnamed: 0,Driver,Sector1Time (s),Sector2Time (s),Sector3Time (s)
0,ALO,35.8362,43.15248,18.8129
1,BOT,37.36938,43.88292,18.9306
2,GAS,37.5282,44.06974,19.07572
3,HAM,35.814,43.28116,18.75184
4,HUL,36.554531,43.537061,18.811857
5,LEC,35.49614,43.2402,18.68114
6,MAG,37.09926,43.90536,18.81042
7,NOR,35.65402,43.14868,18.7289
8,OCO,37.45232,43.9589,19.04762
9,PER,35.45464,43.05356,18.69824


In [169]:
qualifying_2025.results[["Abbreviation", 'Q1', 'Q2', 'Q3']]

Unnamed: 0,Abbreviation,Q1,Q2,Q3
1,VER,0 days 00:01:27.943000,0 days 00:01:27.502000,0 days 00:01:26.983000
4,NOR,0 days 00:01:27.845000,0 days 00:01:27.146000,0 days 00:01:26.995000
81,PIA,0 days 00:01:27.687000,0 days 00:01:27.507000,0 days 00:01:27.027000
16,LEC,0 days 00:01:27.920000,0 days 00:01:27.555000,0 days 00:01:27.299000
63,RUS,0 days 00:01:27.843000,0 days 00:01:27.400000,0 days 00:01:27.318000
12,ANT,0 days 00:01:27.968000,0 days 00:01:27.639000,0 days 00:01:27.555000
6,HAD,0 days 00:01:28.278000,0 days 00:01:27.775000,0 days 00:01:27.569000
44,HAM,0 days 00:01:27.942000,0 days 00:01:27.610000,0 days 00:01:27.610000
23,ALB,0 days 00:01:28.218000,0 days 00:01:27.783000,0 days 00:01:27.615000
87,BEA,0 days 00:01:28.228000,0 days 00:01:27.711000,0 days 00:01:27.867000


In [170]:
qualifying_data = qualifying_2025.results[['Abbreviation','Q3']].copy()
qualifying_data.dropna(subset=['Q3'], inplace=True)
qualifying_data['Qualifying Data (s)'] = qualifying_data['Q3'].dt.total_seconds()
qualifying_data.rename(columns={'Abbreviation': 'Driver'}, inplace=True)
qualifying_data = qualifying_data[["Driver", "Qualifying Data (s)"]]
qualifying_data.reset_index(drop=True, inplace=True)
qualifying_data

Unnamed: 0,Driver,Qualifying Data (s)
0,VER,86.983
1,NOR,86.995
2,PIA,87.027
3,LEC,87.299
4,RUS,87.318
5,ANT,87.555
6,HAD,87.569
7,HAM,87.61
8,ALB,87.615
9,BEA,87.867


In [171]:
qualifying_3_players = pd.DataFrame({
    "Driver": ["GAS", "ALO", "TSU", 'SAI'],
    "Qualifying Data (s)": [87.822, 87.897, 88.000, 87.836]
})

qualifying_data = pd.concat([qualifying_data, qualifying_3_players], ignore_index=True)
qualifying_data


Unnamed: 0,Driver,Qualifying Data (s)
0,VER,86.983
1,NOR,86.995
2,PIA,87.027
3,LEC,87.299
4,RUS,87.318
5,ANT,87.555
6,HAD,87.569
7,HAM,87.61
8,ALB,87.615
9,BEA,87.867


In [173]:
qualifying_data = qualifying_data[~qualifying_data["Driver"].isin(["ANT", "HAD", "ALB", 'BEA'])].reset_index(drop=True)
qualifying_data

Unnamed: 0,Driver,Qualifying Data (s)
0,VER,86.983
1,NOR,86.995
2,PIA,87.027
3,LEC,87.299
4,RUS,87.318
5,HAM,87.61
6,GAS,87.822
7,ALO,87.897
8,TSU,88.0
9,SAI,87.836


In [174]:
driver_wet_performance = {
    "VER": 0.975196, 
    "HAM": 0.976464,  
    "LEC": 0.975862,  
    "NOR": 0.978179,  
    "ALO": 0.972655,  
    "RUS": 0.968678,  
    "SAI": 0.978754,  
    "TSU": 0.996338,  
    "OCO": 0.981810,  
    "GAS": 0.978832,  
    "STR": 0.979857   
}

In [175]:
qualifying_data["WetPerformanceFactor"] = qualifying_data["Driver"].map(driver_wet_performance)
qualifying_data

Unnamed: 0,Driver,Qualifying Data (s),WetPerformanceFactor
0,VER,86.983,0.975196
1,NOR,86.995,0.978179
2,PIA,87.027,
3,LEC,87.299,0.975862
4,RUS,87.318,0.968678
5,HAM,87.61,0.976464
6,GAS,87.822,0.978832
7,ALO,87.897,0.972655
8,TSU,88.0,0.996338
9,SAI,87.836,0.978754


In [176]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
qualifying_data["WetPerformanceFactor"] = imputer.fit_transform(qualifying_data[["WetPerformanceFactor"]])
qualifying_data

Unnamed: 0,Driver,Qualifying Data (s),WetPerformanceFactor
0,VER,86.983,0.975196
1,NOR,86.995,0.978179
2,PIA,87.027,0.977884
3,LEC,87.299,0.975862
4,RUS,87.318,0.968678
5,HAM,87.61,0.976464
6,GAS,87.822,0.978832
7,ALO,87.897,0.972655
8,TSU,88.0,0.996338
9,SAI,87.836,0.978754


ANT, HAD, ALB are attended at JGP in 2025 but did not have records in 2024 so we may exclude them and add racers at 11,12,13 places 

In [136]:
import requests

# Weather Data
API_KEY = "af882fd80d359563e59defd6cd4e005f"
weather_url = f"http://api.openweathermap.org/data/2.5/forecast?lat=34.8823&lon=136.5845&dt={time}&appid={API_KEY}&units=metric"

weather_url

'http://api.openweathermap.org/data/2.5/forecast?lat=34.8823&lon=136.5845&dt=1743420000&appid=af882fd80d359563e59defd6cd4e005f&units=metric'

In [113]:
response = requests.get(weather_url)
response

<Response [200]>

In [114]:
weather_data = response.json()
print(weather_data)

{'cod': '200', 'message': 0, 'cnt': 40, 'list': [{'dt': 1754902800, 'main': {'temp': 28.74, 'feels_like': 33.83, 'temp_min': 28.35, 'temp_max': 28.74, 'pressure': 1007, 'sea_level': 1007, 'grnd_level': 1000, 'humidity': 79, 'temp_kf': 0.39}, 'weather': [{'id': 500, 'main': 'Rain', 'description': 'light rain', 'icon': '10d'}], 'clouds': {'all': 100}, 'wind': {'speed': 1.89, 'deg': 183, 'gust': 2.06}, 'visibility': 10000, 'pop': 1, 'rain': {'3h': 1.9}, 'sys': {'pod': 'd'}, 'dt_txt': '2025-08-11 09:00:00'}, {'dt': 1754913600, 'main': {'temp': 28.03, 'feels_like': 32.82, 'temp_min': 27.58, 'temp_max': 28.03, 'pressure': 1008, 'sea_level': 1008, 'grnd_level': 1001, 'humidity': 84, 'temp_kf': 0.45}, 'weather': [{'id': 500, 'main': 'Rain', 'description': 'light rain', 'icon': '10n'}], 'clouds': {'all': 100}, 'wind': {'speed': 1.03, 'deg': 135, 'gust': 1.4}, 'visibility': 10000, 'pop': 1, 'rain': {'3h': 2.04}, 'sys': {'pod': 'n'}, 'dt_txt': '2025-08-11 12:00:00'}, {'dt': 1754924400, 'main': {'

In [115]:
forecast_time = "2025-08-12 12:00:00"

for i in weather_data['list']:
    if i["dt_txt"] == forecast_time:
        print(i['dt_txt'], i['main']['temp'], i['pop'])

2025-08-12 12:00:00 25.83 0


In [116]:
# Extract the relevant weather data for the race (Sunday at 2pm local time)
forecast_time = "2025-08-12 12:00:00"
forecast_data = None

for forecast in weather_data["list"]:
    if forecast["dt_txt"] == forecast_time:
        forecast_data = forecast
        break

if forecast_data:
    rain_probability = forecast_data["pop"]
    temperature = forecast_data["main"]["temp"]  
else:
    rain_probability = 0 
    temperature = 20 

In [117]:
rain_probability

0

In [118]:
temperature

25.83

In [177]:
merged_data = qualifying_data.merge(sector_times_2024, left_on="Driver", right_on="Driver", how="left")
merged_data['Rain Probability'] = rain_probability
merged_data['Temperature'] = temperature
merged_data

Unnamed: 0,Driver,Qualifying Data (s),WetPerformanceFactor,Sector1Time (s),Sector2Time (s),Sector3Time (s),Rain Probability,Temperature
0,VER,86.983,0.975196,35.46864,42.85772,18.6492,0,25.83
1,NOR,86.995,0.978179,35.65402,43.14868,18.7289,0,25.83
2,PIA,87.027,0.977884,35.788,43.26872,18.79232,0,25.83
3,LEC,87.299,0.975862,35.49614,43.2402,18.68114,0,25.83
4,RUS,87.318,0.968678,35.80698,43.17834,18.78142,0,25.83
5,HAM,87.61,0.976464,35.814,43.28116,18.75184,0,25.83
6,GAS,87.822,0.978832,37.5282,44.06974,19.07572,0,25.83
7,ALO,87.897,0.972655,35.8362,43.15248,18.8129,0,25.83
8,TSU,88.0,0.996338,36.348796,43.591245,18.925653,0,25.83
9,SAI,87.836,0.978754,35.65906,43.05874,18.62778,0,25.83


In [178]:
X = merged_data[["Qualifying Data (s)", "Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)", "WetPerformanceFactor", "Rain Probability", "Temperature"]].fillna(0)
X

Unnamed: 0,Qualifying Data (s),Sector1Time (s),Sector2Time (s),Sector3Time (s),WetPerformanceFactor,Rain Probability,Temperature
0,86.983,35.46864,42.85772,18.6492,0.975196,0,25.83
1,86.995,35.65402,43.14868,18.7289,0.978179,0,25.83
2,87.027,35.788,43.26872,18.79232,0.977884,0,25.83
3,87.299,35.49614,43.2402,18.68114,0.975862,0,25.83
4,87.318,35.80698,43.17834,18.78142,0.968678,0,25.83
5,87.61,35.814,43.28116,18.75184,0.976464,0,25.83
6,87.822,37.5282,44.06974,19.07572,0.978832,0,25.83
7,87.897,35.8362,43.15248,18.8129,0.972655,0,25.83
8,88.0,36.348796,43.591245,18.925653,0.996338,0,25.83
9,87.836,35.65906,43.05874,18.62778,0.978754,0,25.83


In [179]:
lap_mean = laps_2024.groupby("Driver")["LapTime (s)"].mean()
lap_mean

Driver
ALO     97.801580
BOT    100.182900
GAS    100.673660
HAM     97.847000
HUL     98.903449
LEC     97.417480
MAG     99.815040
NOR     97.531600
OCO    100.458840
PER     97.206440
PIA     97.849040
RUS     97.766740
SAI     97.345580
SAR    100.866920
STR     99.019673
TSU     98.865694
VER     96.975560
ZHO    107.435100
Name: LapTime (s), dtype: float64

In [180]:
y = merged_data.merge(laps_2024.groupby("Driver")["LapTime (s)"].mean(), left_on="Driver", right_index=True)
y

Unnamed: 0,Driver,Qualifying Data (s),WetPerformanceFactor,Sector1Time (s),Sector2Time (s),Sector3Time (s),Rain Probability,Temperature,LapTime (s)
0,VER,86.983,0.975196,35.46864,42.85772,18.6492,0,25.83,96.97556
1,NOR,86.995,0.978179,35.65402,43.14868,18.7289,0,25.83,97.5316
2,PIA,87.027,0.977884,35.788,43.26872,18.79232,0,25.83,97.84904
3,LEC,87.299,0.975862,35.49614,43.2402,18.68114,0,25.83,97.41748
4,RUS,87.318,0.968678,35.80698,43.17834,18.78142,0,25.83,97.76674
5,HAM,87.61,0.976464,35.814,43.28116,18.75184,0,25.83,97.847
6,GAS,87.822,0.978832,37.5282,44.06974,19.07572,0,25.83,100.67366
7,ALO,87.897,0.972655,35.8362,43.15248,18.8129,0,25.83,97.80158
8,TSU,88.0,0.996338,36.348796,43.591245,18.925653,0,25.83,98.865694
9,SAI,87.836,0.978754,35.65906,43.05874,18.62778,0,25.83,97.34558


In [181]:
X_train, X_test, y_train, y_test = train_test_split(X, y["LapTime (s)"], test_size=0.2, random_state=38)

In [182]:
model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, random_state=38)
model.fit(X_train, y_train)

In [183]:
# Predict race times using 2025 qualifying and sector data
predicted_race_times = model.predict(X)
qualifying_data["PredictedRaceTime (s)"] = predicted_race_times
qualifying_data = qualifying_data.sort_values(by="PredictedRaceTime (s)")
qualifying_data

Unnamed: 0,Driver,Qualifying Data (s),WetPerformanceFactor,PredictedRaceTime (s)
9,SAI,87.836,0.978754,97.34558
3,LEC,87.299,0.975862,97.41748
0,VER,86.983,0.975196,97.452317
1,NOR,86.995,0.978179,97.5316
4,RUS,87.318,0.968678,97.76674
7,ALO,87.897,0.972655,97.80158
5,HAM,87.61,0.976464,97.847
2,PIA,87.027,0.977884,97.84904
8,TSU,88.0,0.996338,98.620851
6,GAS,87.822,0.978832,100.67366


In [185]:
print("\n🏁 Predicted 2025 Japanese GP Winner🏁\n")
print(qualifying_data[["Driver", "PredictedRaceTime (s)"]])

# Evaluate 
y_pred = model.predict(X_test)
print(f"\n🔍 Model Error (MAE): {mean_absolute_error(y_test, y_pred):.2f} seconds")


🏁 Predicted 2025 Japanese GP Winner🏁

  Driver  PredictedRaceTime (s)
9    SAI              97.345580
3    LEC              97.417480
0    VER              97.452317
1    NOR              97.531600
4    RUS              97.766740
7    ALO              97.801580
5    HAM              97.847000
2    PIA              97.849040
8    TSU              98.620851
6    GAS             100.673660

🔍 Model Error (MAE): 0.36 seconds
