In [29]:
# Put the sibling ../utils directory on the import path so local helper modules
# can be imported (presumably where pipeline.py lives — confirm).
# The path is relative, so it resolves against the kernel's working directory.
import sys
sys.path.append('../utils')

In [30]:
import pipeline
import importlib
import pandas as pd

# Re-execute the pipeline module so edits made on disk are picked up without a
# kernel restart. The reload has to come before the from-imports below, so the
# imported names bind to the freshly reloaded definitions.
importlib.reload(pipeline)
from pipeline import extract_session_data, clean_session_data, get_driver_laps, get_manual_finish_dict
from pipeline import assemble_race_dataset, extract_fp2_features, extract_quali_features, assemble_race_dataset_pre_race
from fastf1 import get_session

In [31]:
# Round 17 — Azerbaijan GP 2025: pre-race feature table.
# This cell was written before the race took place, so it contains no
# race-finish data.

# Pull the raw session data: second practice first, then qualifying.
df_fp2_17, df_quali_17 = [
    extract_session_data(2025, "Azerbaijan", session_code)
    for session_code in ("FP2", "Q")
]

# Run both sessions through the shared cleaning step, in the same order.
fp2_clean_17, quali_clean_17 = map(clean_session_data, (df_fp2_17, df_quali_17))

# Per-driver features from each session.
fp2_features_17 = extract_fp2_features(fp2_clean_17)
quali_features_17 = extract_quali_features(quali_clean_17)

# Combine the two feature sets and label every row with the event name.
race_df_17 = assemble_race_dataset_pre_race(fp2_features_17, quali_features_17)
race_df_17["grand_prix"] = "Azerbaijan"
race_df_17

core           INFO 	Loading data for Azerbaijan Grand Prix - Practice 2 [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '5', '6', '10', '12', '14', '16', '18', '22', '23', '27', '30', '31', '43', '44', '55', '63', '81', '87']
core           INFO 	Loading data for Azerbaijan Grand Prix - Qualifying [v3.4.4]
req            INFO 	Usi

Unnamed: 0,Driver,fp2_avg_lap,fp2_best_lap,fp2_total_laps,FastestQualiLap,QualiPosition,grand_prix
0,ALB,109.180286,102.177,14,103.778,20,Azerbaijan
1,ALO,119.446529,102.967,17,101.857,12,Azerbaijan
2,ANT,109.124,101.779,10,101.464,6,Azerbaijan
3,BEA,117.111133,101.891,15,102.666,15,Azerbaijan
4,BOR,110.747818,102.561,11,102.277,14,Azerbaijan
5,COL,106.80625,103.322,12,102.779,16,Azerbaijan
6,GAS,119.708308,102.674,13,103.139,19,Azerbaijan
7,HAD,108.606538,102.443,13,101.647,9,Azerbaijan
8,HAM,107.718333,101.293,9,101.821,11,Azerbaijan
9,HUL,105.979333,102.82,12,102.916,17,Azerbaijan


In [32]:
# Persist the pre-race feature table to disk; index=False keeps the row index
# out of the CSV so it does not come back as an extra column when re-read.
race_df_17.to_csv("../data/r17_azerbaijan_2025.csv", index=False)

In [33]:
import joblib

# Load the classifier and the feature scaler used for the prediction below.
# joblib.load unpickles arbitrary Python objects, so only point it at files
# from a trusted source.
# NOTE(review): the file names suggest both artefacts were fit on data from
# before round 17 and belong together — confirm they come from the same run.
model  = joblib.load("../models/logreg_scaled_quali_pre_r17_azerbaijan.pkl")
scaler = joblib.load("../models/scaler_pre_r17_azerbaijan.pkl")

In [34]:
import pandas as pd

# Read the feature table back from the same CSV path written earlier in this
# notebook, then preview the first rows.
df_azerbaijan = pd.read_csv('../data/r17_azerbaijan_2025.csv')
df_azerbaijan.head()

Unnamed: 0,Driver,fp2_avg_lap,fp2_best_lap,fp2_total_laps,FastestQualiLap,QualiPosition,grand_prix
0,ALB,109.180286,102.177,14,103.778,20,Azerbaijan
1,ALO,119.446529,102.967,17,101.857,12,Azerbaijan
2,ANT,109.124,101.779,10,101.464,6,Azerbaijan
3,BEA,117.111133,101.891,15,102.666,15,Azerbaijan
4,BOR,110.747818,102.561,11,102.277,14,Azerbaijan


In [35]:
# Model inputs.
# NOTE(review): presumably the same columns, in the same order, that the scaler
# and model were fitted on — confirm against the training notebook.
feature_cols = [
    'fp2_avg_lap',
    'fp2_best_lap',
    'fp2_total_laps',
    'FastestQualiLap',
    'QualiPosition',
]
X_azerbaijan = df_azerbaijan[feature_cols]

# Scale with the loaded scaler, then take column 1 of predict_proba
# (presumably the "podium" class — verify with model.classes_).
X_azerbaijan_scaled = scaler.transform(X_azerbaijan)
proba_podium = model.predict_proba(X_azerbaijan_scaled)[:, 1]

# Attach the probabilities to a copy of the feature table; df_azerbaijan itself
# is left untouched.
results = df_azerbaijan.assign(podium_probability=proba_podium)

# Rank drivers from most to least likely podium finisher.
results_sorted = results.sort_values("podium_probability", ascending=False)

# Keep only the driver code and its probability for display.
predicted_podium = results_sorted.loc[:, ["Driver", "podium_probability"]]

In [36]:
# Every driver with their podium probability, highest first.
predicted_podium

Unnamed: 0,Driver,podium_probability
19,VER,0.938535
14,PIA,0.859299
15,RUS,0.781816
11,LEC,0.726574
12,NOR,0.662566
10,LAW,0.616025
2,ANT,0.543121
16,SAI,0.374054
7,HAD,0.26185
1,ALO,0.207599


In [37]:
# The three drivers with the highest podium probability, i.e. the predicted podium.
predicted_podium.head(3)

Unnamed: 0,Driver,podium_probability
19,VER,0.938535
14,PIA,0.859299
15,RUS,0.781816
