In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from src.dataLoader import loadSession
from src.features import *

In [2]:
# loading all the sessions in Hungarian GP

fp1 = loadSession(2025, 'Hungary', 'FP1')
fp2 = loadSession(2025, 'Hungary', 'FP2')
fp3 = loadSession(2025, 'Hungary', 'FP3')
quali = loadSession(2025, 'Hungary', 'Q')

core           INFO 	Loading data for Hungarian Grand Prix - Practice 1 [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '5', '6', '10', '12', '16', '18', '22', '23', '30', '31', '34', '43', '44', '55', '63', '81', '87', '97']
core           INFO 	Loading data for Hungarian Grand Prix - Practice 2 [v3.6.0]
req            INFO 	Using

In [3]:
df = getBestLaps(fp1)
df = df.merge(getAvg_top3Laps(fp2), on="Driver", how="outer")
df = df.merge(getAvg_top3Laps(fp3), on="Driver", how="outer")
df = df.merge(getQualiResults(quali), on="Driver", how="outer")

In [4]:
from src.preProcessing import processFeatures

df_clean = processFeatures(df)
df["Practice 1_BestLap"] = pd.to_timedelta(df["Practice 1_BestLap"], errors="coerce")
df["Practice 1_BestLap"] = df["Practice 1_BestLap"].dt.total_seconds()

In [5]:
df = df[df['Driver'] != 'ARO']# Alonso was not in Fp1
df = df[df['Driver'] != 'DRU']
df.head()

Unnamed: 0,Driver,Practice 1_BestLap,Practice 2_Top3Average,Practice 3_Top3Average,Quali_Position
0,ALB,76.984,77.409667,76.777333,20.0
1,ALO,,76.832667,76.108,5.0
2,ANT,76.88,76.746667,76.439667,15.0
4,BEA,76.878,77.133667,77.092667,11.0
5,BOR,77.652,78.008667,76.272,7.0


In [6]:
df['Practice 1_BestLap'] = df.apply(
  lambda row: (row['Practice 2_Top3Average'] + row['Practice 3_Top3Average']) / 2
  if pd.isna(row['Practice 1_BestLap']) else row['Practice 1_BestLap'],
  axis=1
) # giving an average lap time for those not attending fp1

In [7]:
df_dynamics = pd.read_csv("../data/processed/hungary_2025_full_features.csv")
df_dynamics.head()

Unnamed: 0,maxSpeed,avgCornerSpeed,avgThrottle,avgBrake,Driver
0,315.0,131.138462,70.135036,0.222628,LEC
1,319.0,131.614035,68.129151,0.169742,PIA
2,318.0,131.615385,66.260417,0.184028,NOR
3,321.0,130.491803,70.565543,0.183521,RUS
4,322.0,129.176471,67.391459,0.209964,ALO


In [8]:
df_combined = df.merge(df_dynamics, on='Driver', how='left')
df_combined.head()

Unnamed: 0,Driver,Practice 1_BestLap,Practice 2_Top3Average,Practice 3_Top3Average,Quali_Position,maxSpeed,avgCornerSpeed,avgThrottle,avgBrake
0,ALB,76.984,77.409667,76.777333,20.0,323.0,131.164384,67.804054,0.212838
1,ALO,76.470333,76.832667,76.108,5.0,322.0,129.176471,67.391459,0.209964
2,ANT,76.88,76.746667,76.439667,15.0,321.0,131.602941,69.385965,0.189474
3,BEA,76.878,77.133667,77.092667,11.0,315.0,129.21875,68.612903,0.18638
4,BOR,77.652,78.008667,76.272,7.0,321.0,128.21875,70.55516,0.206406


In [9]:
import fastf1
from fastf1 import get_event_schedule
import pandas as pd
from datetime import datetime

fastf1.Cache.enable_cache('../data/raw')
schedule = get_event_schedule(2025)
schedule = schedule[schedule['EventFormat'] != 'Testing']  
hungary_round = schedule[schedule['Country'] == 'Hungary'].index[0]

past_rounds = schedule[schedule.index < hungary_round]
print(f"Rounds before Hungarian GP ({hungary_round}):")
display(past_rounds[['RoundNumber', 'EventName', 'Country', 'Location']])


Rounds before Hungarian GP (14):


Unnamed: 0,RoundNumber,EventName,Country,Location
0,0,Pre-Season Testing,Bahrain,Sakhir
1,1,Australian Grand Prix,Australia,Melbourne
2,2,Chinese Grand Prix,China,Shanghai
3,3,Japanese Grand Prix,Japan,Suzuka
4,4,Bahrain Grand Prix,Bahrain,Sakhir
5,5,Saudi Arabian Grand Prix,Saudi Arabia,Jeddah
6,6,Miami Grand Prix,United States,Miami
7,7,Emilia Romagna Grand Prix,Italy,Imola
8,8,Monaco Grand Prix,Monaco,Monaco
9,9,Spanish Grand Prix,Spain,Barcelona


In [10]:
from collections import defaultdict
fastf1.Cache.enable_cache('../data/raw')

schedule = get_event_schedule(2025)
schedule = schedule[schedule['EventFormat'] != 'Testing']

hungary_round = schedule[schedule['Country'] == 'Hungary'].index[0]
past_rounds = schedule[schedule.index < hungary_round]
qualifying_positions = defaultdict(list)
race_deltas = defaultdict(list)

for rnd in past_rounds.index:
    try:
        event = fastf1.get_event(2025, rnd)
        print(f"Processing {event['EventName']} ({rnd})...")
        session_quali = event.get_session('Q') 
        session_quali.load()
        for drv in session_quali.results.itertuples():
            driver_code = drv.Abbreviation
            if drv.Position > 0:
                qualifying_positions[driver_code].append(drv.Position)

        # RACE
        session_race = event.get_session('R')
        session_race.load()
        
        laps = session_race.laps.pick_quicklaps()
        if laps.empty:
            continue
        
        fastest_lap = laps.pick_fastest()
        fastest_time = fastest_lap['LapTime'].total_seconds()

        for drv in laps['Driver'].unique():
            drv_laps = laps.pick_driver(drv)
            if drv_laps.empty:
                continue
            best_lap = drv_laps.pick_fastest()
            delta = best_lap['LapTime'].total_seconds() - fastest_time
            race_deltas[drv].append(delta)

    except Exception as e:
        print(f"Skipping round {rnd} due to error: {e}")
        continue


Skipping round 0 due to error: Cannot get testing event by round number!


core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Processing Australian Grand Prix (1)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '1', '63', '22', '23', '16', '44', '10', '55', '6', '14', '18', '7', '5', '12', '27', '30', '31', '87']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_cou

Processing Chinese Grand Prix (2)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count


Processing Japanese Grand Prix (3)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '6', '44', '23', '87', '10', '55', '14', '30', '22', '27', '5', '31', '7', '18']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count

Processing Bahrain Grand Prix (4)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '16', '12', '10', '4', '1', '55', '44', '22', '7', '6', '14', '31', '23', '27', '30', '5', '18', '87']
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count


Processing Saudi Arabian Grand Prix (5)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '63', '16', '12', '55', '44', '22', '10', '4', '23', '30', '14', '6', '87', '18', '7', '27', '31', '5']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_

Processing Miami Grand Prix (6)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '12', '81', '63', '55', '23', '16', '31', '22', '6', '44', '5', '7', '30', '27', '14', '10', '18', '87']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
re

Processing Emilia Romagna Grand Prix (7)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '1', '63', '4', '14', '55', '23', '18', '6', '10', '16', '44', '12', '5', '43', '30', '27', '31', '87', '22']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for la

Processing Monaco Grand Prix (8)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '16', '81', '44', '1', '6', '14', '31', '30', '23', '55', '22', '27', '63', '12', '5', '87', '10', '18', '43']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count


Processing Spanish Grand Prix (9)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '1', '63', '44', '12', '16', '10', '6', '14', '23', '5', '30', '18', '87', '27', '31', '55', '43', '22']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count

Processing Canadian Grand Prix (10)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '1', '81', '12', '44', '14', '4', '16', '6', '23', '22', '43', '27', '87', '31', '5', '55', '18', '30', '10']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_coun

Processing Austrian Grand Prix (11)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '16', '81', '44', '63', '30', '1', '5', '12', '10', '14', '23', '6', '43', '87', '18', '31', '22', '55', '27']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_coun

Processing British Grand Prix (12)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '4', '63', '44', '16', '12', '87', '14', '10', '55', '22', '6', '23', '31', '30', '5', '18', '27', '43']
core           INFO 	Loading data for British Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count

Processing Belgian Grand Prix (13)...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '16', '1', '23', '63', '22', '6', '30', '5', '31', '87', '10', '27', '55', '44', '43', '12', '14', '18']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count

In [11]:
qualifying_df = pd.DataFrame([
    {'Driver': drv, 'AvgQualiPosition': sum(pos)/len(pos)}
    for drv, pos in qualifying_positions.items()
])

race_delta_df = pd.DataFrame([
    {'Driver': drv, 'AvgRacePaceDelta': sum(deltas)/len(deltas)}
    for drv, deltas in race_deltas.items()
])

df_contextual = pd.merge(qualifying_df, race_delta_df, on='Driver', how='outer')
df_contextual.sort_values('AvgQualiPosition', inplace=True)
print("\n Final contextual DataFrame:")
display(df_contextual)


 Final contextual DataFrame:


Unnamed: 0,Driver,AvgQualiPosition,AvgRacePaceDelta
15,PIA,2.153846,0.349
20,VER,3.153846,0.682333
13,NOR,3.307692,0.241692
16,RUS,4.461538,1.093615
12,LEC,5.461538,1.077308
9,HAM,7.692308,0.928923
2,ANT,8.769231,1.155
0,ALB,9.769231,1.700769
8,HAD,9.923077,1.745273
1,ALO,11.384615,1.84275


In [12]:
df_combined = df.merge(df_contextual, on='Driver', how='left')
df_combined.head()

Unnamed: 0,Driver,Practice 1_BestLap,Practice 2_Top3Average,Practice 3_Top3Average,Quali_Position,AvgQualiPosition,AvgRacePaceDelta
0,ALB,76.984,77.409667,76.777333,20.0,9.769231,1.700769
1,ALO,76.470333,76.832667,76.108,5.0,11.384615,1.84275
2,ANT,76.88,76.746667,76.439667,15.0,8.769231,1.155
3,BEA,76.878,77.133667,77.092667,11.0,15.538462,2.108077
4,BOR,77.652,78.008667,76.272,7.0,15.0,1.9665


In [13]:
from fastf1 import get_session, Cache
from tqdm import tqdm
import pandas as pd
import numpy as np

Cache.enable_cache('../data/raw')

year = 2025
gp = 'Hungary'
session_types = ['Q']

driver_data = {}

for session_type in session_types:
    print(f"\n⏳ Processing {session_type} session...")

    session = get_session(year, gp, session_type)
    session.load(telemetry=True)

    for drv in tqdm(session.drivers):
        try:
            driver_abbr = session.get_driver(drv)['Abbreviation']
            drv_laps = session.laps.pick_driver(drv)
            if drv_laps.empty:
                continue

            fastest_lap = drv_laps.pick_fastest()
            telemetry = fastest_lap.get_telemetry()

            # Mean sector times
            sector1 = drv_laps['Sector1Time'].mean().total_seconds()
            sector2 = drv_laps['Sector2Time'].mean().total_seconds()
            sector3 = drv_laps['Sector3Time'].mean().total_seconds()

            # Speed performance
            high_speed_perf = telemetry[telemetry['Speed'] > 200]['Speed'].mean()
            low_speed_perf = telemetry[telemetry['Speed'] < 100]['Speed'].mean()

            # Brake usage
            avg_brake_pressure = telemetry[telemetry['Brake'] > 0]['Brake'].mean()

            # Throttle aggressiveness
            telemetry['ThrottleDiff'] = telemetry['Throttle'].diff().abs()
            throttle_aggressiveness = telemetry['ThrottleDiff'].mean()

            if driver_abbr not in driver_data:
                driver_data[driver_abbr] = {
                    'Sector1Time_mean': [],
                    'Sector2Time_mean': [],
                    'Sector3Time_mean': [],
                    'HighSpeedCornerPerf': [],
                    'LowSpeedCornerPerf': [],
                    'AvgBrakePressure': [],
                    'ThrottleAggressiveness': []
                }

            # Append values for averaging later
            driver_data[driver_abbr]['Sector1Time_mean'].append(sector1)
            driver_data[driver_abbr]['Sector2Time_mean'].append(sector2)
            driver_data[driver_abbr]['Sector3Time_mean'].append(sector3)
            driver_data[driver_abbr]['HighSpeedCornerPerf'].append(high_speed_perf)
            driver_data[driver_abbr]['LowSpeedCornerPerf'].append(low_speed_perf)
            driver_data[driver_abbr]['AvgBrakePressure'].append(avg_brake_pressure)
            driver_data[driver_abbr]['ThrottleAggressiveness'].append(throttle_aggressiveness)

        except Exception as e:
            print(f"⚠️ Error processing {drv} in {session_type}: {e}")
            continue

# Aggregate all sessions into mean per driver
final_features = []
for driver, feats in driver_data.items():
    final_features.append({
        'Driver': driver,
        'Sector1Time_mean': np.nanmean(feats['Sector1Time_mean']),
        'Sector2Time_mean': np.nanmean(feats['Sector2Time_mean']),
        'Sector3Time_mean': np.nanmean(feats['Sector3Time_mean']),
        'HighSpeedCornerPerf': np.nanmean(feats['HighSpeedCornerPerf']),
        'LowSpeedCornerPerf': np.nanmean(feats['LowSpeedCornerPerf']),
        'AvgBrakePressure': np.nanmean(feats['AvgBrakePressure']),
        'ThrottleAggressiveness': np.nanmean(feats['ThrottleAggressiveness']),
    })

df_driver_features_2025 = pd.DataFrame(final_features)
print(df_driver_features_2025.head())

core           INFO 	Loading data for Hungarian Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info



⏳ Processing Q session...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '63', '14', '18', '5', '1', '30', '6', '87', '44', '55', '43', '12', '22', '10', '31', '27', '23']
100%|██████████| 20/20 [00:08<00:00,  2.38it/s]

  Driver  Sector1Time_mean  Sector2Time_mean  Sector3Time_mean  \
0    LEC          31.37025         31.954611         26.415555   
1    PIA          32.70950         32.735944         26.609277   
2    NOR          32.01875         32.804722         25.992555   
3    RUS          31.81010         33.275000         26.841714   
4    ALO          31.92990         34.178666         26.428733   

   HighSpeedCornerPerf  LowSpeedCornerPerf  AvgBrakePressure  \
0           255.241512           96.213542               1.0   
1           254.096901           96.227851               1.0   
2           254.083531           98.000000               1.0   
3           253.839302           96.470487               1.0   
4           251.810739           98.435715               1.0   

   ThrottleAggressiveness  
0                3.734982  
1                4.032432  
2                3.814685  
3                3.905455  
4                4.001611  





In [14]:
print(df_driver_features_2025)

   Driver  Sector1Time_mean  Sector2Time_mean  Sector3Time_mean  \
0     LEC         31.370250         31.954611         26.415555   
1     PIA         32.709500         32.735944         26.609277   
2     NOR         32.018750         32.804722         25.992555   
3     RUS         31.810100         33.275000         26.841714   
4     ALO         31.929900         34.178666         26.428733   
5     STR         32.039833         33.622444         26.277166   
6     BOR         32.195833         32.829555         26.256117   
7     VER         31.126833         33.049222         25.594823   
8     LAW         30.954083         32.883888         26.837294   
9     HAD         32.741300         31.809200         26.809928   
10    BEA         32.266750         33.082416         28.380916   
11    HAM         32.740250         34.149666         27.579000   
12    SAI         36.805181         32.318466         27.531000   
13    COL         31.580375         32.582250         25.53550

In [15]:
df_combined = df.merge(df_driver_features_2025, on='Driver', how='left')
df_combined.head()

Unnamed: 0,Driver,Practice 1_BestLap,Practice 2_Top3Average,Practice 3_Top3Average,Quali_Position,Sector1Time_mean,Sector2Time_mean,Sector3Time_mean,HighSpeedCornerPerf,LowSpeedCornerPerf,AvgBrakePressure,ThrottleAggressiveness
0,ALB,76.984,77.409667,76.777333,20.0,37.700142,32.168555,27.138666,252.173437,96.572114,1.0,3.334448
1,ALO,76.470333,76.832667,76.108,5.0,31.9299,34.178666,26.428733,251.810739,98.435715,1.0,4.001611
2,ANT,76.88,76.746667,76.439667,15.0,32.515,34.369333,27.808777,253.983273,97.1,1.0,3.685121
3,BEA,76.878,77.133667,77.092667,11.0,32.26675,33.082416,28.380916,252.922881,95.242163,1.0,3.766784
4,BOR,77.652,78.008667,76.272,7.0,32.195833,32.829555,26.256117,253.215524,96.13951,1.0,3.760832


In [16]:
df_contextual = df_contextual[df_contextual['Driver'] != 'DOO']

In [17]:
df_combined = df.merge(df_contextual, on='Driver', how='left')
df_combined.head()

Unnamed: 0,Driver,Practice 1_BestLap,Practice 2_Top3Average,Practice 3_Top3Average,Quali_Position,AvgQualiPosition,AvgRacePaceDelta
0,ALB,76.984,77.409667,76.777333,20.0,9.769231,1.700769
1,ALO,76.470333,76.832667,76.108,5.0,11.384615,1.84275
2,ANT,76.88,76.746667,76.439667,15.0,8.769231,1.155
3,BEA,76.878,77.133667,77.092667,11.0,15.538462,2.108077
4,BOR,77.652,78.008667,76.272,7.0,15.0,1.9665


In [18]:
common_drivers = df_combined['Driver'].isin(df_contextual['Driver'])
df_combined = df_combined[common_drivers]
df_combined = df_combined.sort_values(by='Driver').reset_index(drop=True)
df_contextual = df_contextual.sort_values(by='Driver').reset_index(drop=True)
df_final = df_combined.merge(df_contextual, on='Driver', how='inner')

In [19]:
for col in df_final.columns:
  print(col)

Driver
Practice 1_BestLap
Practice 2_Top3Average
Practice 3_Top3Average
Quali_Position
AvgQualiPosition_x
AvgRacePaceDelta_x
AvgQualiPosition_y
AvgRacePaceDelta_y


In [22]:
df_final = df_final.rename(columns={'AvgBrakePressure': 'avgBrake'})
#df_final = df_final.drop(columns=['HighSpeedCornerPerf'])
df_final.head()

Unnamed: 0,Driver,Practice 1_BestLap,Practice 2_Top3Average,Practice 3_Top3Average,Quali_Position,AvgQualiPosition_x,AvgRacePaceDelta_x,AvgQualiPosition_y,AvgRacePaceDelta_y
0,ALB,76.984,77.409667,76.777333,20.0,9.769231,1.700769,9.769231,1.700769
1,ALO,76.470333,76.832667,76.108,5.0,11.384615,1.84275,11.384615,1.84275
2,ANT,76.88,76.746667,76.439667,15.0,8.769231,1.155,8.769231,1.155
3,BEA,76.878,77.133667,77.092667,11.0,15.538462,2.108077,15.538462,2.108077
4,BOR,77.652,78.008667,76.272,7.0,15.0,1.9665,15.0,1.9665


In [23]:
df_final.to_csv("2025_Hungary_ModelFeatures.csv", index=False)

In [24]:
for col in df_final:
  print(col)
  

Driver
Practice 1_BestLap
Practice 2_Top3Average
Practice 3_Top3Average
Quali_Position
AvgQualiPosition_x
AvgRacePaceDelta_x
AvgQualiPosition_y
AvgRacePaceDelta_y
