Impact of Weather on Qualifying vs. Race Pace (Regression & T-Test)


In [109]:
import fastf1
import fastf1.plotting
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from matplotlib.ticker import FuncFormatter
import os
from scipy.stats import f_oneway
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [98]:
# Parameters
circuits = ["monaco", "unitedkingdom", "emiliaromagna", "netherlands", "japan", "australia", "azerbaijan", "belgium", "mexico", "abudhabi", "saudiarabia"]
drivers = ["VER", "NOR", "RUS", "SAI"]
years = [2024, 2023, 2022, 2021]

track_lengths = {
    'monaco': 3337,          # Circuit de Monaco
    'unitedkingdom': 5891,   # Silverstone Circuit
    'emiliaromagna': 4909,   # Autodromo Internazionale Enzo e Dino Ferrari
    'netherlands': 4259,     # Circuit Zandvoort
    'japan': 5807,           # Suzuka International Racing Course
    'australia': 5303,       # Albert Park Circuit
    'azerbaijan': 6003,      # Baku City Circuit
    'belgium': 7004,         # Circuit de Spa-Francorchamps
    'mexico': 4304,          # Autódromo Hermanos Rodríguez
    'abudhabi': 5281,        # Yas Marina Circuit
    'saudiarabia': 6174      # Jeddah Corniche Circuit
}

**Getting Data Using FastF1 API!**

In [99]:
# output_dir = f'fastlaps_tires_graphs'
# os.makedirs(output_dir, exist_ok=True)

# all_lap_times = []

# count = 0
# for circuit in circuits:
#     for year in years:
#         session = fastf1.get_session(year, circuit, 'R')
#         session.load(weather=True)

#         track_length_m = track_lengths.get(circuit)

#         for driver_name in drivers:
#             laps = session.laps.pick_drivers(driver_name).pick_quicklaps().reset_index() # Get fast laps for the driver

#             # If no laps, skip (sometimes a driver may have no quicklaps)
#             if laps.empty:
#                 continue
            
#             #Get race details
#             laps['Circuit'] = circuit
#             laps['Year'] = year
#             laps['Driver'] = driver_name
#             laps['LapTimeSeconds'] = laps['LapTime'].dt.total_seconds()
#             laps["TrackLengthM"] = track_length_m
            
#             #Get weather detail
#             weather_data = laps.get_weather_data()
#             laps["WeatherTime"] = weather_data["Time"]
#             laps["AirTemp"] = weather_data["AirTemp"]
#             laps["Humidity"] = weather_data["Humidity"]
#             laps["Pressure"] = weather_data["Pressure"]
#             laps["Rainfall"] = weather_data["Rainfall"]
#             laps["TrackTemp"] = weather_data["TrackTemp"]
#             laps["WindDirection"] = weather_data["WindDirection"]
#             laps["WindSpeed"] = weather_data["WindSpeed"]

#             race_details = ['Year', 'LapNumber', 'LapTimeSeconds', 'Compound', 'Circuit', 'Driver', 'TrackLengthM', "TyreLife"]
#             weather_details = ["WeatherTime", "AirTemp", "Humidity", "Pressure",  "Rainfall", "TrackTemp", "WindDirection", "WindSpeed"]

#             all_lap_times.append(laps[race_details + weather_details])

#             #If you want to create graphs for each race for each driver in a season
#             # fig, ax = plt.subplots(figsize=(8, 6))
#             # sns.scatterplot(data=laps,
#             #                 x="LapNumber",
#             #                 y="LapTimeSeconds",
#             #                 ax=ax,
#             #                 hue="Compound",
#             #                 palette=fastf1.plotting.get_compound_mapping(session=session),
#             #                 s=80,
#             #                 linewidth=0,
#             #                 legend='auto')
            
#             # ax.set_title(f'{year} {circuit.capitalize()} GP - {driver_name} Fast Laps')
#             # ax.set_ylabel('Lap Time (s)')
#             # ax.set_xlabel('Lap Number')
#             # ax.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{x:.2f}'))

#             # plt.legend(title='Compound', bbox_to_anchor=(1.05, 1), loc='upper left')
#             # plt.tight_layout()
            
#             # fig.savefig(f'{output_dir}/{year}_{circuit}_{driver_name}.png')
#             # plt.close(fig)
# df = pd.concat(all_lap_times, ignore_index=True)

# # Save combined DataFrame to CSV
# df.to_csv(f'Data.csv', index=False)

# #around 13 minutes for this process

In [107]:
df = pd.read_csv("Data.csv")

# Normalize to account for differing track lengths
df['LapTimePerKm'] = df['LapTimeSeconds'] / (df['TrackLengthM']/1000)

tyres = df["Compound"].unique()
print(tyres)

df.head()

['MEDIUM' 'HARD' 'SOFT' 'INTERMEDIATE' 'WET' 'UNKNOWN']


Unnamed: 0,Year,LapNumber,LapTimeSeconds,Compound,Circuit,Driver,TrackLengthM,TyreLife,WeatherTime,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,LapTimePerKm
0,2024,14.0,79.723,MEDIUM,monaco,VER,3337,13.0,,,,,,,,,23.89062
1,2024,18.0,79.58,MEDIUM,monaco,VER,3337,17.0,,,,,,,,,23.847767
2,2024,23.0,78.979,MEDIUM,monaco,VER,3337,22.0,,,,,,,,,23.667666
3,2024,24.0,79.364,MEDIUM,monaco,VER,3337,23.0,,,,,,,,,23.783039
4,2024,25.0,79.38,MEDIUM,monaco,VER,3337,24.0,,,,,,,,,23.787833


Simple One-way ANOVA

In [None]:
# Store results for each driver
driver_best_circuit = {}

for driver in drivers:
    driver_data = df[df['Driver'] == driver]
    
    # Group lap times by circuit
    groups = []
    circuits = driver_data['Circuit'].unique()

    # Skip drivers with data for only 1 circuit (ANOVA needs ≥2 groups)
    if len(circuits) < 2:
        print(f"Driver {driver} has data for only 1 circuit. Skipping ANOVA.")
        continue
    
    for circuit in circuits:
        circuit_lap_times = driver_data[driver_data['Circuit'] == circuit]['LapTimePerKm'].values 
        groups.append(circuit_lap_times)
    
    # Perform One-way ANOVA
    f_stat, p_value = f_oneway(*groups)

    print(f"\nDriver: {driver}")
    print(f"ANOVA p-value: {p_value:.4f}")
    
    if p_value > 0 and p_value < 0.05: 
        # Find circuit with lowest mean lap time (fastest)
        mean_lap_times = driver_data.groupby('Circuit')['LapTimePerKm'].mean()
        best_circuit = mean_lap_times.idxmin()
        best_mean_time = mean_lap_times.min()
        
        print(f"Best Circuit: {best_circuit} (Mean Lap Time: {best_mean_time:.3f} sec)")
    else:
        print("No significant difference between circuits.")

# Display results
for driver, result in driver_best_circuit.items():
    print(f"Driver: {driver}")
    print(f"Best Circuit: {result['best_circuit']}")
    if 'mean_lap_time' in result:
        print(f"Mean Lap Time: {result['mean_lap_time']:.3f} sec")
    print(f"ANOVA p-value: {result['p_value']:.10f}")
    print("------")


Driver: VER
ANOVA p-value: 0.0000
No significant difference between circuits.

Driver: NOR
ANOVA p-value: 0.0000
No significant difference between circuits.

Driver: RUS
ANOVA p-value: 0.0000
No significant difference between circuits.

Driver: SAI
ANOVA p-value: 0.0000
No significant difference between circuits.


Mixed-Effects Model

In [110]:
# Multi-level model accounting for repeated measures
model = smf.mixedlm("LapTimePerKm ~ C(Compound) + C(Circuit) + C(Year)",
                    data=df, 
                    groups=df["Driver"],
                    re_formula="~C(Compound)")  # Allow tire effects to vary by driver

result = model.fit()
print(result.summary())



LinAlgError: Singular matrix