Impact of Weather on Qualifying vs. Race Pace (Regression & T-Test)


In [17]:
import fastf1
import fastf1.plotting
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from matplotlib.ticker import FuncFormatter
import os
from scipy.stats import f_oneway
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols


In [18]:
# Parameters
circuits = ["monaco", "unitedkingdom", "emiliaromagna", "netherlands", "japan", "australia", "azerbaijan", "belgium", "mexico", "abudhabi", "saudiarabia"]
drivers = ["VER", "NOR", "RUS", "SAI"]
years = [2024, 2023, 2022, 2021]

track_lengths = {
    'monaco': 3337,          # Circuit de Monaco
    'unitedkingdom': 5891,   # Silverstone Circuit
    'emiliaromagna': 4909,   # Autodromo Internazionale Enzo e Dino Ferrari
    'netherlands': 4259,     # Circuit Zandvoort
    'japan': 5807,           # Suzuka International Racing Course
    'australia': 5303,       # Albert Park Circuit
    'azerbaijan': 6003,      # Baku City Circuit
    'belgium': 7004,         # Circuit de Spa-Francorchamps
    'mexico': 4304,          # Autódromo Hermanos Rodríguez
    'abudhabi': 5281,        # Yas Marina Circuit
    'saudiarabia': 6174      # Jeddah Corniche Circuit
}

**Getting Data Using FastF1 API!**

In [19]:
# output_dir = f'fastlaps_tires_graphs'
# os.makedirs(output_dir, exist_ok=True)

# all_lap_times = []

# count = 0
# for circuit in circuits:
#     for year in years:
#         session = fastf1.get_session(year, circuit, 'R')
#         session.load(weather=True)

#         track_length_m = track_lengths.get(circuit)

#         for driver_name in drivers:
#             laps = session.laps.pick_drivers(driver_name).pick_quicklaps().reset_index() # Get fast laps for the driver

#             # If no laps, skip (sometimes a driver may have no quicklaps)
#             if laps.empty:
#                 continue
            
#             #Get race details
#             laps['Circuit'] = circuit
#             laps['Year'] = year
#             laps['Driver'] = driver_name
#             laps['LapTimeSeconds'] = laps['LapTime'].dt.total_seconds()
#             laps["TrackLengthM"] = track_length_m
            
#             #Get weather detail
#             weather_data = laps.get_weather_data()
#             laps["WeatherTime"] = weather_data["Time"]
#             laps["AirTemp"] = weather_data["AirTemp"]
#             laps["Humidity"] = weather_data["Humidity"]
#             laps["Pressure"] = weather_data["Pressure"]
#             laps["Rainfall"] = weather_data["Rainfall"]
#             laps["TrackTemp"] = weather_data["TrackTemp"]
#             laps["WindDirection"] = weather_data["WindDirection"]
#             laps["WindSpeed"] = weather_data["WindSpeed"]

#             race_details = ['Year', 'LapNumber', 'LapTimeSeconds', 'Compound', 'Circuit', 'Driver', 'TrackLengthM', "TyreLife"]
#             weather_details = ["WeatherTime", "AirTemp", "Humidity", "Pressure",  "Rainfall", "TrackTemp", "WindDirection", "WindSpeed"]

#             all_lap_times.append(laps[race_details + weather_details])

#             #If you want to create graphs for each race for each driver in a season
#             # fig, ax = plt.subplots(figsize=(8, 6))
#             # sns.scatterplot(data=laps,
#             #                 x="LapNumber",
#             #                 y="LapTimeSeconds",
#             #                 ax=ax,
#             #                 hue="Compound",
#             #                 palette=fastf1.plotting.get_compound_mapping(session=session),
#             #                 s=80,
#             #                 linewidth=0,
#             #                 legend='auto')
            
#             # ax.set_title(f'{year} {circuit.capitalize()} GP - {driver_name} Fast Laps')
#             # ax.set_ylabel('Lap Time (s)')
#             # ax.set_xlabel('Lap Number')
#             # ax.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{x:.2f}'))

#             # plt.legend(title='Compound', bbox_to_anchor=(1.05, 1), loc='upper left')
#             # plt.tight_layout()
            
#             # fig.savefig(f'{output_dir}/{year}_{circuit}_{driver_name}.png')
#             # plt.close(fig)
# df = pd.concat(all_lap_times, ignore_index=True)

# # Save combined DataFrame to CSV
# df.to_csv(f'Data.csv', index=False)

# #around 13 minutes for this process

In [20]:
df = pd.read_csv("Data.csv")

# Normalize to account for differing track lengths
df['LapTimePerKm'] = df['LapTimeSeconds'] / (df['TrackLengthM']/1000)

tyres = df["Compound"].unique()
print(tyres)

df.head()

['MEDIUM' 'HARD' 'SOFT' 'INTERMEDIATE' 'WET' 'UNKNOWN']


Unnamed: 0,Year,LapNumber,LapTimeSeconds,Compound,Circuit,Driver,TrackLengthM,TyreLife,WeatherTime,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,LapTimePerKm
0,2024,14.0,79.723,MEDIUM,monaco,VER,3337,13.0,,,,,,,,,23.89062
1,2024,18.0,79.58,MEDIUM,monaco,VER,3337,17.0,,,,,,,,,23.847767
2,2024,23.0,78.979,MEDIUM,monaco,VER,3337,22.0,,,,,,,,,23.667666
3,2024,24.0,79.364,MEDIUM,monaco,VER,3337,23.0,,,,,,,,,23.783039
4,2024,25.0,79.38,MEDIUM,monaco,VER,3337,24.0,,,,,,,,,23.787833


In [21]:
# Group by Circuit and Driver, calculate performance metrics
circuit_performance = df.groupby(['Circuit', 'Driver']).agg({
    'LapTimeSeconds': ['mean', 'std'],
    'LapTimePerKm': ['mean', 'std']
}).reset_index()

circuit_performance.columns = ['Circuit', 'Driver', 'Avg_LapTime', 'StdDev_LapTime', 'Avg_LapTimePerKm', 'StdDev_LapTimePerKm']

# Identify driver strengths by sorting circuits from fastest to slowest for each driver
driver_circuit_rankings = circuit_performance.sort_values(['Driver', 'Avg_LapTime'])

# Calculate overall circuit performance ranking
circuit_overall_ranking = circuit_performance.groupby('Circuit')['Avg_LapTime'].mean().sort_values()

def analyze_driver_strengths(driver_circuit_rankings):
    """
    Analyze and print each driver's circuit strengths
    """
    for driver in driver_circuit_rankings['Driver'].unique():
        print(f"\nDriver {driver} Circuit Performance Ranking:")
        driver_data = driver_circuit_rankings[driver_circuit_rankings['Driver'] == driver]
        print(driver_data[['Circuit', 'Avg_LapTime', 'Avg_LapTimePerKm']].to_string(index=False))

print("Overall Circuit Performance Ranking:")
print(circuit_overall_ranking)

analyze_driver_strengths(driver_circuit_rankings)

Overall Circuit Performance Ranking:
Circuit
netherlands       75.935741
monaco            77.331982
australia         77.448774
mexico            82.622757
emiliaromagna     88.511459
abudhabi          89.679931
japan             93.612889
saudiarabia       94.192185
unitedkingdom    100.037579
azerbaijan       107.714637
belgium          112.583749
Name: Avg_LapTime, dtype: float64

Driver NOR Circuit Performance Ranking:
      Circuit  Avg_LapTime  Avg_LapTimePerKm
  netherlands    76.079153         17.863149
       monaco    77.452242         23.210141
    australia    78.377415         14.779826
       mexico    82.868274         19.253781
emiliaromagna    88.532632         18.034759
     abudhabi    89.683446         16.982285
        japan    94.392890         16.255018
  saudiarabia    94.770673         15.349963
unitedkingdom   100.215016         17.011546
   azerbaijan   108.229682         18.029266
      belgium   113.207353         16.163243

Driver RUS Circuit Performance 

Simple One-way ANOVA

In [22]:
# Store results for each driver
driver_best_circuit = {}

for driver in drivers:
    driver_data = df[df['Driver'] == driver]
    
    # Group lap times by circuit
    groups = []
    circuits = driver_data['Circuit'].unique()
    
    for circuit in circuits:
        circuit_lap_times = driver_data[driver_data['Circuit'] == circuit]['LapTimePerKm'].values 
        groups.append(circuit_lap_times)
    
    # Perform One-way ANOVA
    f_stat, p_value = f_oneway(*groups)

    print(f"\nDriver: {driver}")
    print(f"ANOVA p-value: {p_value:.4f}")
    
    if p_value > 0 and p_value < 0.05: 
        # Find circuit with lowest mean lap time (fastest)
        mean_lap_times = driver_data.groupby('Circuit')['LapTimePerKm'].mean()
        best_circuit = mean_lap_times.idxmin()
        best_mean_time = mean_lap_times.min()
        
        print(f"Best Circuit: {best_circuit} (Mean Lap Time: {best_mean_time:.3f} sec)")
    else:
        print("No significant difference between circuits.")

# Display results
for driver, result in driver_best_circuit.items():
    print(f"Driver: {driver}")
    print(f"Best Circuit: {result['best_circuit']}")
    if 'mean_lap_time' in result:
        print(f"Mean Lap Time: {result['mean_lap_time']:.3f} sec")
    print(f"ANOVA p-value: {result['p_value']:.10f}")
    print("------")


Driver: VER
ANOVA p-value: 0.0000
No significant difference between circuits.

Driver: NOR
ANOVA p-value: 0.0000
No significant difference between circuits.

Driver: RUS
ANOVA p-value: 0.0000
No significant difference between circuits.

Driver: SAI
ANOVA p-value: 0.0000
No significant difference between circuits.


Mixed-Effects Model

In [23]:
# Multi-level model accounting for repeated measures
model = smf.mixedlm("LapTimePerKm ~ C(Compound) + C(Circuit) + Year",
                    data=df, 
                    groups=df["Driver"],
                    re_formula="~TyreLife") 

result = model.fit()
print(result.summary())

                    Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       LapTimePerKm
No. Observations:        8037          Method:                   REML        
No. Groups:              4             Scale:                    0.8971      
Min. group size:         1847          Log-Likelihood:           -11021.5462 
Max. group size:         2125          Converged:                Yes         
Mean group size:         2009.2                                              
-----------------------------------------------------------------------------
                             Coef.   Std.Err.    z    P>|z|  [0.025   0.975] 
-----------------------------------------------------------------------------
Intercept                   -294.751   19.367 -15.219 0.000 -332.710 -256.793
C(Compound)[T.INTERMEDIATE]    2.526    0.116  21.711 0.000    2.298    2.754
C(Compound)[T.MEDIUM]          0.062    0.024   2.604 0.009    0.015    0.108
C(Comp



In [24]:
# Set style and palette
# plt.style.use('seaborn')
palette = {'VER': '#0600EF', 'NOR': '#FF8700', 'RUS': '#00D2BE', 'SAI': '#DC0000'}  # Team colors

# Create figure
fig, ax = plt.subplots(figsize=(12, 8))

# Create boxplot with enhanced parameters
sns.boxplot(
    x='Circuit', 
    y='LapTimePerKm', 
    hue='Driver', 
    data=df, 
    palette=palette,
    linewidth=1.5,
    fliersize=3,
    width=0.7,
    showmeans=True,
    meanprops={
        "markerfacecolor": "white",
        "markeredgecolor": "black",
        "markersize": "6"
    }
)

# Add title and labels with improved formatting
ax.set_title(
    'Lap Time Performance Across Circuits (2021-2024)\nComparison of VER, NOR, RUS, and SAI',
    fontsize=14,
    pad=20,
    fontweight='bold'
)
ax.set_ylabel('Lap Time per Km (s/km)', fontsize=12, labelpad=10)
ax.set_xlabel('Grand Prix', fontsize=12, labelpad=10)  

gp_names = {
    'monaco': "Monaco",         
    'unitedkingdom': "United Kingdom",   
    'emiliaromagna': "Emilia Romagna",   
    'netherlands': "Netherlands", 
    'japan': "Japan",      
    'australia': "Australia",     
    'azerbaijan': "Azerbaijan",   
    'belgium': "Belgium",      
    'mexico': "Mexico",          
    'abudhabi': "Abu Dhabi",       
    'saudiarabia': "Saudi Arabia"     
}
ax.set_xticklabels([gp_names.get(x.get_text(), x.get_text()) for x in ax.get_xticklabels()])

# Rotate x-axis labels and adjust layout
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.yticks(fontsize=10)

# Add grid for better readability
ax.grid(True, axis='y', linestyle='--', alpha=0.7)

# Improve legend
plt.legend(
    title='Driver',
    title_fontsize=11,
    fontsize=10,
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.
)

# # Add annotations for insights
# plt.annotate(
#     'Note: Lower lap times indicate better performance',
#     xy=(0.5, -0.2),
#     xycoords='axes fraction',
#     ha='center',
#     fontsize=10,
#     color='gray'
# )

# Adjust layout and save
plt.tight_layout()
plt.savefig('Lap Time Comparisons.png', dpi=300, bbox_inches='tight')
plt.close()

  ax.set_xticklabels([gp_names.get(x.get_text(), x.get_text()) for x in ax.get_xticklabels()])


In [42]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

palette = {'VER': '#0600EF', 'NOR': '#FF8700', 'RUS': '#00D2BE', 'SAI': '#DC0000'}  # Team colors


fig = go.Figure()

fig.add_trace(go.Box(
    y=df[df['Driver'] == "VER"]['LapTimePerKm'].values,
    x=df[df['Driver'] == "VER"]['Circuit'].map(gp_names),
    name='VER',
    marker_color='#0600EF',
    # boxpoints='all',
    # jitter=0.5,
    # whiskerwidth=0.2,
    # marker_size=2,
    # line_width=1
))
fig.add_trace(go.Box(
    y=df[df['Driver'] == "NOR"]['LapTimePerKm'].values,
    x=df[df['Driver'] == "NOR"]['Circuit'].map(gp_names),
    name='NOR',
    marker_color='#FF8700'
))
fig.add_trace(go.Box(
    y=df[df['Driver'] == "RUS"]['LapTimePerKm'].values,
    x=df[df['Driver'] == "RUS"]['Circuit'].map(gp_names),
    name='RUS',
    marker_color='#00D2BE'
))
fig.add_trace(go.Box(
    y=df[df['Driver'] == "SAI"]['LapTimePerKm'].values,
    x=df[df['Driver'] == "SAI"]['Circuit'].map(gp_names),
    name='SAI',
    marker_color='#DC0000'
))

fig.update_layout(
    yaxis=dict(
        title=dict(
            text='Lap Time per Km (s/km)')
    ),
    boxmode='group' # group together boxes of the different traces for each value of x
)


fig.show()

In [62]:
def create_subplot_boxplots(df):
    unique_circuits = df['Circuit'].unique()
    rows = (len(unique_circuits) + 1) // 2  # Calculate number of rows needed
    
    fig = make_subplots(
        rows=rows, 
        cols=2, 
        subplot_titles=[gp_names.get(circuit, circuit) for circuit in unique_circuits]
    )
    
    for i, circuit in enumerate(unique_circuits):
        row = i // 2 + 1
        col = i % 2 + 1
        
        for driver in ['VER', 'NOR', 'RUS', 'SAI']:
            circuit_driver_data = df[(df['Circuit'] == circuit) & (df['Driver'] == driver)]
            
            fig.add_trace(
                go.Box(
                    y=circuit_driver_data['LapTimePerKm'].values,
                    name=driver,
                    marker_color=palette.get(driver),
                    boxpoints='all',
                    jitter=0.3,
                    whiskerwidth=0.2,
                    marker_size=2,
                    line_width=1
                ),
                row=row, 
                col=col
            )
        
    fig.update_layout(
        height=300 * rows,  # Adjust height based on number of rows
        title_text="Lap Times per Kilometer by Circuit and Driver",
        showlegend=False
    )
    
    fig.update_yaxes(title_text='Lap Time per Km (s/km)')
    
    return fig

x = create_subplot_boxplots(df)
x.show()

In [None]:

def create_improved_dropdown_boxplot(df):
    fig = go.Figure()
    
    unique_circuits = df['Circuit'].unique()
    
    # Add traces for each circuit
    for circuit in unique_circuits:
        circuit_data = df[df['Circuit'] == circuit]
        
        # Calculate offsets to spread out box plots
        offset_step = 0.2
        offsets = {
            'VER': -1.5 * offset_step,
            'NOR': -0.5 * offset_step,
            'RUS': 0.5 * offset_step,
            'SAI': 1.5 * offset_step
        }
        
        for driver in ['VER', 'NOR', 'RUS', 'SAI']:
            driver_circuit_data = circuit_data[circuit_data['Driver'] == driver]
            
            # Create x-values with offsets
            x_values = [gp_names.get(circuit, circuit)] * len(driver_circuit_data)
            x_values = [x + offsets[driver] for x in range(len(x_values))]
            
            fig.add_trace(go.Box(
                y=driver_circuit_data['LapTimePerKm'].values,
                x=x_values,
                name=driver,
                marker_color=palette.get(driver),
                visible=(circuit == unique_circuits[0])  # Only first circuit visible initially
            ))
    
    # Create dropdown menu
    dropdown_buttons = []
    for circuit in unique_circuits:
        visibility = [False] * len(fig.data)
        for i, trace in enumerate(fig.data):
            if isinstance(trace.x[0], str) and trace.x[0] == gp_names.get(circuit, circuit):
                visibility[i] = True
        
        dropdown_buttons.append(dict(
            method='update',
            label=gp_names.get(circuit, circuit),
            args=[{'visible': visibility}, 
                  {'title': f'Lap Times for {gp_names.get(circuit, circuit)}'}]
        ))
    
    fig.update_layout(
        updatemenus=[{
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0.1,
            'xanchor': 'left',
            'y': 1.15,
            'yanchor': 'top'
        }],
        yaxis_title='Lap Time per Km (s/km)',
        title=f'Lap Times for {gp_names.get(unique_circuits[0], unique_circuits[0])}',
        height=600,  # Increased height for better visibility
        width=800,   # Increased width
        boxmode='group'
    )
    
    return fig

# Create and show the figure
fig = create_improved_dropdown_boxplot(df)
fig.show()