Final product

# Imports

In [8]:
import fastf1
import fastf1.plotting
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Suppress warnings for cleaner output
# NOTE: called with no category/module filter, this silences *every* warning
# for the rest of the kernel session, including deprecation warnings raised by
# imported libraries. Comment it out (or narrow it) while debugging.
import warnings
warnings.filterwarnings('ignore')

# Simple confirmation that the setup cell executed.
print("F1 Tire Degradation Analysis Environment Ready! üèéÔ∏è")

F1 Tire Degradation Analysis Environment Ready! üèéÔ∏è


# Config

In [9]:
# Standard fuel penalty (seconds lost per kg of fuel)
FUEL_PENALTY_PER_KG = 0.03

# Approximate track evolution (seconds gained per lap due to rubbering in)
# Default is 0.03s if track not found.
#
# Keys are matched as *substrings* of the event name / location by
# apply_track_evolution, so they are labels rather than exact identifiers.
# Keep keys distinctive: a short key such as 'Spa' is also a substring of
# longer names (e.g. 'Spanish'), and a key only helps if it literally appears
# in the event metadata being searched.
# Values are hand-tuned estimates in seconds per lap.
TRACK_EVO_MAP = {
    'Bahrain': 0.02,   # High abrasion, low evolution
    'Jeddah': 0.06,    # Street circuit, high evolution
    'Australia': 0.05, # Street-ish
    'Baku': 0.06,      # Street
    'Miami': 0.06,     # Street
    'Monaco': 0.07,    # High evolution
    'Spain': 0.02,     # High abrasion
    'Canada': 0.04,
    'Austria': 0.03,
    'Silverstone': 0.03,
    'Hungary': 0.04,
    'Spa': 0.03,
    'Monza': 0.03,
    'Singapore': 0.07, # Street
    'Japan': 0.03,
    'Qatar': 0.02,
    'Austin': 0.03,
    'Mexico': 0.05,    # Low air density, slippery
    'Brazil': 0.03,
    'Las Vegas': 0.08, # New asphalt, high evo
    'Abu Dhabi': 0.04
}

# Functions

In [10]:
def calculate_gaps_to_car_ahead(laps):
    """
    Calculate, for every lap, the time gap to the car one position ahead.

    The "car ahead" for a lap is the accurate lap with the same LapNumber whose
    Position is exactly one lower. Gaps are the difference between the two
    cars' session timestamps at the start of the lap and at the end of each
    sector.

    Parameters:
    - laps: FastF1 laps dataframe (raw session.laps). Must contain
      'IsAccurate', 'Position', 'LapNumber', 'LapStartTime' and the three
      'Sector{1,2,3}SessionTime' columns; 'LapTime' is needed only when
      'LapTimeSeconds' is absent.

    Returns:
    - A new DataFrame (the input is not modified, the index is reset by the
      merge) with these columns added:
      * LapTimeSeconds (if it was missing)
      * Start_Ahead, S1_Ahead, S2_Ahead, S3_Ahead: timestamps of the car ahead
      * Start_Gap, S1_Gap, S2_Gap, S3_Gap: gaps in seconds, NaN when there is
        no reference car or when the gap would be negative
      * Avg_Gap: mean of the three cleaned sector gaps (NaN if any is NaN)
      The caller's 'Position' column is preserved under its original name.
    """
    # Work with a copy to avoid modifying original
    laps_with_gaps = laps.copy()

    # Add LapTimeSeconds if not present
    if 'LapTimeSeconds' not in laps_with_gaps.columns:
        laps_with_gaps['LapTimeSeconds'] = laps_with_gaps['LapTime'].dt.total_seconds()

    # Own timing column -> name it gets when it describes the car ahead
    ahead_names = {
        'LapStartTime': 'Start_Ahead',
        'Sector1SessionTime': 'S1_Ahead',
        'Sector2SessionTime': 'S2_Ahead',
        'Sector3SessionTime': 'S3_Ahead',
    }

    # Re-running the function on its own output must not collide with the
    # helper columns produced by a previous run.
    laps_with_gaps = laps_with_gaps.drop(
        columns=['MergeKey', *ahead_names.values()], errors='ignore'
    )

    # Only accurate laps are trusted as a timing reference
    accurate_laps = laps_with_gaps[laps_with_gaps['IsAccurate'] == True]
    car_ahead = accurate_laps[['Position', 'LapNumber', *ahead_names]].copy()

    # pandas matches NaN join keys to each other, so a lap without a position
    # would otherwise be paired with itself / other position-less laps.
    car_ahead = car_ahead.dropna(subset=['Position', 'LapNumber'])

    # Merge key for the follower: the car in position P is ahead of P + 1
    car_ahead['MergeKey'] = car_ahead['Position'] + 1

    # Drop the reference car's own Position so the merge does not rename the
    # caller's column to Position_x / Position_y, and guarantee one reference
    # row per key so the left join can never multiply laps.
    car_ahead = (
        car_ahead
        .drop(columns='Position')
        .rename(columns=ahead_names)
        .drop_duplicates(subset=['MergeKey', 'LapNumber'])
    )

    # Merge with main laps data to get car ahead timing
    laps_with_gaps = laps_with_gaps.merge(
        car_ahead,
        left_on=['Position', 'LapNumber'],
        right_on=['MergeKey', 'LapNumber'],
        how='left',
    ).drop(columns='MergeKey')

    # Gaps in seconds; a negative gap means we were ahead of the reference car
    # at that point, which is not a "following" situation -> NaN.
    gap_sources = {
        'Start_Gap': ('LapStartTime', 'Start_Ahead'),
        'S1_Gap': ('Sector1SessionTime', 'S1_Ahead'),
        'S2_Gap': ('Sector2SessionTime', 'S2_Ahead'),
        'S3_Gap': ('Sector3SessionTime', 'S3_Ahead'),
    }
    for gap_col, (own_col, ahead_col) in gap_sources.items():
        gap_seconds = (laps_with_gaps[own_col] - laps_with_gaps[ahead_col]).dt.total_seconds()
        laps_with_gaps[gap_col] = gap_seconds.where(gap_seconds >= 0)

    # Average over the *cleaned* sector gaps so negative values cannot leak in
    laps_with_gaps['Avg_Gap'] = (
        laps_with_gaps['S1_Gap'] + laps_with_gaps['S2_Gap'] + laps_with_gaps['S3_Gap']
    ) / 3

    return laps_with_gaps

In [11]:
def compute_dirty_air_feature(df, gap_col='Start_Gap', clamp=5.0):
    """
    Convert a gap-to-car-ahead column (seconds) into a dirty-air feature.

    The transform is linear: a gap of 0s maps to 1.0 (bumper-to-bumper), a gap
    of `clamp` seconds or more maps to 0.0 (clean air). Missing gaps are
    treated as clean air and negative gaps are treated as 0s, so the result
    always lies in [0, 1].

    Parameters:
    - df: DataFrame with gap data
    - gap_col: Column name for gap data (default 'Start_Gap'). If this column
      is absent, 'Start_Gap' is used as a fallback when available.
    - clamp: Gap in seconds beyond which the car is considered to be in clean
      air (default 5.0). Must be positive.

    Returns:
    - Copy of df with a 'dirty_air' column added (0-1 scale, 1=worst penalty)

    Raises:
    - ValueError: if clamp is not positive, or if neither gap_col nor
      'Start_Gap' exists in df.
    """
    if clamp <= 0:
        raise ValueError("clamp must be a positive number of seconds")

    out = df.copy()

    # Honour the caller's column first; fall back to Start_Gap otherwise
    if gap_col in out.columns:
        source_col = gap_col
    elif 'Start_Gap' in out.columns:
        source_col = 'Start_Gap'
    else:
        raise ValueError(f"Neither 'Start_Gap' nor '{gap_col}' found in dataframe")

    # Bound the gap to [0, clamp]; no car ahead (NaN) counts as clean air
    gap_clamped = out[source_col].clip(lower=0.0, upper=clamp).fillna(clamp)

    # 1 = bumper-to-bumper, 0 = at/after clamp (clean air)
    # TODO: evaluate a power transform, e.g. (1 - g/clamp) ** 2, for more non-linearity
    out['dirty_air'] = 1.0 - (gap_clamped / clamp)
    return out

In [12]:
def fuel_correct_lap_time(df, total_fuel_load=95, fuel_pace_adjustment=0.03,
                          lap_time_col='LapTimeSeconds', total_laps=None):
    """
    Subtract the estimated fuel-weight time penalty from each lap time.

    Fuel is assumed to burn linearly from total_fuel_load kg at the start of
    lap 1 down to roughly 5 kg at the end of the race.

    How race progress is determined:
    - If a 'LapNumber' column exists, it is used directly, with the race
      distance taken from `total_laps` (or the highest LapNumber in df). This
      stays correct when laps have been filtered out or a driver retired.
    - Otherwise the row order is used: per driver if a 'Driver' column exists,
      else over the whole frame, assuming the rows cover the full race.

    Parameters:
    - df: DataFrame with lap time data
    - total_fuel_load: Starting fuel load in kg (default 95kg, ending ~5kg = 90kg burned)
    - fuel_pace_adjustment: Time loss per kg of fuel (default 0.03s/kg)
    - lap_time_col: Column name for lap times in seconds. If missing, it is
      created from the 'LapTime' timedelta column.
    - total_laps: Optional race distance in laps; only used together with
      'LapNumber'. Defaults to the highest LapNumber found in df.

    Returns:
    - DataFrame with 'fuel_load', 'fuel_time_loss', and 'lap_time_fuel_corrected' columns

    Raises:
    - ValueError: if lap_time_col is missing and there is no 'LapTime' column
      to derive it from.
    """
    out = df.copy()

    # Resolve the lap time column first so bad input fails before any work
    if lap_time_col not in out.columns:
        if 'LapTime' in out.columns:
            # Populate the column that is actually read below (not a fixed name)
            out[lap_time_col] = out['LapTime'].dt.total_seconds()
        else:
            raise ValueError(f"Column '{lap_time_col}' not found and cannot create it")

    min_fuel = 5.0  # kg assumed to remain in the tank at the finish
    burnable_fuel = max(total_fuel_load - min_fuel, 0.0)

    lap_numbers = out['LapNumber'] if 'LapNumber' in out.columns else None
    if total_laps is None and lap_numbers is not None and lap_numbers.notna().any():
        total_laps = lap_numbers.max()

    if lap_numbers is not None and total_laps is not None and total_laps > 0:
        # Laps still to run at the start of each lap (lap 1 -> full distance)
        laps_remaining = (total_laps - lap_numbers + 1).clip(lower=0)
        fuel_per_lap = burnable_fuel / total_laps
    else:
        # No lap numbers: fall back to position within the (per-driver) rows
        if 'Driver' in out.columns:
            laps_in_group = out['Driver'].map(out['Driver'].value_counts())
            laps_remaining = laps_in_group - out.groupby('Driver', sort=False).cumcount()
        else:
            laps_in_group = pd.Series(len(out), index=out.index)
            laps_remaining = pd.Series(np.arange(len(out), 0, -1), index=out.index)
        fuel_per_lap = burnable_fuel / laps_in_group

    # Fuel load decreases linearly: starts at total_fuel_load, ends near min_fuel
    out['fuel_load'] = laps_remaining * fuel_per_lap + min_fuel

    # Calculate time loss due to fuel weight
    out['fuel_time_loss'] = out['fuel_load'] * fuel_pace_adjustment

    # Create fuel-corrected lap time
    out['lap_time_fuel_corrected'] = out[lap_time_col] - out['fuel_time_loss']

    return out

In [13]:
def apply_track_evolution(df, session, evo_map=None, default_evo=0.03):
    """
    Normalizes lap times to 'Green Track' (Lap 0) pace.
    It ADDS time to later laps to compensate for the track getting faster.

    The per-lap evolution factor is looked up by checking which keys of the
    evolution map occur as substrings of the event's EventName, Location or
    Country. When several keys match, the longest one wins, so a short key
    such as 'Spa' cannot shadow 'Spain'. If nothing matches, or the event
    metadata cannot be read, `default_evo` is used.

    Parameters:
    - df: DataFrame with a 'LapNumber' column and either
      'lap_time_fuel_corrected' or 'LapTimeSeconds'
    - session: object exposing `.event` with EventName / Location / Country
      attributes (e.g. a loaded FastF1 session)
    - evo_map: optional {name fragment: seconds per lap} mapping; defaults to
      the module-level TRACK_EVO_MAP
    - default_evo: factor used when no key matches (default 0.03 s/lap)

    Returns:
    - Copy of df whose 'lap_time_fuel_corrected' column has
      LapNumber * evo_factor added (the column is created from
      'LapTimeSeconds' if it did not exist). Note the correction is cumulative:
      applying the function twice to its own output adds it twice.
    """
    out = df.copy()

    # Collect the event's descriptive strings; this is best-effort, so a
    # session without usable metadata falls back to the default factor.
    event_texts = []
    try:
        event = session.event
        for field in ('EventName', 'Location', 'Country'):
            value = getattr(event, field, None)
            if isinstance(value, str):
                event_texts.append(value)
    except (AttributeError, KeyError, TypeError):
        event_texts = []

    mapping = TRACK_EVO_MAP if evo_map is None else evo_map

    # Prefer the most specific (longest) key among all substring matches
    matching_keys = [
        key for key in mapping
        if any(key in text for text in event_texts)
    ]
    evo_factor = mapping[max(matching_keys, key=len)] if matching_keys else default_evo

    print(f"üåç Track Evolution set to: {evo_factor}s per lap")

    # Logic:
    # Lap 50 is 2.0s faster than Lap 0 due to track.
    # To compare Lap 50 tire to Lap 0 tire, we must ADD 2.0s to Lap 50.
    track_evo_correction = out['LapNumber'] * evo_factor

    # Apply to the Fuel Corrected Column if exists, otherwise raw
    if 'lap_time_fuel_corrected' in out.columns:
        base_lap_time = out['lap_time_fuel_corrected']
    else:
        base_lap_time = out['LapTimeSeconds']
    out['lap_time_fuel_corrected'] = base_lap_time + track_evo_correction

    return out

In [14]:
def filter_clean_laps(df, is_accurate_col='IsAccurate', min_speed=80.0, speed_col='AverageSpeed'):
    """
    Keep only the laps considered "clean" enough for analysis.

    A lap is kept when:
    - its is_accurate_col flag equals True, and
    - its speed_col value is at least min_speed. This second check is applied
      only if speed_col is present in the frame; laps with a missing speed
      value fail it.

    Parameters:
    - df: DataFrame with lap data
    - is_accurate_col: Name of the accuracy flag column
    - min_speed: Lowest acceptable value of speed_col
    - speed_col: Name of the speed column

    Returns:
    - The matching rows of a copy of df (original index labels retained)
    """
    frame = df.copy()
    is_accurate = frame[is_accurate_col] == True

    # Without a speed column the accuracy flag is the only criterion
    if speed_col not in frame.columns:
        return frame[is_accurate]

    fast_enough = frame[speed_col] >= min_speed
    return frame[is_accurate & fast_enough]

In [15]:
def process_race_for_tire_analysis(session, include_weather=True):
    """
    Complete pipeline to process a race session for tire degradation analysis.
    Takes raw session and returns clean laps with all corrections applied.

    This is the main function that combines all the processing steps:
    gaps to the car ahead -> dirty-air feature -> fuel correction ->
    track-evolution correction -> clean-lap filter.

    The corrections run on the full lap list and the clean-lap filter is
    applied last, so the fuel model sees each driver's whole race instead of
    only the laps that survive filtering.

    Parameters:
    - session: FastF1 session object (already loaded with session.load())
    - include_weather: Accepted for API compatibility. NOTE(review): no
      weather data is merged by this function at present, so the flag has no
      effect; TrackTemp/AirTemp columns are not part of the output.

    Returns:
    - DataFrame ready for tire degradation analysis, restricted to whichever
      of these columns exist:
      * Driver, LapNumber, Compound, TyreLife, Stint, LapTime, LapTimeSeconds
      * lap_time_fuel_corrected: lap time after fuel and track-evolution
        corrections
      * fuel_load: Calculated fuel load per lap
      * dirty_air: Dirty air penalty feature (0-1)
      * Start_Gap, S1_Gap, S2_Gap, S3_Gap, Avg_Gap: Gap timing to car ahead
    """
    print("üèéÔ∏è  Processing race data for tire analysis...")

    # 0. Raw laps (copied so the session's own data is never modified)
    laps_raw = session.laps.copy()

    # 1. Gaps
    laps_gaps = calculate_gaps_to_car_ahead(laps_raw)

    # 2. Dirty Air Feature
    laps_dirty_air = compute_dirty_air_feature(laps_gaps)

    # 3. Fuel Correction (Normalize to Empty Tank)
    #    Done before filtering: the fuel model needs the complete lap history.
    laps_fuel = fuel_correct_lap_time(laps_dirty_air)

    # 4. Track Evolution (Normalize to Green Track)
    laps_evolved = apply_track_evolution(laps_fuel, session)

    # 5. Filter Clean Laps
    laps_clean = filter_clean_laps(laps_evolved)

    # Clean up output
    cols_to_keep = [
        'Driver', 'LapNumber', 'Compound', 'TyreLife',
        'LapTime', 'LapTimeSeconds', 'lap_time_fuel_corrected',
        'dirty_air', 'Start_Gap', 'Stint',
        # Documented outputs that were previously dropped
        'fuel_load', 'S1_Gap', 'S2_Gap', 'S3_Gap', 'Avg_Gap',
    ]
    # Only keep columns that actually exist
    existing_cols = [c for c in cols_to_keep if c in laps_clean.columns]

    return laps_clean[existing_cols]