In [3]:
import fastf1
import pandas as pd
from fastf1 import plotting
from tqdm import tqdm  

In [None]:
import fastf1
import pandas as pd
from fastf1 import plotting
from pathlib import Path

# Enable caching. FastF1 expects the cache directory to exist already, so
# create it first; otherwise the very first run on a fresh checkout fails.
Path('f1_cache').mkdir(parents=True, exist_ok=True)
fastf1.Cache.enable_cache('f1_cache')  # specify your cache directory

def get_merged_race_data(year, race_name, session_type='Race'):
    """
    Load one FastF1 session and return its laps joined with weather and telemetry.

    Every lap row is matched, by session time, with the nearest weather sample
    and with the nearest telemetry sample recorded for the same driver.

    Args:
        year (int): Season of the event.
        race_name (str): Event name, passed to ``fastf1.get_session``.
        session_type (str): Session identifier, passed to ``fastf1.get_session``.

    Returns:
        pandas.DataFrame: One row per lap with ``Year``, ``RaceName`` and
        ``SessionType`` columns plus the matched weather columns. Telemetry
        columns are present only if telemetry could be read for at least one
        driver; the relative telemetry clock is exposed as ``TelemetryTime``.
    """
    # Load session
    session = fastf1.get_session(year, race_name, session_type)
    session.load(telemetry=True, weather=True, messages=True)

    # Get basic lap data
    laps = session.laps

    # Add race metadata (only to the laps: tagging the weather frame as well
    # would duplicate these columns as Year_x / Year_y after the merge)
    laps['Year'] = year
    laps['RaceName'] = race_name
    laps['SessionType'] = session_type

    # Merge weather with laps: both 'Time' columns are session timestamps
    weather = session.weather_data
    merged = pd.merge_asof(
        laps.sort_values('Time'),
        weather.sort_values('Time'),
        on='Time',
        direction='nearest',
    )

    # Collect telemetry per driver; a failure for one driver must not discard
    # the data of the others, so errors are reported and skipped.
    telemetry_dfs = []
    for driver in session.drivers:
        try:
            driver_laps = laps.pick_driver(driver)
            if driver_laps.empty:
                continue
            # Telemetry 'Time' counts from the start of the telemetry slice,
            # not from the start of the session, so rename it to keep it from
            # colliding with the laps' session-based 'Time' column.
            telemetry = pd.DataFrame(driver_laps.get_telemetry()).rename(
                columns={'Time': 'TelemetryTime'}
            )
            # session.drivers yields driver numbers, which correspond to the
            # laps' 'DriverNumber' column (the 'Driver' column holds
            # abbreviations and would never match).
            telemetry['DriverNumber'] = driver
            telemetry_dfs.append(telemetry)
        except Exception as e:
            print(f"Error processing driver {driver}: {e}")

    if telemetry_dfs:
        all_telemetry = pd.concat(telemetry_dfs, ignore_index=True)
        # merge_asof rejects null keys on either side
        all_telemetry = all_telemetry.dropna(subset=['SessionTime'])
        # Match each lap with the same driver's nearest telemetry sample,
        # comparing session time with session time.
        merged = pd.merge_asof(
            merged.sort_values('Time'),
            all_telemetry.sort_values('SessionTime'),
            left_on='Time',
            right_on='SessionTime',
            by='DriverNumber',
            direction='nearest',
        )

    return merged

def collect_and_merge_races(years, race_names, output_file='f1_merged_data.csv', session_type='Race'):
    """
    Gather merged data for every year/race combination and write one CSV.

    Each entry of ``years`` is combined with each entry of ``race_names``.
    A combination that raises is reported on stdout and skipped, so one
    failing race does not abort the whole collection.

    Args:
        years (list): Years to collect
        race_names (list): Race names to collect for each of the years
        output_file (str): Destination path of the combined CSV file
        session_type (str): Session type requested for every race
    """
    frames = []

    for season in years:
        for event in race_names:
            try:
                print(f"Processing {season} {event}...")
                frames.append(get_merged_race_data(season, event, session_type))
                print(f"Completed {season} {event}")
            except Exception as exc:
                print(f"Error processing {season} {event}: {exc}")

    # Nothing succeeded: report it and leave without touching the output file
    if not frames:
        print("No data was collected.")
        return

    combined = pd.concat(frames, ignore_index=True).drop_duplicates()

    # Timedelta columns are written as strings for CSV compatibility
    for name in combined.select_dtypes(include=['timedelta64']).columns:
        combined[name] = combined[name].astype(str)

    # Everything ends up in a single CSV
    combined.to_csv(output_file, index=False)
    print(f"All data saved to {output_file}")
    print(f"Total records: {len(combined)}")


In [None]:
# Races to collect, one row per season.
# Each entry is (year, event name, session code); 'R' presumably stands for
# the race session -- confirm against the FastF1 session identifiers.
races = [
    # 2023
    (2023, 'Saudi Arabian', 'R'), (2023, 'Monaco', 'R'), (2023, 'Spain', 'R'),
    # 2022
    (2022, 'Emilia Romagna', 'R'), (2022, 'Canada', 'R'),
    # 2021
    (2021, 'Italy', 'R'), (2021, 'Monaco', 'R'), (2021, 'Azerbaijan', 'R'),
    # 2020
    (2020, 'Styria', 'R'), (2020, '70th Anniversary', 'R'),
    # 2019
    (2019, 'Bahrain', 'R'), (2019, 'Spain', 'R'),
]

In [None]:

# Kept for any later cell that reads them; sorted so the order is stable.
years = sorted({race[0] for race in races})
race_names = sorted({race[1] for race in races})

# Collect exactly the (year, event) pairs listed in `races`.
# Passing `years` and `race_names` to collect_and_merge_races would request
# every year combined with every name (5 x 10 sessions instead of the 12
# listed ones), including events that were never selected for that season.
race_frames = []
for year, race_name, _session_code in races:
    print(f"Processing {year} {race_name}...")
    try:
        race_frames.append(get_merged_race_data(year, race_name, session_type='Race'))
    except Exception as e:
        # Best effort: one failing race must not abort the whole collection
        print(f"Error processing {year} {race_name}: {e}")
    else:
        print(f"Completed {year} {race_name}")

if race_frames:
    merged_data = pd.concat(race_frames, ignore_index=True).drop_duplicates()

    # Convert timedelta columns to strings for CSV compatibility
    for col in merged_data.select_dtypes(include=['timedelta64']).columns:
        merged_data[col] = merged_data[col].astype(str)

    merged_data.to_csv('f1_merged_data.csv', index=False)
    print("All data saved to f1_merged_data.csv")
    print(f"Total records: {len(merged_data)}")
else:
    print("No data was collected.")