In [24]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import pickle
import json


In [25]:
# Load reference files
races = pd.read_csv('../data/races.csv')
circuits = pd.read_csv('../data/circuits.csv')
pit_stops = pd.read_csv('../data/pit_stops.csv')

# Path to cleaned data directory
cleaned_data_path = '../cleaned_data'


In [29]:
# Dictionary to store processed race data
races_cleaned = {}

# Load cleaned data
for race_folder in os.listdir(cleaned_data_path):
    try:
        race_id = int(race_folder.replace('race_', ''))
    except ValueError:
        continue  # Skip invalid folders
    
    race_path = os.path.join(cleaned_data_path, race_folder)
    
    try:
        lap_times = pd.read_csv(os.path.join(race_path, 'lap_times.csv'))
        gaps_to_leader = pd.read_csv(os.path.join(race_path, 'gaps_to_leader.csv'))
        gaps_to_car_ahead = pd.read_csv(os.path.join(race_path, 'gaps_to_car_ahead.csv'))
        
        with open(os.path.join(race_path, 'pit_stops.json'), 'r') as f:
            pit_stops_data = json.load(f)
        
        # Get circuitId from races.csv
        if race_id in races['raceId'].values:
            circuit_id = races.loc[races['raceId'] == race_id, 'circuitId'].values[0]

            races_cleaned[race_id] = {
                'circuitId': circuit_id,
                'lap_times': lap_times,
                'gaps_to_leader': gaps_to_leader,
                'gaps_to_car_ahead': gaps_to_car_ahead,
                'pit_stops': pit_stops_data
            }
        else:
            print(f"Race ID {race_id} not found in races.csv — skipping")

    except FileNotFoundError as e:
        print(f"Missing file in race folder {race_folder}: {e}")
    except Exception as e:
        print(f"Error processing race folder {race_folder}: {e}")

In [40]:
rand_raceID = 1100
print("Cleaned data keys")
print(races_cleaned[rand_raceID].keys())
print("cleaned data race laptime keys")
print(races_cleaned[rand_raceID]['lap_times'].keys())


Cleaned data keys
dict_keys(['circuitId', 'lap_times', 'gaps_to_leader', 'gaps_to_car_ahead', 'pit_stops'])
cleaned data race laptime keys
Index(['lap', '1', '4', '807', '815', '822', '825', '830', '832', '839', '840',
       '842', '846', '847', '848', '852', '855', '856', '857', '858'],
      dtype='object')


In [33]:
def detect_pit_stops(lap_times):
    pit_stops = []
    drivers = lap_times['driverId'].unique()
    
    for driver in drivers:
        driver_laps = lap_times[lap_times['driverId'] == driver].sort_values('lap')
        times = driver_laps['milliseconds'].values

        for i in range(1, len(times)):
            delta = times[i] - times[i-1]
            if delta > 20000:  # Large jump suggests pit exit (~20 seconds)
                pit_stops.append((driver, driver_laps.iloc[i]['lap']))

    return pit_stops

#example usage
# Select a single race for testing
race_id = list(races_cleaned.keys())[0]
test_lap_times = races_cleaned[race_id]['lap_times']
# Detect pit stops in the selected race
detected_pit_stops = detect_pit_stops(test_lap_times)
print(f"Detected Pit Stops for Race {race_id}: {detected_pit_stops}")

KeyError: 'driverId'