In [1]:
# Add this at the start of your notebook
import logging

# Clear any existing handlers
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

# Configure logging to display in notebook
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True  # This will override any existing configuration
)

# Test the logging
logging.debug("Test debug message")
logging.info("Test info message")

2024-12-07 13:00:22,361 - DEBUG - Test debug message
2024-12-07 13:00:22,363 - INFO - Test info message


In [2]:
import pandas as pd
import numpy as np
import torch
from pathlib import Path
import logging
import sys

# If running this in a Jupyter notebook, ensure plots are displayed inline by adding:
# %matplotlib inline

# Go two directories up to reach 'race_simulation' directory
sys.path.append('../../')

from common.race import Race
from common.driver import Driver
from common.features import RaceFeatures
from common.data_preparation import load_data_splits
from common.evaluation import evaluate_race_simulation
from common.race_utils import extract_pit_strategies, extract_safety_car_periods, get_race_length
from models.lstm.race_simulator_lstm import LSTMRaceSimulator
from models.lstm.lstm_model import load_model_with_preprocessor
from common.utils import plot_race_positions, plot_lap_times, plot_actual_vs_predicted

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def initialize_drivers(
    drivers_df: pd.DataFrame, 
    preprocessor, 
    race_features: RaceFeatures, 
    race: Race, 
    lap_times_df: pd.DataFrame, 
    special_laps: pd.DataFrame,
    circuit_attributes_df: pd.DataFrame
) -> list:
    """
    Initializes driver objects for the simulation.
    """

    pit_strategies = extract_pit_strategies(special_laps, race.race_id)
    drivers_race_df = drivers_df[drivers_df['raceId'] == race.race_id]

    if drivers_race_df.empty:
        raise ValueError(f"No drivers found for raceId {race.race_id}")

    # Extract starting grid positions
    grid_positions = lap_times_df[
        (lap_times_df['raceId'] == race.race_id) & (lap_times_df['lap'] == 1)
    ][['driverId', 'grid']].drop_duplicates()
    grid_mapping = grid_positions.set_index('driverId')['grid'].to_dict()

    # Fetch circuit attributes
    circuit_matches = circuit_attributes_df[circuit_attributes_df['circuitId'] == race.circuit_id]
    if circuit_matches.empty:
        # If no circuit attributes, use defaults
        logging.warning(f"No circuit attributes found for circuitId {race.circuit_id}. Using defaults.")
        circuit_length_val = 5.0
        circuit_type_encoded_val = 0
        alt_val = 0.0
    else:
        circuit_data = circuit_matches.iloc[0]
        circuit_length_val = circuit_data.get('circuit_length', 5.0)
        circuit_type_encoded_val = circuit_data.get('circuit_type_encoded', 0)
        alt_val = circuit_data.get('alt', 0.0)

    drivers = []
    for idx, row in drivers_race_df.iterrows():
        driver_id = row['driverId']
        grid_position = grid_mapping.get(driver_id, len(drivers_race_df))
        pit_strategy_info = pit_strategies.get(driver_id, {})
        starting_compound = pit_strategy_info.get('starting_compound', 2)
        pit_strategy = pit_strategy_info.get('pit_strategy', [])
        constructor_id = row.get('constructorId', -1)

        # Build static_features_dict
        static_features_dict = {
            'driver_overall_skill': row['driver_overall_skill'],
            'driver_circuit_skill': row['driver_circuit_skill'],
            'driver_consistency': row['driver_consistency'],
            'driver_reliability': row['driver_reliability'],
            'driver_aggression': row['driver_aggression'],
            'driver_risk_taking': row['driver_risk_taking'],
            'constructor_performance': row['constructor_performance'],
            'fp1_median_time': row['fp1_median_time'],
            'fp2_median_time': row['fp2_median_time'],
            'fp3_median_time': row['fp3_median_time'],
            'quali_time': row['quali_time'],
            'circuit_length': circuit_length_val,
            'circuit_type_encoded': circuit_type_encoded_val,
            'alt': alt_val
        }

        # Create the static features array
        static_features = np.array([static_features_dict[feature] for feature in preprocessor.static_feature_names])

        # Prepare initial dynamic features
        initial_dynamic_features = {
            'tire_age': 0,
            'fuel_load': 100.0,
            'track_position': grid_position,
            'TrackTemp': 35.0,
            'AirTemp': 25.0,
            'Humidity': 50.0,
            'TrackStatus': 1,
            'is_pit_lap': 0,
            'tire_compound': starting_compound,
            'cumulative_race_time': 0.0,
            'GapToLeader_ms': 0.0,
            'IntervalToPositionAhead_ms': 0.0
        }

        driver = Driver(
            driver_id=driver_id,
            name=row.get('driverName', f"Driver {driver_id}"),
            static_features=static_features,
            initial_dynamic_features=initial_dynamic_features,
            start_position=grid_position,
            pit_strategy=pit_strategy,
            starting_compound=starting_compound,
            constructor_id=constructor_id
        )

        # Scale static features
        driver.static_features = preprocessor.transform_static_features(static_features).flatten()

        # Initialize sequence with zeros
        driver.sequence = np.zeros((preprocessor.window_size, len(race_features.dynamic_features) + 1))

        # Determine initial lap time based on FP or fallback to quali_time
        fp_times = []
        for fp_col in ['fp1_median_time', 'fp2_median_time', 'fp3_median_time']:
            val = row.get(fp_col, np.nan)
            # Only consider if it's not NaN and > 0
            if pd.notna(val) and val > 0:
                fp_times.append(val)

        # If we have FP times, average them (assuming they are in seconds) and convert to ms
        # If no FP times, fallback to quali_time in ms
        if fp_times:
            initial_lap_time = np.mean(fp_times) * 1000.0
        else:
            initial_lap_time = row['quali_time'] * 1000.0

        # Scale the initial lap time and dynamic features to create initial sequence entry
        initial_lap_time_scaled = preprocessor.lap_time_scaler.transform([[initial_lap_time]])[0][0]
        dynamic_values = [driver.dynamic_features[f] for f in race_features.dynamic_features]
        dynamic_features_scaled = preprocessor.dynamic_scaler.transform(np.array(dynamic_values).reshape(1, -1)).flatten()

        sequence_entry = np.concatenate(([initial_lap_time_scaled], dynamic_features_scaled))
        driver.sequence = np.tile(sequence_entry, (preprocessor.window_size, 1))

        drivers.append(driver)

    drivers.sort(key=lambda x: x.start_position)
    logging.info(f"Initialized {len(drivers)} drivers for raceId {race.race_id}")
    return drivers




def main():
    # Load train/test split
    train_df, test_df = load_data_splits()

    # Load model and preprocessor
    model_path = '../../models/lstm/lstm_model_optuna_tuned.pth'
    model, preprocessor = load_model_with_preprocessor(model_path)

    # Load data
    lap_times = pd.read_csv('../../data/LAPS.csv', na_values=['\\N', 'NaN', ''])
    special_laps = pd.read_csv('../../data/SPECIAL_LAPS.csv', na_values=['\\N', 'NaN', ''])
    drivers_df = pd.read_csv('../../data/util/drivers_attributes.csv', na_values=['\\N', 'NaN', ''])
    circuit_attributes_df = pd.read_csv('../../data/util/circuit_attributes.csv', na_values=['\\N', 'NaN', ''])
    constructors = pd.read_csv('../../../data/raw_data/constructors.csv', na_values=['\\N', 'NaN', ''])

    # Just an example: Weather data might be integrated as cumulative times if desired.
    # If the simulator expects a 'weather_df', ensure you have it prepared. For now, let's assume 
    # weather_df is integrated into the LAPS.csv as done previously:
    weather_df = lap_times[['raceId', 'seconds_from_start', 'TrackTemp', 'AirTemp', 'Humidity', 'cumulative_milliseconds']].drop_duplicates()
    weather_df = weather_df.dropna(subset=['TrackTemp', 'AirTemp', 'Humidity', 'seconds_from_start', 'cumulative_milliseconds'])

    # Create constructor mapping
    constructor_mapping = constructors.set_index('constructorId')['name'].str.lower().to_dict()

    if 'code' in drivers_df.columns:
        driver_code_mapping = drivers_df.set_index('driverId')['code'].to_dict()
    else:
        drivers_df['code'] = drivers_df['forename'].str[0].str.upper() + drivers_df['surname'].str[:2].str.upper()
        driver_code_mapping = drivers_df.set_index('driverId')['code'].to_dict()

    TEAM_COLORS = {
        'alpine': '#fe86bc',
        'aston martin': '#006f62',
        'ferrari': '#dc0000',
        'haas': '#B6BABD',
        'mclaren': '#ff8700',
        'mercedes': '#27F4D2',
        'red bull': '#3671C6',
        'sauber': '#52E252',
        'williams': '#64C4FF',
        'rb': '#6692FF'
    }

    # During race simulation
    simulator = LSTMRaceSimulator(model, preprocessor, weather_df)
    simulator.clear_input_data()  # Clear any old data
    race_result = simulator.simulate_race(race)

    # After simulation, get the data
    input_df = simulator.get_input_data_df()

    # Basic analysis
    print("Input data shape:", input_df.shape)
    print("\nFeature statistics:")
    print(input_df.describe())

    # Check pit stop predictions
    pit_stops = input_df[input_df['is_pit_lap'] == 1]
    print("\nPit stop lap time predictions:")
    print(pit_stops[['lap', 'driver_id', 'predicted_lap_time']].head())

    # Save for further analysis
    input_df.to_csv('race_simulation_inputs.csv', index=False)

    all_results = []
    test_races = test_df['raceId'].unique()

    Path('../00_results/lstm').mkdir(parents=True, exist_ok=True)
    Path('../01_plots/lstm').mkdir(parents=True, exist_ok=True)

    for race_id in test_races:
        race_data = test_df[test_df['raceId'] == race_id]
        if race_data.empty:
            logging.warning(f"No data for race {race_id}, skipping...")
            continue

        race_length = get_race_length(race_id, lap_times)
        safety_car_periods = extract_safety_car_periods(special_laps, race_id)

        # When creating the Race instance:
        race = Race(
            race_id=race_id,
            circuit_id=race_data['circuitId'].iloc[0],
            total_laps=race_length,
            circuit_length=race_data['circuit_length'].iloc[0],  # Add this line
            weather_conditions={},
            safety_car_periods=safety_car_periods
        )

        race_features = RaceFeatures()
        drivers = initialize_drivers(drivers_df, preprocessor, race_features, race, lap_times, special_laps, circuit_attributes_df)
        race.drivers.extend(drivers)

        # Simulate the race
        race_lap_data = simulator.simulate_race(race)

        # After simulating each race:
        sim_results = evaluate_race_simulation(race, race_data)
        sim_results['metrics']['race_id'] = race_id
        all_results.append(sim_results['metrics'])

        detailed_df = sim_results['detailed_results']
        detailed_df.to_csv(f'../00_results/lstm/race_{race_id}_detailed.csv', index=False)

        plot_race_positions(
            race=race,
            constructor_mapping=constructor_mapping,
            driver_code_mapping=driver_code_mapping,
            TEAM_COLORS=TEAM_COLORS,
            save_path=f'../01_plots/lstm/race_{race_id}_positions.png'
        )
        plot_lap_times(
            race=race,
            constructor_mapping=constructor_mapping,
            driver_code_mapping=driver_code_mapping,
            TEAM_COLORS=TEAM_COLORS,
            save_path=f'../01_plots/lstm/race_{race_id}_lap_times.png'
        )

        # Now plot actual vs predicted lap times:
        plot_actual_vs_predicted(race, test_df)

        #logging.info(f"Results for Race {race_id}: RMSE: {sim_results['metrics']['rmse']:.2f}, MAE: {sim_results['metrics']['mae']:.2f}")

    if not all_results:
        logging.error("No successful race simulations completed")
        return

    results_df = pd.DataFrame(all_results)
    overall_metrics = results_df.agg({
        'rmse': ['mean', 'std'],
        'mae': ['mean', 'std'],
        'positions_correct': ['mean', 'std'],
        'position_changes_accuracy': ['mean', 'std'],
        'num_drivers': 'mean'
    }).round(3)
    results_df.to_csv('../00_results/lstm/simulation_evaluation.csv', index=False)
    overall_metrics.to_csv('../00_results/lstm/simulation_overall_metrics.csv')

if __name__ == "__main__":
    main()


2024-12-07 13:00:24,314 - DEBUG - matplotlib data path: /Users/I551659/Library/Caches/pypoetry/virtualenvs/ie500-data-mining-group7-LKR-OXJO-py3.12/lib/python3.12/site-packages/matplotlib/mpl-data
2024-12-07 13:00:24,317 - DEBUG - CONFIGDIR=/Users/I551659/.matplotlib
2024-12-07 13:00:24,327 - DEBUG - interactive is False
2024-12-07 13:00:24,328 - DEBUG - platform is darwin
2024-12-07 13:00:24,356 - DEBUG - CACHEDIR=/Users/I551659/.matplotlib
2024-12-07 13:00:24,358 - DEBUG - Using fontManager instance from /Users/I551659/.matplotlib/fontlist-v390.json
2024-12-07 13:00:24,776 - INFO - Model and preprocessor loaded from ../../models/lstm/lstm_model_optuna_tuned.pth


UnboundLocalError: cannot access local variable 'race' where it is not associated with a value