# USAGE EXAMPLES

### QUICK START - Run this in notebook cells:

# 1. Test driver encoder for a session
```
session_config = SessionConfig(year=2024, race="Monaco Grand Prix", session_type="R")
driver_encoder = setup_driver_encoder(session_config)
print("Driver mappings:", driver_encoder.driver_to_number)
```

# 2. Single experiment
```
result = run_quick_test()
```

# 3. Custom experiment with different drivers
```
# Register a custom scope (use step 1 first to check which drivers are available for the session)
DATA_SCOPES["custom"] = {
    "sessions": [SessionConfig(year=2024, race="Monaco Grand Prix", session_type="R")],
    "drivers": ["HAM", "LEC"]  # Will be auto-converted to numbers
}
custom_result = run_single_experiment(
    scope_name="custom", 
    window_config={"window_size": 150, "prediction_horizon": 5}
)
```

# 4. Run all experiments (warning: takes time!)
```
all_results = run_all_experiments()
summary_df = create_summary_report(all_results)
print(summary_df)
```

# 5. Manual step-by-step for debugging
```
dataset = create_dataset("one_session_all_drivers", WINDOW_CONFIGS[0])
X_train, X_test, y_train, y_test = prepare_data(dataset)
class_names, class_dist = analyze_class_distribution(dataset, y_train)

models = create_models()
rf_result = train_single_model(
    models["random_forest"], "random_forest", 
    X_train, X_test, y_train, y_test
)
```

In [8]:
# F1 Safety Car Prediction - Step-by-Step Jupyter Notebook Implementation

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix
from aeon.classification.feature_based import Catch22Classifier
from aeon.classification.dummy import DummyClassifier
from f1_etl import SessionConfig, DataConfig, create_safety_car_dataset, DriverLabelEncoder, FixedVocabTrackStatusEncoder
from f1_etl.config import create_multi_session_configs
import fastf1

In [11]:
# ============================================================================
# STEP 1: CONFIGURATION
# ============================================================================

def setup_driver_encoder(session_config):
    """Build a DriverLabelEncoder fitted on the drivers of one session.

    Args:
        session_config: Object exposing ``year``, ``race`` and
            ``session_type`` attributes; they are forwarded, in that order,
            to ``fastf1.get_session``.

    Returns:
        The freshly created ``DriverLabelEncoder`` after ``fit_session`` has
        been called on the loaded FastF1 session.
    """
    # The FastF1 session is the source of the driver mappings
    f1_session = fastf1.get_session(
        session_config.year, session_config.race, session_config.session_type
    )
    f1_session.load()

    # Fit a brand-new encoder on that session
    encoder = DriverLabelEncoder()
    encoder.fit_session(f1_session)

    # Report what was fitted so the caller can pick valid abbreviations
    print(f"✅ Driver encoder fitted for {session_config.race}")
    known_drivers = list(encoder.driver_to_number.keys())
    print(f"Available drivers: {known_drivers}")

    return encoder

def setup_experiment_config():
    """Define experimental parameters.

    Returns:
        tuple: ``(WINDOW_CONFIGS, DATA_SCOPES)`` where

        * ``WINDOW_CONFIGS`` is a list of dicts, each with a ``"window_size"``
          and a ``"prediction_horizon"`` entry.
        * ``DATA_SCOPES`` maps a scope name to a dict with a ``"sessions"``
          list and a ``"drivers"`` entry. ``"drivers"`` is either a list of
          driver abbreviations or ``None`` (used by the "all drivers" scope).
          Every scope carries both keys so consumers can index
          ``scope["drivers"]`` without a ``KeyError``.
    """

    # Window configurations to test
    WINDOW_CONFIGS = [
        {"window_size": 200, "prediction_horizon": 10},
        {"window_size": 300, "prediction_horizon": 15},
        {"window_size": 250, "prediction_horizon": 20},
    ]

    # Data scope configurations (without driver mapping - will be handled dynamically)
    DATA_SCOPES = {
        "one_session_one_driver": {
            "sessions": [SessionConfig(year=2024, race="Monaco Grand Prix", session_type="R")],
            "drivers": ["VER"],  # Will be converted to numbers dynamically
        },
        "one_session_all_drivers": {
            "sessions": [SessionConfig(year=2024, race="Monaco Grand Prix", session_type="R")],
            "drivers": None,
        },
        "whole_season_one_driver": {
            # All 2024 race sessions, testing excluded
            "sessions": create_multi_session_configs(
                year=2024,
                session_types=['R'],
                include_testing=False,
            ),
            # This scope is named "one driver", so it must name one; the key
            # was previously missing, unlike in the other scopes.
            "drivers": ["VER"],  # Will be converted to numbers dynamically
        },
    }

    return WINDOW_CONFIGS, DATA_SCOPES

# Run this first
# WINDOW_CONFIGS, DATA_SCOPES = setup_experiment_config()
# print("✅ Configuration loaded")
# print(f"Window configs: {len(WINDOW_CONFIGS)}")
# print(f"Data scopes: {list(DATA_SCOPES.keys())}")

In [5]:
import fastf1
from f1_etl import DriverLabelEncoder

# Compare driver mappings between two 2024 races: fit one encoder per session.
session_sa = fastf1.get_session(2024, "Saudi Arabian Grand Prix", 'R')
session_mon = fastf1.get_session(2024, "Monaco Grand Prix", 'R')

# Sessions must be loaded before an encoder can be fitted on them
session_sa.load()
session_mon.load()

# Encoder fitted on the Saudi Arabian Grand Prix race
driver_enc_sa = DriverLabelEncoder()
driver_enc_sa.fit_session(session_sa)

# Encoder fitted on the Monaco Grand Prix race; as the last expression of the
# cell, the result of this call is what the notebook displays
driver_enc_mon = DriverLabelEncoder()
driver_enc_mon.fit_session(session_mon)

core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
core           INFO 	Loading data for Monaco Grand Prix 

<f1_etl.encoders.DriverLabelEncoder at 0x12ab4f890>

In [6]:
driver_enc_sa.driver_to_number

{'VER': '1',
 'PER': '11',
 'LEC': '16',
 'PIA': '81',
 'ALO': '14',
 'RUS': '63',
 'BEA': '38',
 'NOR': '4',
 'HAM': '44',
 'HUL': '27',
 'ALB': '23',
 'MAG': '20',
 'OCO': '31',
 'SAR': '2',
 'TSU': '22',
 'RIC': '3',
 'BOT': '77',
 'ZHO': '24',
 'STR': '18',
 'GAS': '10'}

In [7]:
driver_enc_mon.driver_to_number

{'LEC': '16',
 'PIA': '81',
 'SAI': '55',
 'NOR': '4',
 'RUS': '63',
 'VER': '1',
 'HAM': '44',
 'TSU': '22',
 'ALB': '23',
 'GAS': '10',
 'ALO': '14',
 'RIC': '3',
 'BOT': '77',
 'STR': '18',
 'SAR': '2',
 'ZHO': '24',
 'OCO': '31',
 'PER': '11',
 'HUL': '27',
 'MAG': '20'}

In [9]:
# Session configs for every 2024 race ('R') session, testing excluded
sessions_2024_season = create_multi_session_configs(
    year=2024, 
    session_types=['R'], 
    include_testing=False
)
# Whole-season data config restricted to a single driver and without weather.
# Drivers are given as car-number strings; '1' is VER in the driver_to_number
# mappings printed by the encoder cells of this notebook.
data_config_2024_season_ver = DataConfig(
    sessions=sessions_2024_season, 
    drivers=['1'], 
    include_weather=False
)

2025-07-01 21:48:19,548 - f1_etl - INFO - Generated 24 SessionConfig objects for 2024 season


In [10]:
# Select the configuration that the dataset-creation cell passes as `config`
data_config = data_config_2024_season_ver

In [47]:
# Build the windowed dataset for the selected data_config. The resulting
# `dataset` is consumed by prepare_data / analyze_class_distribution, which
# read its 'X', 'y' and 'label_encoder' entries.
dataset = create_safety_car_dataset(
    config=data_config,
    window_size=100,  # timesteps per sequence (second axis of the generated X)
    prediction_horizon=10,  # NOTE(review): presumably how far ahead the label is taken - confirm in f1_etl
    handle_non_numeric="encode",
    handle_missing=True,
    missing_strategy="forward_fill",
    normalize=True,
    normalization_method="per_sequence",
    target_column="TrackStatus",  # track status is the prediction target
    enable_debug=False
)

2025-07-01 22:31:11,777 - f1_etl - INFO - Preprocessing configuration:
2025-07-01 22:31:11,779 - f1_etl - INFO -   Missing values: enabled (forward_fill)
2025-07-01 22:31:11,780 - f1_etl - INFO -   Normalization: enabled (per_sequence)


Loading session: 2024 Bahrain Grand Prix R


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Saudi Arabian Grand Prix

Loading session: 2024 Saudi Arabian Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Australian Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Japanese Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Chinese Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Miami Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '11', '55', '44', '22', '63', '14', '31', '27', '10', '81', '24', '3', '77', '18', '23', '20', '2']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Emilia Romagna Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '16', '81', '55', '44', '63', '11', '18', '22', '27', '20', '3', '31', '24', '10', '2', '77', '14', '23']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data


Loading session: 2024 Monaco Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '14', '3', '77', '18', '2', '24', '31', '11', '27', '20']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Canadian Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '44', '81', '14', '18', '3', '10', '31', '27', '20', '77', '22', '24', '55', '23', '11', '16', '2']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data

Loading session: 2024 Spanish Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '63', '16', '55', '81', '11', '10', '31', '27', '14', '24', '18', '3', '77', '20', '23', '22', '2']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Austrian Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '81', '55', '44', '1', '27', '11', '20', '3', '10', '16', '31', '18', '22', '23', '77', '24', '14', '2', '4']
core           INFO 	Loading data for British Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 British Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '1', '4', '81', '55', '27', '18', '14', '23', '22', '2', '20', '3', '16', '77', '31', '11', '24', '63', '10']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Hungarian Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '44', '16', '1', '55', '11', '63', '22', '18', '14', '3', '27', '23', '20', '77', '2', '31', '24', '10']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Belgian Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '81', '16', '1', '4', '55', '11', '14', '31', '3', '18', '23', '10', '20', '77', '22', '2', '27', '24', '63']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Dutch Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '81', '55', '11', '63', '44', '10', '14', '27', '3', '18', '23', '31', '2', '22', '20', '77', '24']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data

Loading session: 2024 Italian Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '55', '44', '1', '63', '11', '23', '20', '14', '43', '3', '31', '10', '77', '27', '24', '18', '22']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Azerbaijan Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '16', '63', '4', '1', '14', '23', '43', '44', '50', '27', '10', '3', '24', '31', '77', '11', '55', '18', '22']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Singapore Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']
core           INFO 	Loading data for United States Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 United States Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '1', '4', '81', '63', '11', '27', '30', '43', '20', '10', '14', '22', '18', '23', '77', '31', '24', '44']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Mexico City Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '16', '44', '63', '1', '20', '81', '27', '10', '18', '43', '31', '77', '24', '30', '11', '14', '23', '22']
core           INFO 	Loading data for São Paulo Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_

Loading session: 2024 São Paulo Grand Prix R


req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '31', '10', '63', '16', '4', '22', '81', '30', '44', '11', '50', '77', '14', '24', '55', '43', '23', '18', '27']
core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Las Vegas Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '44', '55', '16', '1', '4', '81', '27', '22', '11', '14', '20', '24', '43', '18', '30', '31', '77', '23', '10']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Qatar Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22', '30', '23', '27', '11', '18', '43', '31']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading session: 2024 Abu Dhabi Grand Prix R


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']
2025-07-01 22:37:24,007 - f1_etl - INFO - Creating new fixed vocabulary encoder



📊 Track Status Analysis (training_data):
   green       : 1540436 samples ( 91.9%)
   red         : 33594 samples (  2.0%)
   safety_car  : 47826 samples (  2.9%)
   vsc         : 10001 samples (  0.6%)
   vsc_ending  :   461 samples (  0.0%)
   yellow      : 43072 samples (  2.6%)
   Missing classes: [np.str_('unknown')]
✅ FixedVocabTrackStatusEncoder fitted
   Classes seen: ['green', 'red', 'safety_car', 'vsc', 'vsc_ending', 'yellow']
   Total classes: 7
   Output mode: integer labels


2025-07-01 22:37:24,520 - f1_etl - INFO - Processing 1675390 total telemetry rows
2025-07-01 22:37:24,520 - f1_etl - INFO - Grouping by: ['SessionId', 'Driver']
2025-07-01 22:37:31,949 - f1_etl - INFO - Total sequences generated: 33467
2025-07-01 22:37:32,136 - f1_etl - INFO - Generated 33467 sequences with shape (33467, 100, 9)
2025-07-01 22:37:32,168 - f1_etl - INFO - Applying missing value imputation with strategy: forward_fill
2025-07-01 22:37:32,178 - f1_etl - INFO - Handling missing values with strategy: forward_fill
2025-07-01 22:37:32,575 - f1_etl - INFO - Applying normalization with method: per_sequence
2025-07-01 22:37:33,289 - f1_etl - INFO - Final dataset summary:
2025-07-01 22:37:33,289 - f1_etl - INFO -   Sequences: 33467
2025-07-01 22:37:33,290 - f1_etl - INFO -   Features: 9
2025-07-01 22:37:33,290 - f1_etl - INFO -   Classes: 7 (integer)
2025-07-01 22:37:33,290 - f1_etl - INFO -   Label shape: (33467,)
2025-07-01 22:37:33,290 - f1_etl - INFO -     green       : 30780 s

In [None]:
# ============================================================================
# STEP 2: DATA GENERATION
# ============================================================================

# def create_dataset(scope_name, window_config, cache_dir="./f1_cache"):
#     """Create a single dataset for given scope and window configuration"""
    
#     scope = DATA_SCOPES[scope_name]
    
#     # Convert driver abbreviations to numbers using DriverLabelEncoder
#     drivers = None
#     if scope["drivers"]:
#         # Get the session to create driver encoder
#         session_config = scope["sessions"][0]  # Use first session for driver mapping
#         driver_encoder = setup_driver_encoder(session_config)
        
#         # Convert abbreviations to numbers
#         try:
#             drivers = driver_encoder.transform_driver_to_number(scope["drivers"])
#             print(f"Mapped drivers: {scope['drivers']} -> {drivers}")
#         except KeyError as e:
#             print(f"❌ Driver not found: {e}")
#             available_drivers = list(driver_encoder.driver_to_number.keys())
#             print(f"Available drivers: {available_drivers}")
#             raise ValueError(f"Driver mapping failed. Available: {available_drivers}")
    
#     # Create data configuration
#     config = DataConfig(
#         sessions=scope["sessions"],
#         cache_dir=cache_dir,
#         drivers=drivers
#     )
    
#     # Generate dataset
#     print(f"Generating dataset: {scope_name}, window={window_config['window_size']}, horizon={window_config['prediction_horizon']}")
#     dataset = create_safety_car_dataset(
#         config=config,
#         window_size=window_config["window_size"],
#         prediction_horizon=window_config["prediction_horizon"],
#         handle_non_numeric="encode",
#         handle_missing=False,
#         missing_strategy="forward_fill",
#         normalize=True,
#         normalization_method="per_sequence",
#         target_column="TrackStatus",
#         enable_debug=False
#     )
    
#     return dataset

# # Example: Create a single dataset
# dataset = create_dataset("one_session_all_drivers", WINDOW_CONFIGS[0])

2025-07-01 13:37:56,918 - f1_etl - INFO - Preprocessing configuration:
2025-07-01 13:37:56,919 - f1_etl - INFO -   Missing values: disabled (forward_fill)
2025-07-01 13:37:56,919 - f1_etl - INFO -   Normalization: enabled (per_sequence)


Generating dataset: one_session_all_drivers, window=200, horizon=10
Loading session: 2024 Monaco Grand Prix R
Loading from cache: f1_cache/2024_Monaco_Grand_Prix_R.pkl


2025-07-01 13:38:02,293 - f1_etl - INFO - Processing 1833280 total telemetry rows
2025-07-01 13:38:02,295 - f1_etl - INFO - Grouping by: ['SessionId', 'Driver']
2025-07-01 13:38:07,128 - f1_etl - INFO - Total sequences generated: 18300
2025-07-01 13:38:07,350 - f1_etl - INFO - Generated 18300 sequences with shape (18300, 200, 9)
2025-07-01 13:38:07,350 - f1_etl - INFO - Missing value handling disabled
2025-07-01 13:38:07,388 - f1_etl - INFO - Applying normalization with method: per_sequence


In [48]:
# ============================================================================
# STEP 3: DATA PREPARATION
# ============================================================================

def prepare_data(dataset, test_size=0.2):
    """Split a windowed dataset into stratified train/test sets in Aeon layout.

    Args:
        dataset: Mapping with an ``'X'`` array laid out as
            (n_samples, n_timesteps, n_features) and a ``'y'`` array of
            encoded labels.
        test_size: Fraction of the samples held out for testing.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``; the X arrays are laid
        out as (n_samples, n_features, n_timesteps).
    """
    features = dataset['X']
    labels = dataset['y']

    # Aeon wants channels before time, so swap the last two axes.
    # NOTE: every feature channel is kept. Slicing ``[:, 0:1, :]`` here would
    # restrict the experiment to channel 0 (Speed, per the original note)
    # while keeping the array 3D.
    channels_first = features.transpose(0, 2, 1)

    # Fixed seed -> reproducible split; stratify keeps the class ratios.
    split = train_test_split(
        channels_first,
        labels,
        test_size=test_size,
        random_state=42,
        stratify=labels,
    )
    return tuple(split)

def analyze_class_distribution(dataset, y_train):
    """Print the per-class sample counts of ``y_train`` and return them.

    Args:
        dataset: Mapping whose ``'label_encoder'`` entry exposes
            ``get_classes()``.
        y_train: Array of encoded training labels.

    Returns:
        Tuple ``(class_names, distribution)`` where ``distribution`` maps each
        label id present in ``y_train`` to its number of samples.
    """
    class_names = dataset['label_encoder'].get_classes()
    label_ids, label_counts = np.unique(y_train, return_counts=True)

    print("\n=== CLASS DISTRIBUTION ===")
    for label_id, n_samples in zip(label_ids, label_counts):
        # Ids outside the encoder vocabulary get a synthetic placeholder name.
        if label_id < len(class_names):
            label = class_names[label_id]
        else:
            label = f"Class_{label_id}"
        share = n_samples / len(y_train) * 100
        print(f"{label:12s}: {n_samples:5d} samples ({share:5.1f}%)")

    # Ratio between the most and the least populated class.
    imbalance_ratio = max(label_counts) / min(label_counts)
    print(f"\nImbalance ratio: {imbalance_ratio:.1f}:1")

    distribution = dict(zip(label_ids, label_counts))
    return class_names, distribution

# Example usage: split the `dataset` already present in the notebook session
# and print the class balance of the training labels. The names bound here
# (X_train, X_test, y_train, y_test, class_names) are reused by later cells.
X_train, X_test, y_train, y_test = prepare_data(dataset)
class_names, class_dist = analyze_class_distribution(dataset, y_train)


=== CLASS DISTRIBUTION ===
green       : 24623 samples ( 92.0%)
red         :   538 samples (  2.0%)
safety_car  :   764 samples (  2.9%)
vsc         :   161 samples (  0.6%)
vsc_ending  :     7 samples (  0.0%)
yellow      :   680 samples (  2.5%)

Imbalance ratio: 3517.6:1


In [62]:
# Inspect the training array; prepare_data lays it out as
# (n_samples, n_features, n_timesteps).
X_train.shape

(26773, 9, 100)

In [49]:
# Keep a handle on the dataset's label encoder (read later when building
# `class_weight`) and display its class-name -> label-id mapping.
y_enc = dataset['label_encoder']
y_enc.class_to_idx

{np.str_('green'): 0,
 np.str_('red'): 1,
 np.str_('safety_car'): 2,
 np.str_('unknown'): 3,
 np.str_('vsc'): 4,
 np.str_('vsc_ending'): 5,
 np.str_('yellow'): 6}

In [57]:
# class_weight = {
#     y_enc.class_to_idx['green']: 1.0,        # 0
#     y_enc.class_to_idx['red']: 10.0,         # 1  
#     y_enc.class_to_idx['safety_car']: 20.0,  # 2 (your target class)
#     y_enc.class_to_idx['unknown']: 5.0,      # 3
#     y_enc.class_to_idx['vsc']: 30.0,         # 4
#     y_enc.class_to_idx['vsc_ending']: 100.0, # 5
#     y_enc.class_to_idx['yellow']: 8.0        # 6
# }

# Manual per-class weights keyed by encoded label id; the ids are looked up
# through the label encoder so the mapping follows its vocabulary.
class_weight = {
    y_enc.class_to_idx[label]: weight
    for label, weight in (
        ('green', 1.0),
        ('red', 25.0),
        ('safety_car', 100.0),  # Much higher
        ('unknown', 25.0),
        ('vsc', 50.0),
        ('vsc_ending', 50.0),
        ('yellow', 25.0),
    )
}

In [51]:
# ============================================================================
# STEP 4: MODEL TRAINING
# ============================================================================

def create_models():
    """Build the estimators compared in each experiment, keyed by name.

    Two dummy baselines (most-frequent and stratified) are followed by two
    Catch22 feature pipelines, one feeding an L1-penalised logistic
    regression and one feeding a depth-limited random forest. Both final
    estimators read the module-level ``class_weight`` mapping at call time.

    Returns:
        Dict mapping model name to an unfitted classifier, in the order the
        models should be trained.
    """
    # Settings shared by both Catch22 wrappers.
    catch22_options = {"outlier_norm": True, "random_state": 42}

    linear_head = LogisticRegression(
        random_state=42,
        max_iter=3000,
        solver='saga',
        penalty='l1',
        C=0.1,
        class_weight=class_weight,
    )
    forest_head = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        class_weight=class_weight,
        max_depth=10,
    )

    return {
        "dummy_frequent": DummyClassifier(strategy='most_frequent'),
        "dummy_stratified": DummyClassifier(strategy='stratified'),
        "logistic_regression": Catch22Classifier(estimator=linear_head, **catch22_options),
        "random_forest": Catch22Classifier(estimator=forest_head, **catch22_options),
    }

def train_single_model(model, model_name, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Any exception raised while fitting, predicting or scoring is reported on
    stdout and turned into an error record, so one failing model does not
    abort a whole experiment.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        model_name: Label used in the printed output and the result record.
        X_train, y_train: Training samples and labels.
        X_test, y_test: Held-out samples and labels.

    Returns:
        On success, a dict with ``model_name``, ``accuracy``, ``f1_macro``,
        ``f1_weighted``, ``predictions`` and ``true_labels``. On failure, a
        dict with ``model_name`` and ``error`` (the exception message).
    """
    print(f"\nTraining {model_name}...")

    try:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        # zero_division=0 scores classes that are never predicted as 0.
        f1_macro = f1_score(y_test, y_pred, average='macro', zero_division=0)
        f1_weighted = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        for label, value in (
            ("Accuracy", accuracy),
            ("F1-Macro", f1_macro),
            ("F1-Weighted", f1_weighted),
        ):
            print(f"  {label}: {value:.4f}")

        return {
            "model_name": model_name,
            "accuracy": accuracy,
            "f1_macro": f1_macro,
            "f1_weighted": f1_weighted,
            "predictions": y_pred,
            "true_labels": y_test,
        }
    except Exception as exc:
        message = str(exc)
        print(f"  ERROR: {message}")
        return {"model_name": model_name, "error": message}

# Example usage:
# models = create_models()
# results = {}
# for model_name, model in models.items():
#     results[model_name] = train_single_model(model, model_name, X_train, X_test, y_train, y_test)

In [52]:
# Instantiate every candidate model once and start an empty result registry
# (model name -> record returned by train_single_model).
models = create_models()
results = {}

In [58]:
# Train and score a single model by name; its record is stored in `results`
# so the comparison cells below can pick it up.
model_name = 'logistic_regression'
model = models[model_name]
results[model_name] = train_single_model(model, model_name, X_train, X_test, y_train, y_test)


Training logistic_regression...




  Accuracy: 0.3564
  F1-Macro: 0.1628
  F1-Weighted: 0.4671


In [24]:
# ============================================================================
# STEP 5: EVALUATION AND COMPARISON
# ============================================================================

def compare_models(results, class_names=None):
    """Print a leaderboard of the successfully trained models.

    Args:
        results: Dict mapping model name to the record produced by
            ``train_single_model``; records containing ``"error"`` are skipped.
        class_names: Accepted for call-site symmetry; not used here.

    Returns:
        DataFrame with one row per successful model sorted by descending
        F1-Macro, or ``None`` when no model succeeded.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("MODEL COMPARISON")
    print(rule)

    rows = [
        {
            "Model": name,
            "Accuracy": outcome["accuracy"],
            "F1-Macro": outcome["f1_macro"],
            "F1-Weighted": outcome["f1_weighted"],
        }
        for name, outcome in results.items()
        if "error" not in outcome
    ]

    if not rows:
        print("No successful model results to compare!")
        return None

    ranking = pd.DataFrame(rows).sort_values("F1-Macro", ascending=False)
    print(ranking.to_string(index=False, float_format="%.4f"))

    # After sorting, the first row is the model with the highest macro F1.
    winner = ranking.iloc[0]
    print(f"\n🏆 Best model: {winner['Model']} (F1-Macro: {winner['F1-Macro']:.4f})")

    return ranking

def detailed_class_analysis(results, class_names, target_class="safety_car"):
    """Print the target-class F1 score and the confusion matrix of each model.

    Args:
        results: Dict mapping model name to the record produced by
            ``train_single_model``; records containing ``"error"`` are skipped.
        class_names: Sequence of class names indexed by encoded label id, or
            ``None`` to skip the target-class score and print only the
            confusion matrices.
        target_class: Name of the class whose F1 score is reported.

    Returns:
        None. Everything is printed. The function returns early (after a
        message) when ``target_class`` is not in ``class_names``.
    """
    print(f"\n=== DETAILED ANALYSIS: {target_class.upper()} CLASS ===")

    # Resolve the target class name to its encoded label id.
    target_idx = None
    if class_names is not None:
        try:
            target_idx = list(class_names).index(target_class)
        except ValueError:
            print(f"Class '{target_class}' not found in {list(class_names)}")
            return

    # Label the score after the class actually requested instead of a fixed
    # "Safety Car"; the default target still renders as "Safety Car".
    target_label = target_class.replace("_", " ").title()

    for model_name, result in results.items():
        if "error" in result:
            continue

        y_true = result["true_labels"]
        y_pred = result["predictions"]

        print(f"\n--- {model_name} ---")
        if target_idx is not None:
            # Restricting `labels` to one id makes the macro average equal to
            # that single class's F1.
            target_f1 = f1_score(y_true, y_pred, labels=[target_idx], average='macro', zero_division=0)
            print(f"{target_label} F1: {target_f1:.4f}")

        # Rows are true classes, columns are predicted classes.
        cm = confusion_matrix(y_true, y_pred)
        print("Confusion Matrix:")
        print(cm)


In [61]:
# Rank every successfully trained model in `results` by macro F1.
compare_models(results)


MODEL COMPARISON
              Model  Accuracy  F1-Macro  F1-Weighted
logistic_regression    0.3564    0.1628       0.4671

🏆 Best model: logistic_regression (F1-Macro: 0.1628)


Unnamed: 0,Model,Accuracy,F1-Macro,F1-Weighted
0,logistic_regression,0.356439,0.162772,0.467131


In [55]:
# Per-model F1 for the safety-car class plus the raw confusion matrix.
detailed_class_analysis(results, class_names, target_class="safety_car")


=== DETAILED ANALYSIS: SAFETY_CAR CLASS ===

--- logistic_regression ---
Safety Car F1: 0.2034
Confusion Matrix:
[[2036  996 1130  129   98 1768]
 [   1   84    7   10    0   32]
 [   6   11  156    3    4   11]
 [   2    3   23    4    2    6]
 [   0    0    1    1    0    0]
 [  15   15   26    7    1  106]]


In [27]:
def _class_display_name(class_names, class_idx):
    """Return the name for ``class_idx``, or ``Class_<idx>`` if it has none."""
    if 0 <= class_idx < len(class_names):
        return class_names[class_idx]
    return f"Class_{class_idx}"


def detailed_class_analysis_enhanced(results, class_names, target_class="safety_car"):
    """Print an extended per-model report centred on one target class.

    For every successful model this prints the target-class F1 score, a
    classification report, a labelled confusion matrix, per-class
    precision/recall, and a TP/FN/FP/TN breakdown for the target class.

    Args:
        results: Dict mapping model name to the record produced by
            ``train_single_model``; records containing ``"error"`` are skipped.
        class_names: Sequence of class names indexed by encoded label id, or
            ``None`` to print only an unlabelled confusion matrix.
        target_class: Name of the class to highlight. When it is missing from
            ``class_names`` the target-specific sections are skipped and the
            all-class sections are still printed.

    Returns:
        None. Everything is printed.
    """
    print(f"\n=== DETAILED ANALYSIS: {target_class.upper()} CLASS ===")

    # Resolve the target class name to its encoded label id.
    target_idx = None
    if class_names is not None:
        try:
            target_idx = list(class_names).index(target_class)
            print(f"Target class '{target_class}' found at index {target_idx}")
        except ValueError:
            print(f"Class '{target_class}' not found in {list(class_names)}")
            print("Will analyze all classes instead...")

    for model_name, result in results.items():
        if "error" in result:
            continue

        y_true = result["true_labels"]
        y_pred = result["predictions"]

        print(f"\n{'-' * 60}")
        print(f"MODEL: {model_name.upper()}")
        print(f"{'-' * 60}")

        if target_idx is not None:
            # Restricting `labels` to one id makes the macro average equal to
            # that single class's F1.
            target_f1 = f1_score(y_true, y_pred, labels=[target_idx], average='macro', zero_division=0)
            print(f"🎯 {target_class.title()} F1 Score: {target_f1:.4f}")

        # Sorted label ids seen in the truth OR the predictions. Passing this
        # one list to every sklearn call keeps report rows, matrix rows and
        # display names aligned even when a model predicts a class that never
        # occurs in y_true.
        present_labels = np.unique(np.concatenate([y_true, y_pred])).tolist()

        if class_names is None:
            # No names available: fall back to the bare matrix.
            print("\n🔍 Confusion Matrix:")
            print(confusion_matrix(y_true, y_pred, labels=present_labels))
            continue

        # One display name per present label; ids without a name get a
        # placeholder instead of being dropped (dropping would desynchronise
        # the names from the matrix shape).
        present_names = [_class_display_name(class_names, idx) for idx in present_labels]

        print("\n📊 Classification Report:")
        try:
            report = classification_report(
                y_true, y_pred,
                labels=present_labels,
                target_names=present_names,
                zero_division=0,
                digits=4
            )
            print(report)
        except Exception as e:
            # Diagnostics only: degrade to the unnamed report rather than
            # aborting the whole analysis.
            print(f"Could not generate named classification report: {e}")
            print(classification_report(y_true, y_pred, zero_division=0, digits=4))

        print("\n🔍 Confusion Matrix:")
        cm = confusion_matrix(y_true, y_pred, labels=present_labels)
        cm_df = pd.DataFrame(
            cm,
            index=[f"True_{name}" for name in present_names],
            columns=[f"Pred_{name}" for name in present_names]
        )
        print(cm_df.to_string())

        # Row sums are true counts, column sums are predicted counts.
        print("\n📈 Per-Class Performance:")
        for pos, class_name in enumerate(present_names):
            true_positives = cm[pos, pos]
            total_true = cm[pos, :].sum()
            total_pred = cm[:, pos].sum()

            precision = true_positives / total_pred if total_pred > 0 else 0
            recall = true_positives / total_true if total_true > 0 else 0

            print(f"  {class_name:12s}: Precision={precision:.3f}, Recall={recall:.3f}, Count={total_true}")

        if target_idx is None:
            continue

        print(f"\n🚨 {target_class.title()} Class Analysis:")
        if target_idx in present_labels:
            # Position of the target class inside the (possibly reduced) matrix.
            pos = present_labels.index(target_idx)
            tp = cm[pos, pos]                 # True positives
            fn = cm[pos, :].sum() - tp        # False negatives
            fp = cm[:, pos].sum() - tp        # False positives
            tn = cm.sum() - tp - fn - fp      # True negatives

            print(f"  True Positives:  {tp:4d}")
            print(f"  False Negatives: {fn:4d} (missed {target_class} events)")
            print(f"  False Positives: {fp:4d} (false {target_class} alarms)")
            print(f"  True Negatives:  {tn:4d}")

            if tp + fn > 0:
                recall = tp / (tp + fn)
                print(f"  📉 Recall (Detection Rate): {recall:.3f}")

            if tp + fp > 0:
                precision = tp / (tp + fp)
                print(f"  📊 Precision (Alarm Accuracy): {precision:.3f}")
        else:
            print(f"  ❌ No {target_class} events predicted or found in test set")

def create_pretty_confusion_matrix(y_true, y_pred, class_names, model_name="Model"):
    """Draw a labelled confusion-matrix heatmap and return its figure.

    Args:
        y_true: Encoded true labels.
        y_pred: Encoded predicted labels.
        class_names: Sequence of class names indexed by encoded label id.
        model_name: Name shown in the plot title.

    Returns:
        The matplotlib figure that holds the heatmap, or ``None`` when
        matplotlib/seaborn cannot be imported or plotting fails (a message is
        printed in both cases).
    """
    # Plotting libraries are optional, so import them lazily and bail out
    # with a hint when they are missing.
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns
    except ImportError:
        print("📊 Matplotlib/Seaborn not available for visualization")
        return None

    try:
        # Sorted label ids seen in the truth OR the predictions; used for both
        # the matrix and the tick labels so the two cannot drift apart.
        present_labels = np.unique(np.concatenate([y_true, y_pred])).tolist()
        cm = confusion_matrix(y_true, y_pred, labels=present_labels)

        # Ids without a known name get a placeholder rather than being
        # dropped, which would leave fewer tick labels than matrix rows.
        tick_names = [
            class_names[idx] if 0 <= idx < len(class_names) else f"Class_{idx}"
            for idx in present_labels
        ]

        # Keep a handle on the figure: after plt.show() the "current figure"
        # may already be closed, in which case plt.gcf() would hand back a
        # brand-new empty figure.
        fig = plt.figure(figsize=(8, 6))

        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=tick_names,
            yticklabels=tick_names,
            cbar_kws={'label': 'Count'}
        )

        plt.title(f'Confusion Matrix - {model_name}')
        plt.xlabel('Predicted Class')
        plt.ylabel('True Class')
        plt.xticks(rotation=45)
        plt.yticks(rotation=0)
        plt.tight_layout()
        plt.show()

        return fig

    except Exception as e:
        # Visualisation is best-effort; report the problem and carry on.
        print(f"❌ Error creating visualization: {e}")
        return None

In [60]:
# Extended report for the safety-car class: named classification report,
# labelled confusion matrix and TP/FN/FP/TN breakdown per model.
detailed_class_analysis_enhanced(results, class_names, target_class="safety_car")


=== DETAILED ANALYSIS: SAFETY_CAR CLASS ===
Target class 'safety_car' found at index 2

------------------------------------------------------------
MODEL: LOGISTIC_REGRESSION
------------------------------------------------------------
🎯 Safety_Car F1 Score: 0.2034

📊 Classification Report:
              precision    recall  f1-score   support

       green     0.9883    0.3307    0.4956      6157
         red     0.0757    0.6269    0.1352       134
  safety_car     0.1162    0.8168    0.2034       191
         vsc     0.0260    0.1000    0.0412        40
  vsc_ending     0.0000    0.0000    0.0000         2
      yellow     0.0551    0.6235    0.1013       170

    accuracy                         0.3564      6694
   macro avg     0.2102    0.4163    0.1628      6694
weighted avg     0.9154    0.3564    0.4671      6694


🔍 Confusion Matrix:
                 Pred_green  Pred_red  Pred_safety_car  Pred_vsc  Pred_vsc_ending  Pred_yellow
True_green             2036       996          

In [None]:

# ============================================================================
# STEP 6: EXPERIMENT RUNNER
# ============================================================================

def run_single_experiment(scope_name, window_config, cache_dir="./f1_cache"):
    """Run the full pipeline for one data scope and one window configuration.

    The pipeline is: dataset creation, stratified split, class-balance
    printout, training of every model from ``create_models``, then the
    comparison and target-class printouts.

    Args:
        scope_name: Name of the data scope passed to ``create_dataset``.
        window_config: Dict with ``"window_size"`` and
            ``"prediction_horizon"`` entries.
        cache_dir: Cache directory forwarded to ``create_dataset``.

    Returns:
        Dict with ``scope``, ``window_config``, ``results``, ``comparison``,
        ``class_names`` and ``class_distribution``; ``None`` when any
        pipeline step raises (the error message is printed).
    """
    divider = '=' * 60
    print(f"\n{divider}")
    print(f"EXPERIMENT: {scope_name}")
    print(f"Window: {window_config['window_size']}, Horizon: {window_config['prediction_horizon']}")
    print(divider)

    try:
        # Data: build the windows, split them, and report class balance.
        dataset = create_dataset(scope_name, window_config, cache_dir)
        X_train, X_test, y_train, y_test = prepare_data(dataset)
        class_names, class_dist = analyze_class_distribution(dataset, y_train)

        # Models: train and score each candidate in declaration order.
        results = {
            name: train_single_model(estimator, name, X_train, X_test, y_train, y_test)
            for name, estimator in create_models().items()
        }

        # Reporting: leaderboard first, then the target-class details.
        comparison_df = compare_models(results, class_names)
        detailed_class_analysis(results, class_names)

        return {
            "scope": scope_name,
            "window_config": window_config,
            "results": results,
            "comparison": comparison_df,
            "class_names": class_names,
            "class_distribution": class_dist,
        }

    except Exception as exc:
        print(f"EXPERIMENT FAILED: {str(exc)}")
        return None

# ============================================================================
# STEP 7: BATCH EXECUTION
# ============================================================================

def run_quick_test():
    """Smoke-test the pipeline on one scope with the first window config.

    Returns:
        Whatever ``run_single_experiment`` returns for the
        ``"one_session_all_drivers"`` scope and ``WINDOW_CONFIGS[0]``.
    """
    print("🚀 Running quick test...")
    return run_single_experiment("one_session_all_drivers", WINDOW_CONFIGS[0])

def run_all_experiments():
    """Run every (data scope, window config) combination.

    Scopes are iterated in ``DATA_SCOPES`` order and, within each scope,
    window configs in ``WINDOW_CONFIGS`` order.

    Returns:
        List of the truthy experiment records; failed experiments (which
        return ``None``) are left out.
    """
    # Lazy generator: experiments still run one at a time, in order.
    outcomes = (
        run_single_experiment(scope_name, window_config)
        for scope_name in DATA_SCOPES
        for window_config in WINDOW_CONFIGS
    )
    return [outcome for outcome in outcomes if outcome]

def create_summary_report(all_results):
    """Flatten experiment records into one leaderboard DataFrame.

    Args:
        all_results: Iterable of experiment records as returned by
            ``run_single_experiment`` (each with ``scope``, ``window_config``
            and ``results`` entries). Model records containing ``"error"``
            are skipped.

    Returns:
        DataFrame with columns Scope, Window, Horizon, Model, Accuracy,
        F1-Macro and F1-Weighted, sorted by descending F1-Macro. When no
        model succeeded the frame is empty but still has those columns.
    """
    columns = ["Scope", "Window", "Horizon", "Model", "Accuracy", "F1-Macro", "F1-Weighted"]
    summary_data = []

    for experiment in all_results:
        scope = experiment["scope"]
        window_size = experiment["window_config"]["window_size"]
        horizon = experiment["window_config"]["prediction_horizon"]

        summary_data.extend(
            {
                "Scope": scope,
                "Window": window_size,
                "Horizon": horizon,
                "Model": model_name,
                "Accuracy": result["accuracy"],
                "F1-Macro": result["f1_macro"],
                "F1-Weighted": result["f1_weighted"],
            }
            for model_name, result in experiment["results"].items()
            if "error" not in result
        )

    # Fixing the columns up front keeps the sort below valid even when there
    # are no rows; a column-less frame would raise KeyError on "F1-Macro".
    df = pd.DataFrame(summary_data, columns=columns)
    return df.sort_values(["F1-Macro"], ascending=False)

