In [None]:
# F1 Safety Car Prediction - Step-by-Step Jupyter Notebook Implementation

import numpy as np
import pandas as pd
import json
import pickle
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass, asdict
from typing import Dict, Any, List, Optional, Union

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix, precision_recall_fscore_support

from aeon.classification.feature_based import Catch22Classifier
from aeon.classification.dummy import DummyClassifier

import fastf1

from f1_etl import SessionConfig, DataConfig, create_safety_car_dataset, DriverLabelEncoder, FixedVocabTrackStatusEncoder
from f1_etl.config import create_multi_session_configs


In [59]:
@dataclass
class DatasetMetadata:
    """Captures dataset configuration and characteristics.

    The required fields record how the dataset was built (sessions, drivers,
    windowing and preprocessing switches) and how large it is; the optional
    fields add descriptive detail. Instances are turned into plain dicts with
    ``dataclasses.asdict`` when evaluation results are assembled.
    """
    scope: str  # e.g., "2024_season_races", "single_session", etc.
    # One {'year', 'race', 'session_type'} dict per session (a list, not a single dict)
    sessions_config: List[Dict[str, Any]]
    drivers: List[str]
    include_weather: bool
    window_size: int
    prediction_horizon: int
    handle_non_numeric: str
    handle_missing: bool
    missing_strategy: str
    normalize: bool
    normalization_method: str
    target_column: str

    # Dataset characteristics
    total_samples: int
    n_features: int
    n_timesteps: int
    feature_names: Optional[List[str]] = None
    class_distribution: Optional[Dict[str, int]] = None

    # Processing details
    features_used: str = "all"  # "all", "speed_only", "custom_subset", etc.
    is_multivariate: bool = True
    # None means "not recorded"; a list (possibly empty) names the applied steps
    preprocessing_steps: Optional[List[str]] = None

@dataclass
class ModelMetadata:
    """Captures model configuration and hyperparameters.

    Only ``model_type`` and ``base_estimator`` are required; everything else
    has a default so that partially known configurations can still be logged.
    """
    model_type: str  # e.g., "logistic_regression", "random_forest"
    base_estimator: str  # e.g., "LogisticRegression", "RandomForestClassifier"
    wrapper: str = "Catch22Classifier"  # Aeon wrapper used

    # Hyperparameters (None when they were not captured)
    hyperparameters: Optional[Dict[str, Any]] = None
    class_weights: Optional[Dict[int, float]] = None
    custom_weights_applied: bool = False

    # Training details
    random_state: Optional[int] = 42
    cv_strategy: Optional[str] = None  # If cross-validation used
    
@dataclass
class EvaluationMetadata:
    """Captures evaluation context and settings for a single evaluation run."""
    evaluation_id: str
    timestamp: str
    test_size: float
    stratified_split: bool = True
    target_class_focus: str = "safety_car"
    # Names of the metrics reported for this run; None when not recorded
    evaluation_metrics: Optional[List[str]] = None

class ModelEvaluationSuite:
    """Comprehensive model evaluation with metadata tracking and file output.

    Fits a classifier, scores it on a held-out split, prints a console report
    and optionally writes a complete JSON dump plus a plain-text summary into
    ``output_dir``.
    """

    def __init__(self, output_dir: str = "evaluation_results"):
        """Remember the output directory and create it if it does not exist."""
        self.output_dir = Path(output_dir)
        # parents=True so nested paths (e.g. "runs/2024/eval") work on a fresh checkout
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _as_list(values):
        """Return ``values`` as a plain list (numpy arrays go through ``tolist``)."""
        return values.tolist() if hasattr(values, 'tolist') else list(values)

    @staticmethod
    def _class_label(class_names_list, class_idx):
        """Map a label to its class name, falling back to ``class_<idx>``.

        The fallback keeps the number of display names equal to the number of
        labels even when a label has no entry in ``class_names_list``.
        """
        try:
            idx = int(class_idx)
        except (TypeError, ValueError):
            return str(class_idx)
        if 0 <= idx < len(class_names_list):
            return class_names_list[idx]
        return f"class_{idx}"

    def evaluate_model(self,
                      model,
                      model_name: str,
                      X_train, X_test, y_train, y_test,
                      dataset_metadata: "DatasetMetadata",
                      model_metadata: "ModelMetadata",
                      class_names: List[str],
                      target_class: str = "safety_car",
                      save_results: bool = True) -> Dict[str, Any]:
        """
        Comprehensive model evaluation with metadata capture

        Parameters:
        -----------
        model : estimator exposing ``fit`` and ``predict`` (``predict_proba`` optional)
        model_name : str
            Used in the evaluation id and in console/report headers
        X_train, X_test, y_train, y_test :
            Train/test split; the model is fitted here on the training part
        dataset_metadata : DatasetMetadata
        model_metadata : ModelMetadata
        class_names : sequence of str
            Class name for every integer label (index == label)
        target_class : str
            Class that gets a dedicated TP/FN/FP/TN breakdown
        save_results : bool
            Write JSON + text summary into ``output_dir`` when True

        Returns:
        --------
        dict with metadata, metrics, predictions and class info. If training or
        scoring raises, a dict with an ``"error"`` entry instead of metrics.
        """

        # Accept numpy arrays as well as lists (``.index`` below needs a list)
        class_names_list = self._as_list(class_names)

        # One timestamp so the evaluation id and the ISO timestamp always agree
        started_at = datetime.now()
        n_total = len(X_train) + len(X_test)

        # Generate evaluation metadata
        eval_metadata = EvaluationMetadata(
            evaluation_id=f"{model_name}_{started_at.strftime('%Y%m%d_%H%M%S')}",
            timestamp=started_at.isoformat(),
            test_size=len(X_test) / n_total if n_total else 0.0,
            target_class_focus=target_class,
            evaluation_metrics=["accuracy", "f1_macro", "f1_weighted", "precision", "recall"]
        )

        print(f"\n{'='*80}")
        print(f"EVALUATING: {model_name.upper()}")
        print(f"Evaluation ID: {eval_metadata.evaluation_id}")
        print(f"{'='*80}")

        try:
            # Train model
            print("Training model...")
            model.fit(X_train, y_train)

            # Generate predictions
            print("Generating predictions...")
            y_pred = model.predict(X_test)

            # Probabilities are optional: report the failure instead of hiding it
            y_pred_proba = None
            if hasattr(model, 'predict_proba'):
                try:
                    y_pred_proba = model.predict_proba(X_test)
                except Exception as proba_error:
                    print(f"Warning: predict_proba failed ({proba_error}); continuing without probabilities")

            # Calculate comprehensive metrics
            metrics = self._calculate_comprehensive_metrics(
                y_test, y_pred, y_pred_proba, class_names_list, target_class
            )

            # Create results structure
            results = {
                "evaluation_metadata": asdict(eval_metadata),
                "dataset_metadata": asdict(dataset_metadata),
                "model_metadata": asdict(model_metadata),
                "metrics": metrics,
                "predictions": {
                    "y_true": self._as_list(y_test),
                    "y_pred": self._as_list(y_pred),
                    "y_pred_proba": np.asarray(y_pred_proba).tolist() if y_pred_proba is not None else None
                },
                "class_info": {
                    "class_names": class_names_list,
                    "target_class": target_class,
                    "target_class_index": class_names_list.index(target_class) if target_class in class_names_list else None
                }
            }

            # Print detailed analysis
            self._print_detailed_analysis(results)

            # Save results if requested
            if save_results:
                self._save_results(results, eval_metadata.evaluation_id)

            return results

        except Exception as e:
            # Best-effort: keep the run's metadata together with the error text
            error_results = {
                "evaluation_metadata": asdict(eval_metadata),
                "dataset_metadata": asdict(dataset_metadata),
                "model_metadata": asdict(model_metadata),
                "error": str(e),
                "model_name": model_name
            }

            if save_results:
                self._save_results(error_results, eval_metadata.evaluation_id)

            print(f"ERROR: {str(e)}")
            return error_results

    def _calculate_comprehensive_metrics(self, y_true, y_pred, y_pred_proba, class_names, target_class):
        """Calculate comprehensive evaluation metrics.

        Returns a dict with overall scores, per-class precision/recall/F1,
        a TP/FN/FP/TN breakdown for ``target_class`` (empty when that class
        does not occur in ``y_true``/``y_pred``), the confusion matrix and the
        sklearn classification report. ``y_pred_proba`` is accepted for
        signature compatibility and is not used here.
        """

        class_names_list = self._as_list(class_names)

        # Labels that actually occur; every per-class array below follows this
        # order because it is passed explicitly as ``labels``.
        present_labels = np.unique(np.concatenate([y_true, y_pred])).tolist()
        present_names = [self._class_label(class_names_list, label) for label in present_labels]

        # Basic metrics
        accuracy = accuracy_score(y_true, y_pred)
        f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
        f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)

        # Per-class metrics
        precision, recall, f1, support = precision_recall_fscore_support(
            y_true, y_pred, labels=present_labels, average=None, zero_division=0
        )

        # Confusion matrix (rows = true label, columns = predicted label)
        cm = confusion_matrix(y_true, y_pred, labels=present_labels)

        # Target class specific metrics
        target_metrics = {}
        if target_class in class_names_list:
            target_idx = class_names_list.index(target_class)
            if target_idx in present_labels:
                pos = present_labels.index(target_idx)
                tp = cm[pos, pos]
                fn = cm[pos, :].sum() - tp
                fp = cm[:, pos].sum() - tp
                tn = cm.sum() - tp - fn - fp

                target_metrics = {
                    "true_positives": int(tp),
                    "false_negatives": int(fn),
                    "false_positives": int(fp),
                    "true_negatives": int(tn),
                    "precision": float(precision[pos]),
                    "recall": float(recall[pos]),
                    "f1": float(f1[pos]),
                    "support": int(support[pos])
                }

        # Per-class metrics dictionary
        per_class_metrics = {
            name: {
                "precision": float(precision[i]),
                "recall": float(recall[i]),
                "f1": float(f1[i]),
                "support": int(support[i])
            }
            for i, name in enumerate(present_names)
        }

        return {
            "overall": {
                "accuracy": float(accuracy),
                "f1_macro": float(f1_macro),
                "f1_weighted": float(f1_weighted)
            },
            "per_class": per_class_metrics,
            "target_class_metrics": target_metrics,
            "confusion_matrix": cm.tolist(),
            "classification_report": classification_report(
                y_true, y_pred,
                labels=present_labels,
                target_names=[str(name) for name in present_names],
                zero_division=0,
                output_dict=True
            )
        }

    def _print_detailed_analysis(self, results):
        """Print comprehensive analysis to console"""

        metrics = results["metrics"]
        target_class = results["class_info"]["target_class"]

        print(f"\n📊 OVERALL PERFORMANCE")
        print(f"{'='*50}")
        print(f"Accuracy:    {metrics['overall']['accuracy']:.4f}")
        print(f"F1-Macro:    {metrics['overall']['f1_macro']:.4f}")
        print(f"F1-Weighted: {metrics['overall']['f1_weighted']:.4f}")

        if metrics['target_class_metrics']:
            print(f"\n🎯 TARGET CLASS ANALYSIS: {target_class.upper()}")
            print(f"{'='*50}")
            tm = metrics['target_class_metrics']
            print(f"Precision:       {tm['precision']:.4f}")
            print(f"Recall:          {tm['recall']:.4f}")
            print(f"F1-Score:        {tm['f1']:.4f}")
            print(f"True Positives:  {tm['true_positives']:4d}")
            print(f"False Negatives: {tm['false_negatives']:4d} (missed {target_class} events)")
            print(f"False Positives: {tm['false_positives']:4d} (false {target_class} alarms)")
            print(f"True Negatives:  {tm['true_negatives']:4d}")

        print(f"\n📈 PER-CLASS PERFORMANCE")
        print(f"{'='*50}")
        for class_name, class_metrics in metrics['per_class'].items():
            print(f"{class_name:12s}: P={class_metrics['precision']:.3f}, "
                  f"R={class_metrics['recall']:.3f}, "
                  f"F1={class_metrics['f1']:.3f}, "
                  f"N={class_metrics['support']}")

        print(f"\n🔍 CONFUSION MATRIX")
        print(f"{'='*50}")
        cm = np.array(metrics['confusion_matrix'])
        class_names_list = self._as_list(results['class_info']['class_names'])
        present_labels = np.unique(np.concatenate([
            results['predictions']['y_true'],
            results['predictions']['y_pred']
        ])).tolist()
        # One display name per matrix row/column, even for labels without a name
        present_class_names = [self._class_label(class_names_list, label) for label in present_labels]

        cm_df = pd.DataFrame(
            cm,
            index=[f"True_{name}" for name in present_class_names],
            columns=[f"Pred_{name}" for name in present_class_names]
        )
        print(cm_df.to_string())

    def _save_results(self, results, evaluation_id):
        """Save results to JSON and summary text files"""

        # Save complete results as JSON (non-serializable values fall back to str)
        json_path = self.output_dir / f"{evaluation_id}_complete.json"
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, default=str)

        # Save human-readable summary
        summary_path = self.output_dir / f"{evaluation_id}_summary.txt"
        with open(summary_path, 'w', encoding='utf-8') as f:
            self._write_summary_report(results, f)

        print(f"\n💾 Results saved:")
        print(f"  Complete: {json_path}")
        print(f"  Summary:  {summary_path}")

    def _write_summary_report(self, results, file_handle):
        """Write human-readable summary report.

        Handles both successful results (with a ``"metrics"`` entry) and error
        results (with an ``"error"`` entry).
        """

        f = file_handle
        eval_meta = results["evaluation_metadata"]
        dataset_meta = results["dataset_metadata"]
        model_meta = results["model_metadata"]

        f.write("="*80 + "\n")
        f.write("MODEL EVALUATION REPORT\n")
        f.write("="*80 + "\n\n")

        # Evaluation Overview
        f.write("EVALUATION OVERVIEW\n")
        f.write("-" * 40 + "\n")
        f.write(f"Evaluation ID: {eval_meta['evaluation_id']}\n")
        f.write(f"Timestamp: {eval_meta['timestamp']}\n")
        f.write(f"Target Class: {eval_meta['target_class_focus']}\n")
        f.write(f"Test Size: {eval_meta['test_size']:.1%}\n\n")

        # Dataset Information
        f.write("DATASET CONFIGURATION\n")
        f.write("-" * 40 + "\n")
        f.write(f"Scope: {dataset_meta['scope']}\n")
        # str() each entry so numeric driver ids do not break the join
        f.write(f"Drivers: {', '.join(str(d) for d in (dataset_meta['drivers'] or []))}\n")
        f.write(f"Window Size: {dataset_meta['window_size']}\n")
        f.write(f"Prediction Horizon: {dataset_meta['prediction_horizon']}\n")
        f.write(f"Features Used: {dataset_meta['features_used']}\n")
        f.write(f"Multivariate: {dataset_meta['is_multivariate']}\n")
        f.write(f"Total Samples: {dataset_meta['total_samples']:,}\n")
        f.write(f"Shape: ({dataset_meta['total_samples']}, {dataset_meta['n_features']}, {dataset_meta['n_timesteps']})\n")
        if dataset_meta['class_distribution']:
            f.write("Class Distribution:\n")
            for class_name, count in dataset_meta['class_distribution'].items():
                f.write(f"  {class_name}: {count:,}\n")
        f.write("\n")

        # Model Configuration
        f.write("MODEL CONFIGURATION\n")
        f.write("-" * 40 + "\n")
        f.write(f"Model Type: {model_meta['model_type']}\n")
        f.write(f"Base Estimator: {model_meta['base_estimator']}\n")
        f.write(f"Wrapper: {model_meta['wrapper']}\n")
        f.write(f"Custom Weights: {model_meta['custom_weights_applied']}\n")
        if model_meta['hyperparameters']:
            f.write("Hyperparameters:\n")
            for param, value in model_meta['hyperparameters'].items():
                f.write(f"  {param}: {value}\n")
        if model_meta['class_weights']:
            f.write("Class Weights:\n")
            for class_idx, weight in model_meta['class_weights'].items():
                f.write(f"  Class {class_idx}: {weight}\n")
        f.write("\n")

        # Performance Results
        if "metrics" in results:
            metrics = results["metrics"]
            target_class = results["class_info"]["target_class"]

            f.write("PERFORMANCE RESULTS\n")
            f.write("-" * 40 + "\n")
            f.write(f"Overall Accuracy: {metrics['overall']['accuracy']:.4f}\n")
            f.write(f"F1-Macro: {metrics['overall']['f1_macro']:.4f}\n")
            f.write(f"F1-Weighted: {metrics['overall']['f1_weighted']:.4f}\n\n")

            if metrics['target_class_metrics']:
                f.write(f"TARGET CLASS ANALYSIS: {target_class.upper()}\n")
                f.write("-" * 40 + "\n")
                tm = metrics['target_class_metrics']
                f.write(f"Precision: {tm['precision']:.4f}\n")
                f.write(f"Recall: {tm['recall']:.4f}\n")
                f.write(f"F1-Score: {tm['f1']:.4f}\n")
                f.write(f"True Positives: {tm['true_positives']:,}\n")
                f.write(f"False Negatives: {tm['false_negatives']:,} (missed events)\n")
                f.write(f"False Positives: {tm['false_positives']:,} (false alarms)\n")
                f.write(f"True Negatives: {tm['true_negatives']:,}\n\n")

            f.write("PER-CLASS PERFORMANCE\n")
            f.write("-" * 40 + "\n")
            f.write(f"{'Class':<12} {'Precision':<10} {'Recall':<10} {'F1':<10} {'Support':<10}\n")
            f.write("-" * 60 + "\n")
            for class_name, class_metrics in metrics['per_class'].items():
                f.write(f"{class_name:<12} {class_metrics['precision']:<10.3f} "
                       f"{class_metrics['recall']:<10.3f} {class_metrics['f1']:<10.3f} "
                       f"{class_metrics['support']:<10}\n")

        else:
            f.write("ERROR OCCURRED\n")
            f.write("-" * 40 + "\n")
            f.write(f"Error: {results.get('error', 'Unknown error')}\n")

# Helper functions for easy metadata creation
def create_dataset_metadata_from_f1_config(dataset_config, dataset, processing_config=None, features_used="all"):
    """
    Create DatasetMetadata from F1 ETL configuration and dataset object

    Parameters:
    -----------
    dataset_config : DataConfig
        The F1 ETL DataConfig object
    dataset : dict
        The dataset dictionary returned by create_safety_car_dataset.
        ``dataset['X']`` is read as (n_samples, n_timesteps, n_features),
        i.e. the layout before any transpose to Aeon format.
    processing_config : dict, optional
        The processing config from dataset['config'] if available
    features_used : str
        Description of which features were used

    Returns:
    --------
    DatasetMetadata
    """

    X = dataset['X']
    y = dataset['y']

    # Use processing config from dataset if available
    if processing_config is None and 'config' in dataset:
        processing_config = dataset['config']
    # Empty dict stands in for "no config" so every lookup below can use .get
    cfg = processing_config or {}

    # Determine scope description (a missing or None ``sessions`` counts as empty)
    sessions = getattr(dataset_config, 'sessions', None) or []
    if len(sessions) == 1:
        session = sessions[0]
        year = getattr(session, 'year', 'unknown')
        race = getattr(session, 'race', 'unknown')
        session_type = getattr(session, 'session_type', 'unknown')
        scope = f"single_session_{year}_{race}_{session_type}".replace(' ', '_')
    elif len(sessions) > 1:
        years = {getattr(s, 'year', None) for s in sessions} - {None}
        session_types = {getattr(s, 'session_type', None) for s in sessions} - {None}

        if years and session_types:
            year_str = '-'.join(map(str, sorted(years)))
            type_str = '_'.join(sorted(session_types))
            scope = f"multi_session_{year_str}_{type_str}_{len(sessions)}sessions"
        else:
            scope = f"multi_session_{len(sessions)}sessions"
    else:
        scope = "unknown_scope"

    # Get class distribution
    labels, counts = np.unique(y, return_counts=True)
    label_encoder = dataset.get('label_encoder')
    class_dist = {}
    if label_encoder is not None and hasattr(label_encoder, 'class_to_idx'):
        idx_to_class = {v: k for k, v in label_encoder.class_to_idx.items()}
        class_dist = {str(idx_to_class.get(idx, f"class_{idx}")): int(count)
                     for idx, count in zip(labels, counts)}
    elif label_encoder is not None and hasattr(label_encoder, 'classes_'):
        # Standard sklearn LabelEncoder
        class_names = label_encoder.classes_
        class_dist = {str(class_names[idx]): int(count)
                     for idx, count in zip(labels, counts) if idx < len(class_names)}

    # Extract feature names if available
    feature_names = None
    if 'feature_names' in cfg:
        feature_names = cfg['feature_names']
    elif 'metadata' in dataset and dataset['metadata']:
        # Try to get from first metadata entry
        meta_entry = dataset['metadata'][0] if isinstance(dataset['metadata'], list) else dataset['metadata']
        if isinstance(meta_entry, dict) and 'features_used' in meta_entry:
            feature_names = meta_entry['features_used']

    # Get preprocessing steps
    preprocessing_steps = []
    if cfg.get('missing_values_handled', False):
        preprocessing_steps.append(f"missing_values_handled_{cfg.get('missing_strategy', 'unknown')}")
    if cfg.get('normalization_applied', False):
        preprocessing_steps.append(f"normalized_{cfg.get('normalization_method', 'unknown')}")

    # Shape bookkeeping: axis 1 is time, axis 2 is the feature/channel axis.
    # A 2-D X is a univariate series per sample.
    x_shape = np.shape(X)
    if len(x_shape) > 2:
        n_timesteps, n_features = x_shape[1], x_shape[2]
    elif len(x_shape) == 2:
        n_timesteps, n_features = x_shape[1], 1
    else:
        n_timesteps, n_features = 1, 1

    return DatasetMetadata(
        scope=scope,
        sessions_config=[{
            'year': getattr(s, 'year', None),
            'race': getattr(s, 'race', None),
            'session_type': getattr(s, 'session_type', None)
        } for s in sessions],
        drivers=getattr(dataset_config, 'drivers', None) or [],
        include_weather=getattr(dataset_config, 'include_weather', False),
        window_size=cfg.get('window_size', 100),
        prediction_horizon=cfg.get('prediction_horizon', 10),
        handle_non_numeric=cfg.get('handle_non_numeric', 'encode'),
        handle_missing=cfg.get('handle_missing', True),
        missing_strategy=cfg.get('missing_strategy', 'forward_fill'),
        normalize=cfg.get('normalize', True),
        normalization_method=cfg.get('normalization_method', 'per_sequence'),
        target_column=cfg.get('target_column', 'TrackStatus'),
        total_samples=x_shape[0] if x_shape else 0,
        n_features=n_features,
        n_timesteps=n_timesteps,
        feature_names=feature_names,
        class_distribution=class_dist,
        features_used=features_used,
        is_multivariate=len(x_shape) > 2 and n_features > 1,
        preprocessing_steps=preprocessing_steps
    )

def create_metadata_from_f1_dataset(data_config, dataset, features_used="multivariate_all_9_features"):
    """
    Shortcut around create_dataset_metadata_from_f1_config.

    Forwards ``data_config`` and ``dataset`` unchanged and supplies the
    processing config stored under ``dataset['config']`` (None if absent).
    """
    embedded_config = dataset.get('config')
    return create_dataset_metadata_from_f1_config(
        dataset_config=data_config,
        dataset=dataset,
        processing_config=embedded_config,
        features_used=features_used,
    )

def create_model_metadata(model_name, model, class_weights=None):
    """Create ModelMetadata from model configuration.

    Parameters:
    -----------
    model_name : str
        Stored as ``model_type``
    model : estimator
        Either a plain estimator or a wrapper exposing the wrapped estimator
        as ``model.estimator``
    class_weights : dict, optional
        Custom class weights that were passed to the estimator

    Returns:
    --------
    ModelMetadata
        ``wrapper`` is the wrapper's class name when a wrapped estimator is
        present and "Direct" otherwise.
    """

    # A wrapper whose ``estimator`` is None has no inner model to describe,
    # so it is treated like a plain estimator.
    inner = getattr(model, 'estimator', None)
    described = inner if inner is not None else model

    # Extract hyperparameters (prefer the inner estimator's)
    hyperparams = {}
    if hasattr(described, 'get_params'):
        hyperparams = described.get_params()
    elif hasattr(model, 'get_params'):
        hyperparams = model.get_params()

    return ModelMetadata(
        model_type=model_name,
        base_estimator=described.__class__.__name__,
        wrapper=model.__class__.__name__ if inner is not None else "Direct",
        hyperparameters=hyperparams,
        class_weights=class_weights,
        custom_weights_applied=class_weights is not None,
        random_state=hyperparams.get('random_state', None)
    )

In [58]:
# Evaluation suite; JSON dumps and text summaries are written under evaluation_results/
evaluator = ModelEvaluationSuite("evaluation_results")

In [26]:
# Full-season alternative (disabled): every 2024 race session for one driver.
# sessions_2024_season = create_multi_session_configs(
#     year=2024, 
#     session_types=['R'], 
#     include_testing=False
# )
# data_config = DataConfig(
#     sessions=sessions_2024_season, 
#     drivers=['2'], 
#     include_weather=False
# )

# Single race, single driver. The event name is spelled the way FastF1 reports it
# when loading ("Saudi Arabian Grand Prix"); the previous spelling had a typo.
data_config = DataConfig(
    sessions=[SessionConfig(2024, 'Saudi Arabian Grand Prix', 'R')],
    drivers=['1'],
    include_weather=False
)

In [27]:
# Build the windowed telemetry sequences and their TrackStatus labels
dataset = create_safety_car_dataset(
    config=data_config,
    # windowing / target
    window_size=100, prediction_horizon=10, target_column="TrackStatus",
    # preprocessing
    handle_non_numeric="encode",
    handle_missing=True, missing_strategy="forward_fill",
    normalize=True, normalization_method="per_sequence",
    enable_debug=False,
)

2025-07-02 21:38:26,868 - f1_etl - INFO - Preprocessing configuration:
2025-07-02 21:38:26,869 - f1_etl - INFO -   Missing values: enabled (forward_fill)
2025-07-02 21:38:26,870 - f1_etl - INFO -   Normalization: enabled (per_sequence)
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Loading session: 2024 Saudia Arabian Grand Prix R


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
2025-07-02 21:38:29,525 - f1_etl - INFO - Creating new fixed vocabulary encoder
2025-07-02 21:38:29,559 - f1_etl - INFO - Processing 66149 total telemetry rows
2025-07-02 21:38:29,560 - f1_etl - INFO - Grouping by: ['SessionId', 'Driver']



📊 Track Status Analysis (training_data):
   green       : 59396 samples ( 89.8%)
   safety_car  :  3135 samples (  4.7%)
   yellow      :  3618 samples (  5.5%)
   Missing classes: [np.str_('red'), np.str_('unknown'), np.str_('vsc'), np.str_('vsc_ending')]
✅ FixedVocabTrackStatusEncoder fitted
   Classes seen: ['green', 'safety_car', 'yellow']
   Total classes: 7
   Output mode: integer labels


2025-07-02 21:38:29,950 - f1_etl - INFO - Total sequences generated: 1321
2025-07-02 21:38:29,959 - f1_etl - INFO - Generated 1321 sequences with shape (1321, 100, 9)
2025-07-02 21:38:29,963 - f1_etl - INFO - No missing values detected, skipping imputation
2025-07-02 21:38:29,964 - f1_etl - INFO - Applying normalization with method: per_sequence
2025-07-02 21:38:29,994 - f1_etl - INFO - Final dataset summary:
2025-07-02 21:38:29,994 - f1_etl - INFO -   Sequences: 1321
2025-07-02 21:38:29,995 - f1_etl - INFO -   Features: 9
2025-07-02 21:38:29,995 - f1_etl - INFO -   Classes: 7 (integer)
2025-07-02 21:38:29,996 - f1_etl - INFO -   Label shape: (1321,)
2025-07-02 21:38:29,996 - f1_etl - INFO -     green       :  1188 samples ( 89.9%)
2025-07-02 21:38:29,996 - f1_etl - INFO -     safety_car  :    62 samples (  4.7%)
2025-07-02 21:38:29,996 - f1_etl - INFO -     yellow      :    71 samples (  5.4%)


In [60]:
# Describe the dataset (scope, shape, class balance) for the evaluation report
dataset_metadata = create_metadata_from_f1_dataset(
    data_config, dataset, features_used="multivariate_all_9_features"
)

In [29]:
# Class names ordered by their integer label, so class_names[i] is the name of label i.
# Sorting by index avoids depending on the insertion order of class_to_idx.
class_names = [
    name
    for name, _ in sorted(dataset['label_encoder'].class_to_idx.items(), key=lambda item: item[1])
]

In [41]:
def prepare_data(dataset, test_size=0.2, feature_indices: Optional[List[int]] = None, random_state: int = 42):
    """Prepare train/test splits and convert to Aeon format.

    Parameters:
    -----------
    dataset : dict
        Must provide ``'X'`` with shape (n_samples, n_timesteps, n_features)
        and ``'y'`` with the encoded labels.
    test_size : float
        Fraction of samples placed in the test split.
    feature_indices : list of int, optional
        Channels to keep after the transpose, e.g. ``[0]`` for a single
        feature. The result stays 3-D. ``None`` keeps every feature.
    random_state : int
        Seed for the stratified split.

    Returns:
    --------
    X_train, X_test, y_train, y_test
        X arrays are in Aeon layout (n_samples, n_features, n_timesteps).

    Raises:
    -------
    ValueError
        If ``dataset['X']`` is not 3-dimensional.
    """

    X = np.asarray(dataset['X'])  # Shape: (n_samples, n_timesteps, n_features)
    y = dataset['y']  # Encoded labels

    if X.ndim != 3:
        raise ValueError(
            f"dataset['X'] must be 3-D (n_samples, n_timesteps, n_features), got shape {X.shape}"
        )

    # Convert to Aeon format: (n_samples, n_features, n_timesteps)
    X_aeon = X.transpose(0, 2, 1)

    # Optional channel subset; list indexing keeps the feature axis
    if feature_indices is not None:
        X_aeon = X_aeon[:, list(feature_indices), :]

    # Train/test split with stratification
    X_train, X_test, y_train, y_test = train_test_split(
        X_aeon, y, test_size=test_size, random_state=random_state, stratify=y
    )

    return X_train, X_test, y_train, y_test

def analyze_class_distribution(dataset, y_train):
    """Analyze and display class distribution.

    Prints one line per class present in ``y_train`` (name, count, share)
    followed by the largest-to-smallest class ratio.

    Returns:
    --------
    class_names :
        Whatever ``dataset['label_encoder'].get_classes()`` returns
    distribution : dict
        Label -> sample count, as plain Python values. Empty when ``y_train``
        has no samples.
    """

    # Get class names
    label_encoder = dataset['label_encoder']
    class_names = label_encoder.get_classes()

    # Count classes
    labels, counts = np.unique(y_train, return_counts=True)
    n_samples = len(y_train)

    print("\n=== CLASS DISTRIBUTION ===")

    # Nothing to count: the imbalance ratio would be max/min of an empty array
    if n_samples == 0:
        print("(no samples)")
        return class_names, {}

    for class_id, count in zip(labels, counts):
        class_name = class_names[class_id] if class_id < len(class_names) else f"Class_{class_id}"
        percentage = count / n_samples * 100
        print(f"{class_name:12s}: {count:5d} samples ({percentage:5.1f}%)")

    imbalance_ratio = counts.max() / counts.min()
    print(f"\nImbalance ratio: {imbalance_ratio:.1f}:1")

    # tolist() converts numpy scalars to native Python types
    return class_names, dict(zip(labels.tolist(), counts.tolist()))

# Split the dataset with the default test_size and report the training-set class balance.
# Note: this rebinds `class_names` to the value returned by analyze_class_distribution.
X_train, X_test, y_train, y_test = prepare_data(dataset)
class_names, class_dist = analyze_class_distribution(dataset, y_train)


=== CLASS DISTRIBUTION ===
green       :   950 samples ( 90.0%)
safety_car  :    49 samples (  4.6%)
yellow      :    57 samples (  5.4%)

Imbalance ratio: 19.4:1


In [31]:
# Shape check after the transpose in prepare_data: (n_samples, n_features, n_timesteps)
X_train.shape

(1056, 9, 100)

In [32]:
# Keep a handle on the label encoder and display its class-name -> integer-label mapping
y_enc = dataset['label_encoder']
y_enc.class_to_idx

{np.str_('green'): 0,
 np.str_('red'): 1,
 np.str_('safety_car'): 2,
 np.str_('unknown'): 3,
 np.str_('vsc'): 4,
 np.str_('vsc_ending'): 5,
 np.str_('yellow'): 6}

In [33]:
# Per-class weights keyed by the encoder's integer label. safety_car (the target
# class) is weighted far above everything else.
# An earlier, milder scheme that was tried:
#     green 1.0, red 10.0, safety_car 20.0, unknown 5.0,
#     vsc 30.0, vsc_ending 100.0, yellow 8.0
class_weight = {
    y_enc.class_to_idx[status]: weight
    for status, weight in (
        ('green', 1.0),
        ('red', 25.0),
        ('safety_car', 100.0),  # Much higher
        ('unknown', 25.0),
        ('vsc', 50.0),
        ('vsc_ending', 50.0),
        ('yellow', 25.0),
    )
}

In [34]:
def create_models(class_weight: Optional[Dict] = None, random_state: int = 42):
    """Create dictionary of models to test.

    Parameters:
    -----------
    class_weight : dict, optional
        Label -> weight mapping handed to the logistic-regression and
        random-forest estimators. ``None`` selects ``'balanced'``.
    random_state : int
        Seed used for every stochastic model, including the stratified dummy
        baseline, so that repeated runs give the same results.

    Returns:
    --------
    dict mapping model name -> unfitted classifier
    """

    cls_weight = 'balanced' if class_weight is None else class_weight

    models = {
        "dummy_frequent": DummyClassifier(strategy='most_frequent'),

        # Seeded: stratified sampling is random, an unseeded baseline is not reproducible
        "dummy_stratified": DummyClassifier(strategy='stratified', random_state=random_state),

        "logistic_regression": Catch22Classifier(
            estimator=LogisticRegression(
                random_state=random_state,
                max_iter=3000,
                # solver='liblinear',
                solver='saga',
                penalty='l1',
                C=0.1,
                class_weight=cls_weight,
            ),
            outlier_norm=True,
            random_state=random_state,
        ),

        "random_forest": Catch22Classifier(
            estimator=RandomForestClassifier(
                n_estimators=100,
                random_state=random_state,
                class_weight=cls_weight,
                max_depth=10
            ),
            outlier_norm=True,
            random_state=random_state,
        )
    }

    return models

def train_single_model(model, model_name, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Classifier exposing ``fit`` and ``predict``.
        model_name: Label used in the printed progress lines and the result.
        X_train, y_train: Training inputs and labels.
        X_test, y_test: Held-out inputs and labels.

    Returns:
        On success, a dict with ``model_name``, ``accuracy``, ``f1_macro``,
        ``f1_weighted``, ``predictions`` and ``true_labels``. If anything in
        fitting, predicting or scoring raises, the error is printed and a
        dict with only ``model_name`` and ``error`` is returned instead.
    """
    print(f"\nTraining {model_name}...")

    try:
        model.fit(X_train, y_train)
        predicted = model.predict(X_test)

        # zero_division=0 keeps classes that are never predicted from raising warnings.
        accuracy = accuracy_score(y_test, predicted)
        macro_f1 = f1_score(y_test, predicted, average='macro', zero_division=0)
        weighted_f1 = f1_score(y_test, predicted, average='weighted', zero_division=0)

        print(f"  Accuracy: {accuracy:.4f}")
        print(f"  F1-Macro: {macro_f1:.4f}")
        print(f"  F1-Weighted: {weighted_f1:.4f}")

        return {
            "model_name": model_name,
            "accuracy": accuracy,
            "f1_macro": macro_f1,
            "f1_weighted": weighted_f1,
            "predictions": predicted,
            "true_labels": y_test,
        }

    except Exception as exc:
        message = str(exc)
        print(f"  ERROR: {message}")
        return {"model_name": model_name, "error": message}

# Example usage:
# models = create_models()
# results = {}
# for model_name, model in models.items():
#     results[model_name] = train_single_model(model, model_name, X_train, X_test, y_train, y_test)

In [46]:
# Build every candidate model with the custom class weights, then select one to work with.
models = create_models(class_weight=class_weight)
results = {}

model_name = 'logistic_regression'
model = models[model_name]

# Simple train/score path via train_single_model, left disabled in this cell.
# results[model_name] = train_single_model(model, model_name, X_train, X_test, y_train, y_test)

In [47]:
# Create model metadata for the selected model. The weights recorded here are the
# same `class_weight` dict that was passed to create_models when the model was built.
model_metadata = create_model_metadata(
    model_name=model_name,
    model=model,
    class_weights=class_weight
)

In [48]:
# Run comprehensive evaluation of the selected model, focused on the safety_car class.
# NOTE: this rebinds `results`, which an earlier cell initialised as an empty dict,
# to the return value of this single evaluation.
results = evaluator.evaluate_model(
    model=model,
    model_name=model_name,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    dataset_metadata=dataset_metadata,
    model_metadata=model_metadata,
    class_names=list(class_names),  # Ensure it's a list
    target_class="safety_car",
    save_results=True  # the output below lists a *_complete.json and a *_summary.txt file
)


EVALUATING: LOGISTIC_REGRESSION
Evaluation ID: logistic_regression_20250702_214809
Training model...




Generating predictions...

📊 OVERALL PERFORMANCE
Accuracy:    0.8113
F1-Macro:    0.6413
F1-Weighted: 0.8477

🎯 TARGET CLASS ANALYSIS: SAFETY_CAR
Precision:       0.2391
Recall:          0.8462
F1-Score:        0.3729
True Positives:    11
False Negatives:    2 (missed safety_car events)
False Positives:   35 (false safety_car alarms)
True Negatives:   217

📈 PER-CLASS PERFORMANCE
green       : P=0.985, R=0.803, F1=0.884, N=238
safety_car  : P=0.239, R=0.846, F1=0.373, N=13
yellow      : P=0.520, R=0.929, F1=0.667, N=14

🔍 CONFUSION MATRIX
                 Pred_green  Pred_safety_car  Pred_yellow
True_green              191               35           12
True_safety_car           2               11            0
True_yellow               1                0           13

💾 Results saved:
  Complete: evaluation_results/logistic_regression_20250702_214809_complete.json
  Summary:  evaluation_results/logistic_regression_20250702_214809_summary.txt


In [49]:
# 5. For evaluating against a different test set
def evaluate_on_different_test_set(model, test_config, evaluator, model_metadata, dataset_metadata, class_names,
                                   target_class="safety_car"):
    """Evaluate an already-fitted model on a separately loaded test set.

    A new dataset is built from ``test_config`` using the training window size
    and prediction horizon taken from ``dataset_metadata``; the model is NOT
    refitted. Metrics are computed, printed and saved through ``evaluator``.

    Args:
        model: Fitted classifier exposing ``predict`` (and optionally
            ``predict_proba``).
        test_config: DataConfig describing the sessions to load as test data.
        evaluator: Object providing ``_calculate_comprehensive_metrics``,
            ``_print_detailed_analysis`` and ``_save_results``.
        model_metadata: Metadata dataclass of the fitted model.
        dataset_metadata: Metadata dataclass of the training dataset.
        class_names: Iterable of class names, in label-index order.
        target_class: Class the detailed analysis focuses on.

    Returns:
        The results dict that was printed and saved, or ``None`` if
        prediction, metric calculation or saving raised.
    """
    class_name_list = list(class_names)

    # Load different test set.
    # NOTE(review): apart from window_size / prediction_horizon, the preprocessing
    # options below are hard-coded; confirm they match how the training set was built.
    test_dataset = create_safety_car_dataset(
        config=test_config,
        window_size=dataset_metadata.window_size,
        prediction_horizon=dataset_metadata.prediction_horizon,
        handle_non_numeric="encode",
        handle_missing=True,
        missing_strategy="forward_fill",
        normalize=True,
        normalization_method="per_sequence",
        target_column="TrackStatus",
        enable_debug=False
    )

    X_test_new = test_dataset['X'].transpose(0, 2, 1)  # Convert to Aeon format
    y_test_new = test_dataset['y']

    # Create new dataset metadata for this test set
    test_dataset_metadata = create_dataset_metadata_from_config(
        dataset_config=test_config,
        dataset=test_dataset,
        features_used=dataset_metadata.features_used
    )
    test_dataset_metadata.scope = f"external_test_{test_dataset_metadata.scope}"

    # Evaluate without retraining (model already fitted)
    try:
        y_pred_new = model.predict(X_test_new)

        # Probabilities are optional: report a failure instead of silently hiding it,
        # and do not intercept KeyboardInterrupt / SystemExit.
        y_pred_proba = None
        if hasattr(model, 'predict_proba'):
            try:
                y_pred_proba = model.predict_proba(X_test_new)
            except Exception as proba_error:
                print(f"Warning: predict_proba failed, continuing without probabilities: {proba_error}")

        # One clock reading so the evaluation id and the timestamp always agree.
        evaluated_at = datetime.now()
        eval_metadata = EvaluationMetadata(
            evaluation_id=f"{model_metadata.model_type}_external_test_{evaluated_at.strftime('%Y%m%d_%H%M%S')}",
            timestamp=evaluated_at.isoformat(),
            test_size=1.0,  # 100% test data
            target_class_focus=target_class,
            evaluation_metrics=["accuracy", "f1_macro", "f1_weighted", "precision", "recall"]
        )

        # Calculate metrics directly
        metrics = evaluator._calculate_comprehensive_metrics(
            y_test_new, y_pred_new, y_pred_proba, class_name_list, target_class
        )

        # Create results structure
        results = {
            "evaluation_metadata": asdict(eval_metadata),
            "dataset_metadata": asdict(test_dataset_metadata),
            "model_metadata": asdict(model_metadata),
            "metrics": metrics,
            "predictions": {
                "y_true": y_test_new.tolist(),
                "y_pred": y_pred_new.tolist(),
                "y_pred_proba": y_pred_proba.tolist() if y_pred_proba is not None else None
            },
            "class_info": {
                "class_names": class_name_list,
                "target_class": target_class,
                "target_class_index": class_name_list.index(target_class)
            },
            "note": "External test set evaluation - model was pre-trained"
        }

        # Print and save results
        evaluator._print_detailed_analysis(results)
        evaluator._save_results(results, eval_metadata.evaluation_id)

        return results

    except Exception as e:
        print(f"Error evaluating on external test set: {e}")
        import traceback
        traceback.print_exc()
        return None

# Example usage:
# Score the model fitted above on a different race without retraining it.
# Train on 2024 Saudi Arabian GP, test on 2025 Saudi Arabian GP
test_config = DataConfig(
    sessions=[SessionConfig(2025, 'Saudi Arabian Grand Prix', 'R')],
    drivers=['1'],
    include_weather=False
)

# Returns the results dict, or None if the evaluation raised (the function prints the traceback).
external_results = evaluate_on_different_test_set(
    model=model,  # Your already trained model
    test_config=test_config,
    evaluator=evaluator,
    model_metadata=model_metadata,
    dataset_metadata=dataset_metadata,
    class_names=class_names
)

2025-07-02 22:00:03,094 - f1_etl - INFO - Preprocessing configuration:
2025-07-02 22:00:03,095 - f1_etl - INFO -   Missing values: enabled (forward_fill)
2025-07-02 22:00:03,095 - f1_etl - INFO -   Normalization: enabled (per_sequence)


Loading session: 2025 Saudi Arabian Grand Prix R


core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 


📊 Track Status Analysis (training_data):
   green       : 60283 samples ( 95.2%)
   safety_car  :  2777 samples (  4.4%)
   yellow      :   270 samples (  0.4%)
   Missing classes: [np.str_('red'), np.str_('unknown'), np.str_('vsc'), np.str_('vsc_ending')]
✅ FixedVocabTrackStatusEncoder fitted
   Classes seen: ['green', 'safety_car', 'yellow']
   Total classes: 7
   Output mode: integer labels


2025-07-02 22:00:16,681 - f1_etl - INFO - Total sequences generated: 1265
2025-07-02 22:00:16,695 - f1_etl - INFO - Generated 1265 sequences with shape (1265, 100, 9)
2025-07-02 22:00:16,697 - f1_etl - INFO - No missing values detected, skipping imputation
2025-07-02 22:00:16,697 - f1_etl - INFO - Applying normalization with method: per_sequence
2025-07-02 22:00:16,727 - f1_etl - INFO - Final dataset summary:
2025-07-02 22:00:16,728 - f1_etl - INFO -   Sequences: 1265
2025-07-02 22:00:16,728 - f1_etl - INFO -   Features: 9
2025-07-02 22:00:16,729 - f1_etl - INFO -   Classes: 7 (integer)
2025-07-02 22:00:16,729 - f1_etl - INFO -   Label shape: (1265,)
2025-07-02 22:00:16,730 - f1_etl - INFO -     green       :  1204 samples ( 95.2%)
2025-07-02 22:00:16,730 - f1_etl - INFO -     safety_car  :    55 samples (  4.3%)
2025-07-02 22:00:16,730 - f1_etl - INFO -     yellow      :     6 samples (  0.5%)



📊 OVERALL PERFORMANCE
Accuracy:    0.7296
F1-Macro:    0.3706
F1-Weighted: 0.8099

🎯 TARGET CLASS ANALYSIS: SAFETY_CAR
Precision:       0.1673
Recall:          0.7455
F1-Score:        0.2733
True Positives:    41
False Negatives:   14 (missed safety_car events)
False Positives:  204 (false safety_car alarms)
True Negatives:  1006

📈 PER-CLASS PERFORMANCE
green       : P=0.980, R=0.733, F1=0.838, N=1204
safety_car  : P=0.167, R=0.745, F1=0.273, N=55
yellow      : P=0.000, R=0.000, F1=0.000, N=6

🔍 CONFUSION MATRIX
                 Pred_green  Pred_safety_car  Pred_yellow
True_green              882              204          118
True_safety_car          12               41            2
True_yellow               6                0            0

💾 Results saved:
  Complete: evaluation_results/logistic_regression_external_test_20250702_220021_complete.json
  Summary:  evaluation_results/logistic_regression_external_test_20250702_220021_summary.txt


In [50]:
# 6. Batch evaluation of multiple models
def run_comprehensive_model_comparison(X_train, X_test, y_train, y_test, dataset_metadata, class_names, evaluator, class_weight=None):
    """Evaluate every model from ``create_models`` and save a combined summary.

    Each model is evaluated through ``evaluator.evaluate_model`` (which also
    saves its own per-model results), then all results are written to one
    ``<comparison_id>_comparison.json`` file under ``evaluator.output_dir``.

    Args:
        X_train, y_train: Training inputs and labels.
        X_test, y_test: Held-out inputs and labels.
        dataset_metadata: Metadata dataclass describing the dataset.
        class_names: Iterable of class names, in label-index order.
        evaluator: Evaluator exposing ``evaluate_model`` and ``output_dir``.
        class_weight: Forwarded to ``create_models``; also recorded in the
            metadata of the logistic-regression and random-forest models.

    Returns:
        Dict mapping model name to the value returned by ``evaluate_model``.
    """
    models = create_models(class_weight=class_weight)
    all_results = {}
    banner = '=' * 100

    for model_name, model in models.items():
        print(f"\n{banner}")
        print(f"EVALUATING MODEL: {model_name}")
        print(f"{banner}")

        # Only the weighted estimators get class weights recorded; the dummy
        # baselines are built without them.
        uses_class_weight = 'logistic' in model_name or 'forest' in model_name
        model_metadata = create_model_metadata(
            model_name=model_name,
            model=model,
            class_weights=class_weight if uses_class_weight else None
        )

        # Run evaluation
        all_results[model_name] = evaluator.evaluate_model(
            model=model,
            model_name=model_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_metadata=dataset_metadata,
            model_metadata=model_metadata,
            class_names=list(class_names),
            target_class="safety_car",
            save_results=True
        )

    # Save comparison summary. One clock reading keeps the id and timestamp consistent.
    compared_at = datetime.now()
    comparison_id = f"model_comparison_{compared_at.strftime('%Y%m%d_%H%M%S')}"
    comparison_path = evaluator.output_dir / f"{comparison_id}_comparison.json"

    with open(comparison_path, 'w', encoding='utf-8') as f:
        json.dump({
            "comparison_id": comparison_id,
            "timestamp": compared_at.isoformat(),
            "models_compared": list(all_results.keys()),
            "results": all_results
        }, f, indent=2, default=str)  # default=str: stringify anything json cannot encode

    print(f"\n📊 Model comparison saved: {comparison_path}")
    return all_results

# 7. Example with different feature configurations
def evaluate_feature_configurations(dataset, dataset_metadata, class_names, evaluator, class_weight=None):
    """Evaluate the same Catch22 + logistic-regression model on several feature subsets.

    Three configurations are run: feature 0 only, features 0 and 3, and all
    features. Each one gets its own stratified 80/20 split (fixed seed), a
    fresh model, and metadata copies updated to describe the subset.

    Args:
        dataset: Dict with ``'X'`` of shape (n_samples, n_timesteps,
            n_features) and ``'y'`` labels.
        dataset_metadata: Metadata dataclass; deep-copied per configuration,
            never modified in place.
        class_names: Iterable of class names, in label-index order.
        evaluator: Evaluator exposing ``evaluate_model``.
        class_weight: Passed unchanged to LogisticRegression and recorded in
            the model metadata.

    Returns:
        Dict mapping configuration name to the ``evaluate_model`` result.
    """
    # Imported once here rather than on every loop iteration; ``copy`` is not
    # among the notebook's top-level imports.
    from copy import deepcopy

    X = dataset['X']  # Shape: (n_samples, n_timesteps, n_features)
    y = dataset['y']
    X_aeon = X.transpose(0, 2, 1)  # Convert to Aeon format: (n_samples, n_features, n_timesteps)
    n_channels = X_aeon.shape[1]

    configurations = [
        {
            "name": "speed_only",
            "X_data": X_aeon[:, 0:1, :],  # Speed only (assumes speed is index 0)
            "description": "univariate_speed_only"
        },
        {
            "name": "speed_and_throttle",
            "X_data": X_aeon[:, [0, 3], :],  # Speed and throttle (assuming throttle is index 3)
            "description": "bivariate_speed_throttle"
        },
        {
            "name": "all_features",
            "X_data": X_aeon,  # All features
            # Derived from the data so the recorded description stays true
            # when the dataset does not have exactly 9 features.
            "description": f"multivariate_all_{n_channels}_features"
        }
    ]

    base_model_name = "logistic_regression"
    all_results = {}

    for config in configurations:
        run_name = f"{base_model_name}_{config['name']}"
        X_config = config["X_data"]

        print(f"\n{'='*80}")
        print(f"FEATURE CONFIGURATION: {config['name'].upper()}")
        print(f"{'='*80}")

        # Split data for this configuration
        X_train_config, X_test_config, y_train_config, y_test_config = train_test_split(
            X_config, y, test_size=0.2, random_state=42, stratify=y
        )

        # Describe this subset on a copy so the caller's metadata is untouched
        config_dataset_metadata = deepcopy(dataset_metadata)
        config_dataset_metadata.features_used = config["description"]
        config_dataset_metadata.n_features = X_config.shape[1]
        config_dataset_metadata.is_multivariate = X_config.shape[1] > 1

        # Create fresh model instance
        model_config = Catch22Classifier(
            estimator=LogisticRegression(
                random_state=42,
                max_iter=3000,
                solver='saga',
                penalty='l1',
                C=0.1,
                class_weight=class_weight
            ),
            outlier_norm=True,
            random_state=42
        )

        config_model_metadata = create_model_metadata(
            model_name=run_name,
            model=model_config,
            class_weights=class_weight
        )

        # Run evaluation
        all_results[config['name']] = evaluator.evaluate_model(
            model=model_config,
            model_name=run_name,
            X_train=X_train_config,
            X_test=X_test_config,
            y_train=y_train_config,
            y_test=y_test_config,
            dataset_metadata=config_dataset_metadata,
            model_metadata=config_model_metadata,
            class_names=list(class_names),
            target_class="safety_car",
            save_results=True
        )

    return all_results

# 8. Quick evaluation function for iterative testing
def quick_eval(dataset, dataset_metadata, class_names, evaluator, model_name="logistic_regression", 
               custom_weights=None, feature_subset=None):
    """Quick evaluation of one model for rapid iteration.

    Splits the data 80/20 (stratified, fixed seed), fits a fresh
    Catch22 + logistic-regression model and runs ``evaluator.evaluate_model``.

    Args:
        dataset: Dict with ``'X'`` of shape (n_samples, n_timesteps,
            n_features) and ``'y'`` labels.
        dataset_metadata: Metadata dataclass; a deep copy is updated, the
            original is left untouched.
        class_names: Iterable of class names, in label-index order.
        evaluator: Evaluator exposing ``evaluate_model``.
        model_name: Only ``"logistic_regression"`` is supported.
        custom_weights: Class weights for the estimator. When omitted (or
            empty), the notebook-level ``class_weight`` variable is used if
            it exists; otherwise the estimator is unweighted.
        feature_subset: Feature indices to keep, or ``None`` for all features.

    Returns:
        The value returned by ``evaluator.evaluate_model``.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    from copy import deepcopy

    if model_name != "logistic_regression":
        raise ValueError(f"Quick eval not implemented for {model_name}")

    X = dataset['X']
    y = dataset['y']
    X_aeon = X.transpose(0, 2, 1)

    # Compare against None explicitly: truth-testing a numpy index array raises,
    # and an empty list must not be reported as "all features".
    use_all_features = feature_subset is None
    X_data = X_aeon if use_all_features else X_aeon[:, feature_subset, :]
    X_train_q, X_test_q, y_train_q, y_test_q = train_test_split(
        X_data, y, test_size=0.2, random_state=42, stratify=y
    )

    # Resolve the weights once so the model and its recorded metadata agree.
    # The notebook-level fallback is looked up explicitly, so a fresh kernel
    # without `class_weight` no longer fails with NameError.
    if custom_weights:
        effective_weights = custom_weights
    else:
        effective_weights = globals().get('class_weight')

    model = Catch22Classifier(
        estimator=LogisticRegression(
            random_state=42,
            max_iter=3000,
            solver='saga',
            penalty='l1',
            C=0.1,
            class_weight=effective_weights
        ),
        outlier_norm=True,
        random_state=42
    )

    # Create metadata
    subset_label = 'all' if use_all_features else len(feature_subset)
    feature_desc = f"custom_subset_{subset_label}_features"

    quick_dataset_metadata = deepcopy(dataset_metadata)
    quick_dataset_metadata.features_used = feature_desc
    quick_dataset_metadata.n_features = X_data.shape[1]

    run_name = f"{model_name}_quick"
    quick_model_metadata = create_model_metadata(
        model_name=run_name,
        model=model,
        class_weights=effective_weights  # the weights the model actually uses
    )

    # Run evaluation
    return evaluator.evaluate_model(
        model=model,
        model_name=run_name,
        X_train=X_train_q,
        X_test=X_test_q,
        y_train=y_train_q,
        y_test=y_test_q,
        dataset_metadata=quick_dataset_metadata,
        model_metadata=quick_model_metadata,
        class_names=list(class_names),
        target_class="safety_car",
        save_results=True
    )

In [51]:
# Quick-iteration example: evaluate the logistic-regression model on a single feature.
print("\n=== QUICK ITERATION TESTING ===")
# Test with just speed feature (feature index 0 is assumed to be speed - TODO confirm).
# No custom_weights are passed, so quick_eval falls back to the notebook-level class_weight.
speed_results = quick_eval(
    dataset=dataset,
    dataset_metadata=dataset_metadata,
    class_names=class_names,
    evaluator=evaluator,
    feature_subset=[0]
)


=== QUICK ITERATION TESTING ===

EVALUATING: LOGISTIC_REGRESSION_QUICK
Evaluation ID: logistic_regression_quick_20250702_220343
Training model...
Generating predictions...

📊 OVERALL PERFORMANCE
Accuracy:    0.2566
F1-Macro:    0.2455
F1-Weighted: 0.2918

🎯 TARGET CLASS ANALYSIS: SAFETY_CAR
Precision:       0.0935
Recall:          1.0000
F1-Score:        0.1711
True Positives:    13
False Negatives:    0 (missed safety_car events)
False Positives:  126 (false safety_car alarms)
True Negatives:   126

📈 PER-CLASS PERFORMANCE
green       : P=1.000, R=0.176, F1=0.300, N=238
safety_car  : P=0.094, R=1.000, F1=0.171, N=13
yellow      : P=0.155, R=0.929, F1=0.265, N=14

🔍 CONFUSION MATRIX
                 Pred_green  Pred_safety_car  Pred_yellow
True_green               42              125           71
True_safety_car           0               13            0
True_yellow               0                1           13

💾 Results saved:
  Complete: evaluation_results/logistic_re



In [52]:
# Heavier weighting experiment: every non-green class is doubled relative to
# the notebook's main class_weight scheme, with safety_car raised to 200.
high_safety_weights = {
    dataset['label_encoder'].class_to_idx[status]: weight
    for status, weight in (
        ('green', 1.0),
        ('red', 50.0),
        ('safety_car', 200.0),  # Even higher
        ('unknown', 50.0),
        ('vsc', 100.0),
        ('vsc_ending', 100.0),
        ('yellow', 50.0),
    )
}

# All features are used (no feature_subset); only the class weights change.
high_weight_results = quick_eval(
    dataset=dataset,
    dataset_metadata=dataset_metadata,
    class_names=class_names,
    evaluator=evaluator,
    custom_weights=high_safety_weights,
)


EVALUATING: LOGISTIC_REGRESSION_QUICK
Evaluation ID: logistic_regression_quick_20250702_220404
Training model...




Generating predictions...

📊 OVERALL PERFORMANCE
Accuracy:    0.7245
F1-Macro:    0.5681
F1-Weighted: 0.7843

🎯 TARGET CLASS ANALYSIS: SAFETY_CAR
Precision:       0.1818
Recall:          0.9231
F1-Score:        0.3038
True Positives:    12
False Negatives:    1 (missed safety_car events)
False Positives:   54 (false safety_car alarms)
True Negatives:   198

📈 PER-CLASS PERFORMANCE
green       : P=0.994, R=0.702, F1=0.823, N=238
safety_car  : P=0.182, R=0.923, F1=0.304, N=13
yellow      : P=0.419, R=0.929, F1=0.578, N=14

🔍 CONFUSION MATRIX
                 Pred_green  Pred_safety_car  Pred_yellow
True_green              167               53           18
True_safety_car           1               12            0
True_yellow               0                1           13

💾 Results saved:
  Complete: evaluation_results/logistic_regression_quick_20250702_220404_complete.json
  Summary:  evaluation_results/logistic_regression_quick_20250702_220404_summary.txt


In [None]:
# Comprehensive model comparison: evaluate every model from create_models on the
# same train/test split, using the notebook-level class_weight.
print("\n=== COMPREHENSIVE MODEL COMPARISON ===")
# The return value maps each model name to its evaluation results.
all_model_results = run_comprehensive_model_comparison(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    dataset_metadata=dataset_metadata,
    class_names=class_names,
    evaluator=evaluator,
    class_weight=class_weight
)


=== COMPREHENSIVE MODEL COMPARISON ===

EVALUATING MODEL: dummy_frequent

EVALUATING: DUMMY_FREQUENT
Evaluation ID: dummy_frequent_20250702_221117
Training model...
Generating predictions...

📊 OVERALL PERFORMANCE
Accuracy:    0.8981
F1-Macro:    0.3154
F1-Weighted: 0.8499

🎯 TARGET CLASS ANALYSIS: SAFETY_CAR
Precision:       0.0000
Recall:          0.0000
F1-Score:        0.0000
True Positives:     0
False Negatives:   13 (missed safety_car events)
False Positives:    0 (false safety_car alarms)
True Negatives:   252

📈 PER-CLASS PERFORMANCE
green       : P=0.898, R=1.000, F1=0.946, N=238
safety_car  : P=0.000, R=0.000, F1=0.000, N=13
yellow      : P=0.000, R=0.000, F1=0.000, N=14

🔍 CONFUSION MATRIX
                 Pred_green  Pred_safety_car  Pred_yellow
True_green              238                0            0
True_safety_car          13                0            0
True_yellow              14                0            0

💾 Results saved:
  Complete: evaluation_



Generating predictions...

📊 OVERALL PERFORMANCE
Accuracy:    0.8113
F1-Macro:    0.6413
F1-Weighted: 0.8477

🎯 TARGET CLASS ANALYSIS: SAFETY_CAR
Precision:       0.2391
Recall:          0.8462
F1-Score:        0.3729
True Positives:    11
False Negatives:    2 (missed safety_car events)
False Positives:   35 (false safety_car alarms)
True Negatives:   217

📈 PER-CLASS PERFORMANCE
green       : P=0.985, R=0.803, F1=0.884, N=238
safety_car  : P=0.239, R=0.846, F1=0.373, N=13
yellow      : P=0.520, R=0.929, F1=0.667, N=14

🔍 CONFUSION MATRIX
                 Pred_green  Pred_safety_car  Pred_yellow
True_green              191               35           12
True_safety_car           2               11            0
True_yellow               1                0           13

💾 Results saved:
  Complete: evaluation_results/logistic_regression_20250702_221117_complete.json
  Summary:  evaluation_results/logistic_regression_20250702_221117_summary.txt

EVALUATING MODEL: random_for