In [1]:
# Import standard libraries
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# Import custom modules
from data_preparation import load_and_preprocess_data, prepare_sequence_data, split_data_by_race
from features import RaceFeatures
from lstm import F1PredictionModel, F1Dataset, F1DataPreprocessor, train_model
from evaluation import evaluate_model, plot_predictions
from model_optimization import *

def _collect_test_predictions(model, loader):
    """Run ``model`` over every batch of ``loader`` without gradients.

    Each batch is indexed with the keys ``'sequence'``, ``'static'`` and
    ``'target'``. Inputs are moved to the device the model's parameters
    live on, and results are copied back to the CPU before conversion to
    NumPy, so the loop works regardless of where the model ended up after
    training.

    Args:
        model: A ``torch.nn.Module`` called as ``model(sequence, static)``.
        loader: An iterable of dict-like batches holding tensors.

    Returns:
        A ``(predictions, targets)`` tuple of NumPy column arrays with
        shape ``(n, 1)``, both still in the scaled target space.
    """
    # Fall back to CPU for a parameter-free model instead of raising.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    predicted_chunks = []
    target_chunks = []

    model.eval()
    with torch.no_grad():
        for batch in loader:
            outputs = model(batch['sequence'].to(device), batch['static'].to(device))
            # Tensor.numpy() only works on CPU tensors; reshaping per batch
            # also tolerates a model that squeezes its output to 0-d/1-d.
            predicted_chunks.append(outputs.detach().cpu().numpy().reshape(-1, 1))
            target_chunks.append(batch['target'].detach().cpu().numpy().reshape(-1, 1))

    if not predicted_chunks:
        # Keep the (0, 1) shape an empty list would have been reshaped to.
        return np.empty((0, 1)), np.empty((0, 1))
    return np.concatenate(predicted_chunks), np.concatenate(target_chunks)


def main():
    """Train, evaluate and save the hyperparameter-optimized LSTM model.

    Steps, in order:
      1. Load the data and split it by race into train / validation / test
         frames (``test_size=0.2`` applied twice, ``random_state=42``).
      2. Build windowed sequence, static and target arrays per split.
      3. Fit the preprocessor's scalers on the training split only, then
         transform all three splits with them.
      4. Search hyperparameters with ``optimize_hyperparameters`` and build
         the final model from the best parameters.
      5. Train for 20 epochs, predict on the test split, undo the target
         scaling, then print metrics and plot the predictions.
      6. Save the model together with its preprocessor to
         ``lstm_model_optimized.pth``.

    ``optimize_hyperparameters`` and ``save_model_with_preprocessor`` are
    expected to come from the wildcard import of ``model_optimization``.
    """
    # Load and preprocess data
    print("Loading and preprocessing data...")
    df = load_and_preprocess_data()

    # Split data by race to prevent data leakage
    print("Splitting data...")
    train_val_df, test_df = split_data_by_race(df, test_size=0.2, random_state=42)
    train_df, val_df = split_data_by_race(train_val_df, test_size=0.2, random_state=42)

    # Initialize preprocessor and features
    preprocessor = F1DataPreprocessor()
    race_features = RaceFeatures()

    # Prepare sequence data
    print("Preparing sequence data...")
    sequences_train, static_train, targets_train = prepare_sequence_data(train_df, race_features, window_size=3)
    sequences_val, static_val, targets_val = prepare_sequence_data(val_df, race_features, window_size=3)
    sequences_test, static_test, targets_test = prepare_sequence_data(test_df, race_features, window_size=3)

    # Fit scalers on training data only, then apply them to every split so
    # validation/test statistics never influence the scaling.
    print("Scaling data...")
    preprocessor.fit_scalers(sequences_train, static_train, targets_train)

    sequences_train_scaled, static_train_scaled, targets_train_scaled = preprocessor.transform_data(
        sequences_train, static_train, targets_train)
    sequences_val_scaled, static_val_scaled, targets_val_scaled = preprocessor.transform_data(
        sequences_val, static_val, targets_val)
    sequences_test_scaled, static_test_scaled, targets_test_scaled = preprocessor.transform_data(
        sequences_test, static_test, targets_test)

    # Create datasets
    train_dataset = F1Dataset(sequences_train_scaled, static_train_scaled, targets_train_scaled)
    val_dataset = F1Dataset(sequences_val_scaled, static_val_scaled, targets_val_scaled)
    test_dataset = F1Dataset(sequences_test_scaled, static_test_scaled, targets_test_scaled)

    # Model input sizes: last axis of the 3-D sequence array and second
    # axis of the 2-D static array.
    sequence_dim = sequences_train_scaled.shape[2]
    static_dim = static_train_scaled.shape[1]

    # Optimize hyperparameters
    print("Optimizing hyperparameters...")
    best_params = optimize_hyperparameters(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        sequence_dim=sequence_dim,
        static_dim=static_dim,
        n_trials=50
    )
    print("Best parameters:", best_params)

    # Create final model with optimized parameters
    final_model = F1PredictionModel(
        sequence_dim=sequence_dim,
        static_dim=static_dim,
        hidden_dim=best_params['hidden_dim'],
        num_layers=best_params['num_layers'],
        dropout_prob=best_params['dropout_prob']
    )

    # Create data loaders for final training; only the training loader is
    # shuffled so evaluation order stays deterministic.
    batch_size = best_params['batch_size']
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Train final model
    print("Training final model...")
    train_model(
        final_model,
        train_loader,
        val_loader,
        epochs=20,
        learning_rate=best_params['learning_rate']
    )

    # Evaluate on test set
    print("Evaluating model...")
    scaled_predictions, scaled_targets = _collect_test_predictions(final_model, test_loader)

    # Inverse transform predictions and true values back to the original
    # target units before computing metrics.
    predictions = preprocessor.lap_time_scaler.inverse_transform(scaled_predictions).flatten()
    true_values = preprocessor.lap_time_scaler.inverse_transform(scaled_targets).flatten()

    # Calculate and display evaluation metrics
    metrics = evaluate_model(true_values, predictions)
    print("Test set metrics:", metrics)

    # Plot results
    plot_predictions(true_values, predictions, model_name='LSTM Model')

    # Save the model
    save_model_with_preprocessor(
        final_model,
        preprocessor,
        sequence_dim,
        static_dim,
        'lstm_model_optimized.pth'
    )

# Run the full pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'optuna'