In [1]:
# Import standard libraries
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader

# Import custom modules
from data_preparation import load_and_preprocess_data, prepare_sequence_data, split_data_by_race, save_data_splits
from features import RaceFeatures
from lstm import F1PredictionModel, F1Dataset, F1DataPreprocessor, train_model, save_model_with_preprocessor
from evaluation import evaluate_model, plot_predictions

def _collect_predictions(model, loader):
    """Run ``model`` over ``loader`` and return flat prediction/target arrays.

    Each batch yielded by ``loader`` must be a mapping with ``'sequence'``,
    ``'static'`` and ``'target'`` tensors. The model is switched to eval mode
    and left there, and gradients are disabled for the whole pass.

    Args:
        model: A ``torch.nn.Module`` called as ``model(sequence, static)``.
        loader: Iterable of batch mappings (e.g. a ``DataLoader``).

    Returns:
        Tuple ``(predictions, targets)`` of 1-D NumPy arrays, in loader order.
        Both arrays are empty when ``loader`` yields no batches.
    """
    # Feed inputs on whatever device the model's parameters live on, so the
    # loop works whether or not training moved the model to an accelerator.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    predicted = []
    actual = []

    model.eval()
    with torch.no_grad():
        for batch in loader:
            outputs = model(batch['sequence'].to(device), batch['static'].to(device))
            # ``Tensor.numpy()`` only works on CPU tensors, hence ``.cpu()``.
            # ``reshape(-1)`` also turns a 0-d result (e.g. a squeezed batch
            # of one sample) into a length-1 array so it can be concatenated.
            predicted.append(outputs.detach().cpu().numpy().reshape(-1))
            actual.append(batch['target'].detach().cpu().numpy().reshape(-1))

    if not predicted:
        return np.empty(0), np.empty(0)
    return np.concatenate(predicted), np.concatenate(actual)


def main():
    """Train, evaluate, and save the LSTM model end to end.

    Steps: load the data, split it by race, build windowed sequences, fit the
    scalers on the training split only, train the model, report metrics on the
    test split in the original (unscaled) target units, plot the predictions,
    and save the model together with its preprocessor to ``lstm_model.pth``.
    """
    # Load and preprocess data
    print("Loading and preprocessing data...")
    df = load_and_preprocess_data()

    # Split data by race to prevent data leakage
    print("Splitting data...")
    train_df, test_df = split_data_by_race(df, test_size=0.2, random_state=42)
    save_data_splits(train_df, test_df)

    # Initialize preprocessor and features
    preprocessor = F1DataPreprocessor()
    race_features = RaceFeatures()

    # Prepare sequence data
    print("Preparing sequence data...")
    sequences_train, static_train, targets_train = prepare_sequence_data(
        train_df, race_features, window_size=3)
    sequences_test, static_test, targets_test = prepare_sequence_data(
        test_df, race_features, window_size=3)

    # Fit scalers on the training split only, then apply them to both splits
    print("Scaling data...")
    preprocessor.fit_scalers(sequences_train, static_train, targets_train)

    sequences_train_scaled, static_train_scaled, targets_train_scaled = preprocessor.transform_data(
        sequences_train, static_train, targets_train)
    sequences_test_scaled, static_test_scaled, targets_test_scaled = preprocessor.transform_data(
        sequences_test, static_test, targets_test)

    # Create datasets
    train_dataset = F1Dataset(sequences_train_scaled, static_train_scaled, targets_train_scaled)
    test_dataset = F1Dataset(sequences_test_scaled, static_test_scaled, targets_test_scaled)

    # Feature dimensions come from the scaled training arrays: the last axis
    # of the 3-D sequence array and the second axis of the 2-D static array.
    sequence_dim = sequences_train_scaled.shape[2]
    static_dim = static_train_scaled.shape[1]

    # Initialize the model with explicitly chosen hyperparameters
    model = F1PredictionModel(
        sequence_dim=sequence_dim,
        static_dim=static_dim,
        hidden_dim=64,
        num_layers=10,
        dropout_prob=0.5
    )

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Train model
    # NOTE(review): the test split doubles as the validation set here. If
    # train_model uses validation results for model selection or early
    # stopping, the test metrics below will be optimistic - confirm.
    print("Training model...")
    train_model(
        model,
        train_loader,
        test_loader,  # Using test_loader as validation for now
        epochs=10,
        learning_rate=0.001
    )

    # Evaluate on test set
    print("Evaluating model...")
    predictions, true_values = _collect_predictions(model, test_loader)

    # Inverse transform predictions and true values back to original units
    predictions = preprocessor.lap_time_scaler.inverse_transform(
        predictions.reshape(-1, 1)).flatten()
    true_values = preprocessor.lap_time_scaler.inverse_transform(
        true_values.reshape(-1, 1)).flatten()

    # Calculate and display evaluation metrics
    metrics = evaluate_model(true_values, predictions)
    print("Test set metrics:", metrics)

    # Plot results
    plot_predictions(true_values, predictions, model_name='LSTM Model')

    # Save the model
    save_model_with_preprocessor(
        model,
        preprocessor,
        sequence_dim,
        static_dim,
        'lstm_model.pth'
    )

# Run the pipeline only when this code is executed directly (as a script or
# a notebook cell); importing it as a module does not trigger a run.
if __name__ == "__main__":
   main()

Loading and preprocessing data...
(586171, 15)
(586171, 32)
(586171, 40)
(586171, 45)
(586171, 46)
(586171, 47)
(586171, 47)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  laps['pitstop_milliseconds'].fillna(0, inplace=True)  # Assuming 0 if no pit stop
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  laps['constructor_points'].fillna(laps['constructor_points'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will ne

(586171, 56)


KeyError: 'Compound'