In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn

class FastF1Dataset(Dataset):
    """Sliding-window dataset over a row-indexable time series.

    Item ``i`` is the window ``data[i : i + sequence_length]`` converted to a
    ``float32`` tensor, so consecutive items overlap by
    ``sequence_length - 1`` rows.

    Parameters:
        data: Sequence of rows supporting ``len()`` and slicing
            (e.g. a NumPy array).
        sequence_length (int): Number of consecutive rows in each window.
    """

    def __init__(self, data, sequence_length):
        self.data = data
        self.sequence_length = sequence_length

    def __len__(self):
        # Clamp at zero: with fewer rows than one window the raw difference is
        # negative, and len() raises ValueError on a negative __len__.
        return max(0, len(self.data) - self.sequence_length + 1)

    def __getitem__(self, idx):
        """Return window ``idx`` as a ``float32`` tensor.

        Negative indices count from the last window.

        Raises:
            IndexError: If ``idx`` does not address a full window. Without
                this, slicing would silently return a short or empty window
                and plain iteration over the dataset would never stop.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            raise IndexError(f"window index out of range for dataset with {n_windows} windows")
        return torch.tensor(self.data[idx:idx + self.sequence_length], dtype=torch.float32)

# Rebuild a model and restore its saved weights
def load_model(model_path, model_class, sequence_length, input_dim, device):
    """Instantiate ``model_class`` and load a saved ``state_dict`` into it.

    Parameters:
        model_path: Path of a checkpoint readable by ``torch.load``.
        model_class: Callable invoked as ``model_class(sequence_length, input_dim)``.
        sequence_length: First argument passed to ``model_class``.
        input_dim: Second argument passed to ``model_class``.
        device: Passed to ``torch.load`` as ``map_location``.

    Returns:
        The instantiated model, switched to evaluation mode. The model itself
        is not moved to ``device`` here.
    """
    restored = model_class(sequence_length, input_dim)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    saved_weights = torch.load(model_path, map_location=device)
    restored.load_state_dict(saved_weights)
    restored.eval()
    return restored

# Calculate reconstruction error
def compute_reconstruction_error(inputs, outputs):
    """Mean absolute error between ``inputs`` and ``outputs``, one value per sample.

    Parameters:
        inputs (torch.Tensor): 3-D tensor; the mean is taken over dims 1 and 2.
        outputs (torch.Tensor): Tensor that can be subtracted from ``inputs``.

    Returns:
        np.ndarray: 1-D array with one error per entry along dim 0.
    """
    per_sample = torch.mean(torch.abs(inputs - outputs), dim=(1, 2))
    # Tensor.numpy() only works on CPU tensors, so copy back to host memory
    # first; .cpu() is a no-op when the tensor already lives on the CPU.
    return per_sample.detach().cpu().numpy()


# LSTM autoencoder: encode -> bottleneck -> decode -> linear projection
class LSTMAutoencoder(nn.Module):
    """Sequence autoencoder built from three stacked LSTMs and a linear head.

    Parameters:
        sequence_length: Accepted for interface compatibility; no layer
            defined here depends on it.
        num_features: Size of the last input dimension, and of the last
            output dimension.
    """

    def __init__(self, sequence_length, num_features):
        super().__init__()
        # Attribute names and creation order are kept as-is so that the keys
        # of previously saved state_dicts still match.
        self.encoder = nn.LSTM(input_size=num_features, hidden_size=64, batch_first=True)
        self.latent = nn.LSTM(input_size=64, hidden_size=32, batch_first=True)
        self.decoder = nn.LSTM(input_size=32, hidden_size=64, batch_first=True)
        self.output_layer = nn.Linear(in_features=64, out_features=num_features)

    def forward(self, x):
        """Reconstruct ``x``; the output has the same shape as the input."""
        encoded, _ = self.encoder(x)
        n_steps = encoded.size(1)
        # Keep only the encoder output at the final time step and tile it
        # across the whole sequence before feeding the bottleneck LSTM.
        last_step = encoded[:, -1].unsqueeze(1)
        tiled = last_step.repeat(1, n_steps, 1)
        bottleneck, _ = self.latent(tiled)
        decoded, _ = self.decoder(bottleneck)
        return self.output_layer(decoded)

# Set paths and parameters
model_path = "models/autoencoder_AdamW_lr0001_loss0.4037_fold5.pth"
new_data_path = "D:/F1LLM_Datasets/npz_normalized/train_data_only_failures/2024_LasVegasGrandPrix_MinMaxScaler_normalized_10_Engine.npz"

sequence_length = 20  # rows per sliding window fed to the autoencoder
threshold = 0.2061    # reconstruction errors above this value are counted as anomalies

# Load the new data. np.load on an .npz archive returns a lazy NpzFile that
# keeps the file open; the context manager closes it once 'data' is read.
# NOTE(review): allow_pickle=True unpickles object arrays - only use it on
# files from a trusted source.
with np.load(new_data_path, allow_pickle=True) as npz_file:
    new_data = npz_file['data']
new_data = new_data[:, :-1]  # drop the last column
new_data_array = np.array(new_data, dtype=np.float32)

# Create the test dataset and data loader (shuffle=False keeps the windows in
# their original order, so error index i corresponds to window i)
test_dataset = FastF1Dataset(new_data_array, sequence_length)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_dim = new_data_array.shape[1]  # Number of features per timestep
autoencoder = load_model(model_path, LSTMAutoencoder, sequence_length, input_dim, device).to(device)

# Test the model
def test_autoencoder(autoencoder, data_loader, device):
    """Run the autoencoder over every batch and collect per-sample errors.

    Parameters:
        autoencoder: Model called as ``autoencoder(batch)``; it is switched to
            evaluation mode before use.
        data_loader: Iterable yielding tensors (batches).
        device: Device each batch is moved to before the forward pass.

    Returns:
        np.ndarray: The values returned by ``compute_reconstruction_error``
        for every batch, concatenated in iteration order.
    """
    autoencoder.eval()
    collected = []

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for window_batch in data_loader:
            on_device = window_batch.to(device)
            reconstructed = autoencoder(on_device)
            collected += list(compute_reconstruction_error(on_device, reconstructed))

    return np.array(collected)

# Score every window of the new data with the loaded autoencoder
reconstruction_errors = test_autoencoder(autoencoder, data_loader=test_loader, device=device)

In [None]:
# Wrap the array loaded from the npz file (new_data) in a pandas DataFrame
import pandas as pd

race_df = pd.DataFrame(new_data)

# Column names, in order. Nothing in this cell attaches them to race_df.
cols = [
    'Time_in_ms', 'LapTime_in_ms', 'LapNumber', 'Position',
    'Speed', 'AirTemp', 'Humidity', 'Pressure',
    'TrackTemp', 'WindDirection', 'WindSpeed', 'DistanceToDriverAhead',
    'RPM', 'nGear', 'Throttle',
    'X', 'Y', 'Z',
    'Distance', 'TyreLife', 'Compound', 'Team',
    'TrackStatus', 'Rainfall', 'DriverAhead', 'Brake',
    'DRS', 'Event',
]

In [None]:
def plot_reconstruction_errors_with_threshold(errors, threshold=None,
                                              save_path="reconstruction_errors.png",
                                              tick_step=1000):
    """
    Plots reconstruction errors as a line graph with an optional threshold.

    Parameters:
        errors (np.array): Array of reconstruction errors.
        threshold (float, optional): Threshold for detecting anomalies. Drawn as a
            dashed red horizontal line when given. Defaults to None.
        save_path (str, optional): File the figure is written to before it is shown.
            Pass None to skip saving. Defaults to "reconstruction_errors.png".
        tick_step (int, optional): Spacing between x-axis ticks, in samples.
            Defaults to 1000.
    """
    plt.figure(figsize=(30, 12))
    plt.plot(errors, label="Reconstruction Errors", color='blue', linewidth=4)

    if threshold is not None:
        plt.axhline(y=threshold, color='red', linestyle='--', label="Threshold", linewidth=4)

    plt.xlabel("Sample Index", fontsize=40)
    plt.xticks(range(0, len(errors), tick_step), fontsize=35)
    plt.yticks(fontsize=35)
    plt.ylabel("Reconstruction Error", fontsize=40)

    # Add each legend entry only once: keying by label drops duplicate labels
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    # Legend font size
    plt.legend(by_label.values(), by_label.keys(), prop={'size': 35})
    plt.subplots_adjust(left=0.07, right=0.93, top=0.9, bottom=0.1)
    # Draw the x = 0 and y = 0 axes as solid black lines
    plt.axvline(x=0, color='black', linewidth=2)
    plt.axhline(y=0, color='black', linewidth=2)

    plt.grid(alpha=0.3)
    # Save before show(): some backends release the figure once it is shown
    if save_path is not None:
        plt.savefig(save_path)
    plt.show()

In [None]:
# Flag every window whose reconstruction error exceeds the threshold, then report the count
anomalies = np.greater(reconstruction_errors, threshold)
print(f"Total anomalies detected: {anomalies.sum()}")

In [None]:
# Plot the per-window errors together with the anomaly threshold
plot_reconstruction_errors_with_threshold(reconstruction_errors, threshold)