In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

### LSTMModel interface

In [2]:
class LSTMModel(nn.Module):
    """
    A PyTorch implementation of a Long Short-Term Memory (LSTM) model for time-series forecasting.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers,
                 output_size,
                 dropout=0):

        super(LSTMModel, self).__init__()

        self.encoder = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            batch_first=True
        )

        self.decoder = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            batch_first=True
        )

        self.output_layer = nn.Linear(hidden_size, input_size)

        self.hidden_size = hidden_size
        self.output_size = output_size
        self.input_size = input_size
        self.num_layers = num_layers
        self.dropout = dropout

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        batch_size, seq_len, _ = x.shape

        # Encode: Get the final hidden state as the compressed representation
        _, (hidden, cell) = self.encoder(x)

        # Prepare decoder input: repeat the encoded representation for each timestep
        # Use the last hidden state as the encoded representation
        encoded = hidden[-1].unsqueeze(1).repeat(1, seq_len, 1)  # Shape: (batch, seq_len, hidden_size)

        # Decode: Reconstruct the sequence
        decoder_output, _ = self.decoder(encoded)

        # Map back to original feature space
        reconstructed = self.output_layer(decoder_output)

        return reconstructed

    def get_reconstruction_error(self, x):
        """Calculate reconstruction error for anomaly detection"""
        with torch.no_grad():
            reconstructed = self.forward(x)
            # Calculate MSE for each sequence
            mse = torch.mean((x - reconstructed) ** 2, dim=(1, 2))
            return mse.cpu().numpy()  # Changed from .gpu() to .cpu()

    def encode(self, x):
        """Get the encoded representation of input sequences"""
        with torch.no_grad():
            _, (hidden, _) = self.encoder(x)
            return hidden[-1]  # Return the last layer's hidden state

### Configs

In [3]:
# =============================================================================
# CONFIGURATION PARAMETERS - MODIFY THESE TO EXPERIMENT
# =============================================================================

# Data parameters
DATA_PATH = "LSTM_preprocessed.parquet"
EXPERIMENT_NAME = "lstm_per_route_v1"

# FEATURE_COLUMNS = ['latitude', 'longitude',
#                    'speed_over_ground', 'course_over_ground'
#                    ]

FEATURE_COLUMNS = [ "speed_over_ground", "course_over_ground",
                    "x_km", "y_km", "dist_to_ref", "zone"
                  ]

# Sequence parameters
SEQUENCE_LENGTH = 15        # Number of time steps in each sequence

# Training parameters
EPOCHS = 10                 # Maximum epochs
BATCH_SIZE = 16            
PATIENCE = 5                # Early stopping patience
VALIDATION_SIZE = 0.2       # Proportion of data for validation

HIDDEN_SIZE = 64
NUM_LAYERS = 1 

# Evaluation parameters
AUTOENCODER_THRESHOLD_PERCENTILE = 95

# Output directories
OUTPUT_DIR_AE = "models_per_route_lstm_ae"    # Autoencoder models

scaler = MinMaxScaler()

print(f"Configuration loaded for experiment: {EXPERIMENT_NAME}")
print(f"Training epochs: {EPOCHS}, Batch size: {BATCH_SIZE}")
print(f"Output directories: {OUTPUT_DIR_AE}")

Configuration loaded for experiment: lstm_per_route_v1
Training epochs: 10, Batch size: 16
Output directories: models_per_route_lstm_ae


### Utils

In [4]:
def create_sequences(data, seq_length):
    """
    Creates sequences from time-series data for training recurrent neural networks.
    """
    xs, ys = [], []
    for i in range(len(data) - seq_length):
        xs.append(data[i : i + seq_length])
        ys.append(data[i + seq_length])
    return np.array(xs), np.array(ys)

def divide_validation(X, y):
    X_train, y_train, X_val, y_val = [], [], [], []
    # Split the data into training and validation sets

def divide_normal_anomaly(df):
    normal_trip_ids = df.groupby('trip_id')['y_true'].all()
    normal_trip_ids = normal_trip_ids[normal_trip_ids == True].index
    df_completely_normal_trips = df[df['trip_id'].isin(normal_trip_ids)].copy()

    anomaly_trip_ids = df.groupby('trip_id')['y_true'].any()
    anomaly_trip_ids = anomaly_trip_ids[anomaly_trip_ids == True].index
    df_trips_with_anomalies = df[df['trip_id'].isin(anomaly_trip_ids)].copy()


    print(f"Total trips: {len(df['trip_id'].unique())}")
    print(f"Normal trips: {len(df_completely_normal_trips['trip_id'].unique())}")
    print(f"Anomaly trips: {len(df_trips_with_anomalies['trip_id'].unique())}")
    
    return df_completely_normal_trips, df_trips_with_anomalies
    
def get_data_loader(df):
    data_features = df[FEATURE_COLUMNS].values
    data_scaled = scaler.fit_transform(data_features)

    X, y = create_sequences(data_scaled, SEQUENCE_LENGTH)

    X_tensor = torch.from_numpy(X).float()
    y_tensor = torch.from_numpy(y).float()
    
    dataset = TensorDataset(X_tensor, y_tensor)

    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)

    return loader

def load_data_route(route, datapath = DATA_PATH):
    df = pd.read_parquet(DATA_PATH)
    df.sort_values(['trip_id', 'time_stamp'], inplace=True)
    df_route = df[df['start_port'] == route].copy()

    return df_route

def get_anomaly_example(df_anomalies):
    
    one_anomaly = df_anomalies[df_anomalies['trip_id'] == df_anomalies['trip_id'].unique()[0]].copy()
    one_anomaly['lstm_prediction'] = 0
    
    one_anomaly = one_anomaly.sort_values('time_stamp')
    one_an_scaled = scaler.transform(one_anomaly[FEATURE_COLUMNS].values)
    X_anomaly, y_anomaly = create_sequences(one_an_scaled, SEQUENCE_LENGTH)
    X_anomaly_tensor = torch.from_numpy(X_anomaly).float()

    return one_anomaly, X_anomaly_tensor, y_anomaly
    
def anomaly_check(one_anomaly):
    print("LSTM Predictions Summary:")
    print(f"Total points: {len(one_anomaly)}")
    print(f"LSTM predicted anomalies: {(one_anomaly['lstm_prediction'] == 1).sum()}")
    print(f"Ground truth anomalies: {(one_anomaly['y_true'] == 1).sum()}")

    # Calculate some basic metrics
    true_positives = ((one_anomaly['y_true'] == 1) & (one_anomaly['lstm_prediction'] == 1)).sum()
    false_positives = ((one_anomaly['y_true'] == 0) & (one_anomaly['lstm_prediction'] == 1)).sum()
    false_negatives = ((one_anomaly['y_true'] == 1) & (one_anomaly['lstm_prediction'] == 0)).sum()

    print(f"True positives: {true_positives}")
    print(f"False positives: {false_positives}")
    print(f"False Negatives: {false_negatives}")

In [15]:
def train(model,
          train_loader, 
          loss_function,
          optimizer,
          X_anomaly_tensor, 
          one_anomaly):
    best_val_loss = float('inf')
    patience_counter = 0
    
    # Enable cudnn benchmarking for better performance
    torch.backends.cudnn.benchmark = True
    
    # Mixed precision training for better memory usage
    # scaler_amp = torch.cuda.amp.GradScaler()
    
    for epoch in range(EPOCHS):
        # Training phase
        model.train()
        train_loss = 0.0
        num_batches = 0
    
        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
        train_reconstruction_errors = []
    
        for batch_idx, (batch_x, _) in enumerate(pbar):
            # Move batch to GPU
            batch_x = batch_x.to(device)
    
            # Mixed precision forward pass
            with torch.cuda.amp.autocast():
                reconstructed = model(batch_x)
                loss = loss_function(reconstructed, batch_x)
    
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
            # Gradient clipping to prevent exploding gradients
            # scaler_amp.unscale_(optimizer)
            # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    
            # scaler_amp.step(optimizer)
            # scaler_amp.update()
    
            batch_errors = model.get_reconstruction_error(batch_x)
            train_reconstruction_errors.extend(batch_errors)
            
            train_loss += loss.item()
            num_batches += 1
    
            # Update progress bar
            if batch_idx % 10 == 0:
                pbar.set_postfix({'loss': f'{train_loss/num_batches:.6f}'})
    
            # Clear cache periodically
            if batch_idx % 100 == 0:
                torch.cuda.empty_cache()
    
        # Validation phase
        model.eval()

        # val_loss = 0.0
        # num_val_batches = 0

        # with torch.no_grad():
        #     for batch_x, _ in val_loader:
        #         batch_x = batch_x.to(device)
        #         reconstructed = model(batch_x)
        #         loss = loss_function(reconstructed, batch_x)
        #         val_loss += loss.item()
        #         num_val_batches += 1
        #
        # avg_train_loss = train_loss / num_batches
        # avg_val_loss = val_loss / num_val_batches
        #
        # print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.6f}, Val Loss = {avg_val_loss:.6f}")
        #
        # # Learning rate scheduling
        # scheduler.step(avg_val_loss)
        #
        # # Early stopping
        # if avg_val_loss < best_val_loss:
        #     best_val_loss = avg_val_loss
        #     patience_counter = 0
        #     # Save best model
        #     torch.save(model.state_dict(), 'best_model.pth')
        # else:
        #     patience_counter += 1
        #     if patience_counter >= PATIENCE:
        #         print(f"Early stopping at epoch {epoch+1}")
        #         break
    
        # train_reconstruction_errors = model.get_reconstruction_error(X_train_tensor)
        # threshold = np.percentile(train_reconstruction_errors, AUTOENCODER_THRESHOLD_PERCENTILE)
    
        train_reconstruction_errors = np.array(train_reconstruction_errors)
        threshold = np.percentile(train_reconstruction_errors, AUTOENCODER_THRESHOLD_PERCENTILE)
        print(f"Anomaly threshold: {threshold:.6f}")
    
        reconstruction_errors = model.get_reconstruction_error(X_anomaly_tensor)
        print(f"Anomaly reconstruction error: {reconstruction_errors}")
    
        anomaly_mask = reconstruction_errors > threshold
        anomaly_indices = np.where(anomaly_mask)[0]
    
        # Mark points as anomalous based on which sequences they belong to
        for seq_idx in anomaly_indices:
            start_point = seq_idx
            end_point = min(seq_idx + SEQUENCE_LENGTH, len(one_anomaly))
    
            # Mark all points in this sequence as anomalous
            one_anomaly.iloc[start_point:end_point, one_anomaly.columns.get_loc('lstm_prediction')] = 1
    
        anomaly_check(one_anomaly)
    
    # Load best model
    # model.load_state_dict(torch.load('best_model.pth'))
    print("Training completed!")

def evaluate_model(model, train_loader):
    model.eval()
    train_reconstruction_errors = []
    
    with torch.no_grad():
        for batch_x, _ in train_loader:
            batch_x = batch_x.to(device)
            batch_errors = model.get_reconstruction_error(batch_x)
            train_reconstruction_errors.extend(batch_errors)
    
    train_reconstruction_errors = np.array(train_reconstruction_errors)
    threshold = np.percentile(train_reconstruction_errors, AUTOENCODER_THRESHOLD_PERCENTILE)
    print(f"Anomaly threshold: {threshold:.6f}")

    return threshold

### Data load and devide

In [6]:
df_KIEL = load_data_route("KIEL")
df_BREM = load_data_route("BREMERHAVEN")

print("BREMERHAVEN")
df_normal_B, df_anomalies_B = divide_normal_anomaly(df_BREM)
train_loader_B = get_data_loader(df_normal_B)

print("\nKIEL")
df_normal_K, df_anomalies_K = divide_normal_anomaly(df_KIEL)
train_loader_K = get_data_loader(df_normal_K)

BREMERHAVEN
Total trips: 702
Normal trips: 650
Anomaly trips: 14

KIEL
Total trips: 420
Normal trips: 356
Anomaly trips: 62


In [7]:
anomaly_B, X_anomaly_tensor_B, _ = get_anomaly_example(df_anomalies_B)
anomaly_K, X_anomaly_tensor_K, _ = get_anomaly_example(df_anomalies_K)

### Model configure and train

In [8]:
model_BREM = LSTMModel(input_size=len(FEATURE_COLUMNS), hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, output_size=len(FEATURE_COLUMNS))
loss_function_BREM = nn.MSELoss()
optimizer_BREM = torch.optim.Adam(model_BREM.parameters(), lr=0.001)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)

model_KIEL = LSTMModel(input_size=len(FEATURE_COLUMNS), hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, output_size=len(FEATURE_COLUMNS))
loss_function_KIEL = nn.MSELoss()
optimizer_KIEL = torch.optim.Adam(model_KIEL.parameters(), lr=0.001)

In [9]:
# Move to CUDA
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model_BREM = model_BREM.to(device)
model_KIEL = model_KIEL.to(device)


X_anomaly_tensor_B = X_anomaly_tensor_B.to(device)
X_anomaly_tensor_K = X_anomaly_tensor_K.to(device)

cuda


In [None]:
train(model_BREM, train_loader_B, loss_function_BREM, optimizer_BREM, X_anomaly_tensor_B, anomaly_B)

In [18]:
trashhold_BREM = evaluate_model(model_BREM, train_loader_B)

Anomaly threshold: 0.025404


In [None]:
train(model_KIEL, train_loader_K, loss_function_KIEL, optimizer_KIEL, X_anomaly_tensor_K, anomaly_K)

In [16]:
trashhold_KEIL = evaluate_model(model_KIEL, train_loader_K)

TypeError: evaluate_model() missing 1 required positional argument: 'train_loader'

In [None]:
# # Prepare anomaly data
# anomaly_data = one_anomaly[FEATURE_COLUMNS].values
# anomaly_data_scaled = scaler.transform(anomaly_data)
#
# if len(anomaly_data_scaled) >= SEQUENCE_LENGTH:
#     X_anomaly, _ = create_sequences(anomaly_data_scaled, SEQUENCE_LENGTH)
#
#     # Process in batches to avoid OOM
#     batch_size_test = 64
#     anomaly_mask = []
#
#     for i in range(0, len(X_anomaly), batch_size_test):
#         batch = X_anomaly[i:i+batch_size_test]
#         batch_tensor = torch.FloatTensor(batch).to(device)
#
#         with torch.no_grad():
#             batch_errors = model.get_reconstruction_error(batch_tensor)
#             batch_mask = batch_errors > threshold
#             anomaly_mask.extend(batch_mask)
#
#         # Clear cache after each batch
#         torch.cuda.empty_cache()
#
#     anomaly_mask = np.array(anomaly_mask)
#     anomaly_indices = np.where(anomaly_mask)[0]
#
#     # Initialize prediction column
#     one_anomaly['lstm_prediction'] = 0
#
#     # Mark anomalous sequences
#     for seq_idx in anomaly_indices:
#         start_point = seq_idx
#         end_point = min(seq_idx + SEQUENCE_LENGTH, len(one_anomaly))
#         one_anomaly.iloc[start_point:end_point, one_anomaly.columns.get_loc('lstm_prediction')] = 1

In [None]:
import joblib
from pathlib import Path

# Save to file
output_dir = Path("models_per_route_lstm_ae")
output_dir.mkdir(exist_ok=True)

def save_model(model, scaler, threshold, route_name):
    # Move model to CPU before saving
    model.cpu()
    
    # Save everything needed for inference
    lstm_artifacts = {
        "model_state": model.state_dict(),
        "scaler": scaler,
        "threshold": threshold,
        "model_config": {
            "input_size": len(FEATURE_COLUMNS),
            "hidden_size": HIDDEN_SIZE, 
            "num_layers": NUM_LAYERS,
            "sequence_length": SEQUENCE_LENGTH,
            "threshold_percentile": AUTOENCODER_THRESHOLD_PERCENTILE
        },
        "features": FEATURE_COLUMNS,
        "model_type": "lstm"
    }
    

    # Save for the specific route
    model_filename = output_dir / f"{route_name}_lstm_model.pkl"
    joblib.dump(lstm_artifacts, model_filename)

    print(f"LSTM model saved to {model_filename}")
    return model_filename

In [None]:
models_filenames = []
dispatcher_lstm = {}

# Save BREMERHAVEN model
model_filename_BREM = save_model(model_BREM, scaler, trashhold_BREM, "BREMERHAVEN")
models_filenames.append(model_filename_BREM)
dispatcher_lstm["BREMERHAVEN"] = str(model_filename_BREM)

# Save KIEL model
model_filename_KIEL = save_model(model_KIEL, scaler, trashhold_KIEL, "KIEL")
models_filenames.append(model_filename_KIEL)
dispatcher_lstm["KIEL"] = str(model_filename_KIEL)

# Save dispatcher
dispatcher_file = output_dir / "dispatcher.pkl"
joblib.dump(dispatcher_lstm, dispatcher_file)
print(f"Dispatcher saved to {dispatcher_file}")