## **Import**

In [1]:
import csv
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
import pandas as pd
import random
import os
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import random
import math
from torch.nn.utils import weight_norm
from torch.utils.tensorboard import SummaryWriter
import os
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


import matplotlib.pyplot as plt
import shutil
from ipywidgets import interact, fixed
from ipywidgets import widgets
from ipywidgets import interactive, widgets
from IPython.display import display
from ipywidgets import interactive, widgets, HBox, VBox

from datetime import datetime
import socket
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
    print(torch.cuda.device_count())
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

Using device: cuda
Using GPU: Tesla V100-SXM2-16GB
1
GPU 0: Tesla V100-SXM2-16GB


## **Proprocess Data to Raw Data for Training**

In [2]:
PIXEL_TO_METER_SCALE = 13.913
image = Image.open("data/map/hkust_4f.jpg")
image = image.resize((int(image.size[0] / PIXEL_TO_METER_SCALE), 
                      int(image.size[1] / PIXEL_TO_METER_SCALE))).transpose(Image.FLIP_TOP_BOTTOM)

def process_csv_file(csv_path, image_size, pixel_to_meter_scale):
    path_data = {'x':[], 'y':[], "Bv":[], "Bh":[], "Bp":[]}
    
    with open(csv_path, 'r') as file:
        reader = csv.reader(file)
        header = next(reader)  # Skip the header
        
        for row in reader:
            x = float(row[3]) / pixel_to_meter_scale
            y = - float(row[4]) / pixel_to_meter_scale + image_size[1]
            Bv = float(row[0])
            Bh = float(row[1])
            Bp = float(row[2])
            
            path_data["x"].append(x)
            path_data["y"].append(y)
            path_data["Bv"].append(Bv)
            path_data["Bh"].append(Bh)
            path_data["Bp"].append(Bp)
    
    return path_data


train_raw_data = []
data_path = os.path.join(".", "data", "formatted", "HKUST_4F", "training data")

for root, _, files in os.walk(data_path):
    for file in files:
        if file.endswith('.csv'):
            train_raw_data.append(process_csv_file(os.path.join(root, file), image.size, PIXEL_TO_METER_SCALE))

test_raw_data = []
test_data_path = os.path.join(".", "data", "formatted", "HKUST_4F", "testing data")

for root, _, files in os.walk(test_data_path):
    for file in files:
        if file.endswith('.csv'):
            test_raw_data.append(process_csv_file(os.path.join(root, file), image.size, PIXEL_TO_METER_SCALE))
            

In [None]:
# Function to print the first 10 entries of a trajectory
def print_first_10_entries(traj, dataset_name):
    print(f"\n{dataset_name} Trajectory:")
    for i in range(min(10, len(traj['x']))):
        print(f"Entry {i+1}: x={traj['x'][i]:.2f}, y={traj['y'][i]:.2f}, Bv={traj['Bv'][i]:.2f}, Bh={traj['Bh'][i]:.2f}, Bp={traj['Bp'][i]:.2f}")

# Randomly select 3 trajectories from the training dataset
print("Training Dataset:")
for i in range(3):
    traj = random.choice(train_raw_data)
    print_first_10_entries(traj, f"Training Trajectory {i+1}")

# Randomly select 3 trajectories from the testing dataset
print("\nTesting Dataset:")
for i in range(3):
    traj = random.choice(test_raw_data)
    print_first_10_entries(traj, f"Testing Trajectory {i+1}")

## **Analyze Differences in Distributions between Train and Test**

In [None]:
import numpy as np

def calculate_stats(data_list, key):
    values = np.concatenate([np.array(d[key]) for d in data_list])
    return {
        'mean': np.mean(values),
        'std': np.std(values),
        'min': np.min(values),
        'max': np.max(values)
    }

keys = ['x', 'y', 'Bv', 'Bh', 'Bp']

print("Training Dataset Statistics:")
for key in keys:
    stats = calculate_stats(train_raw_data, key)
    print(f"{key}: mean={stats['mean']:.2f}, std={stats['std']:.2f}, min={stats['min']:.2f}, max={stats['max']:.2f}")

print("Testing Dataset Statistics:")
for key in keys:
    stats = calculate_stats(test_raw_data, key)
    print(f"{key}: mean={stats['mean']:.2f}, std={stats['std']:.2f}, min={stats['min']:.2f}, max={stats['max']:.2f}")

## **Length of the Raw Data**

In [None]:
def print_data_info(data, name):
    total_length = sum(len(d['x']) for d in data)
    individual_lengths = [len(d['x']) for d in data]
    num_trajectories = len(data)
    
    print(f"{name} data:")
    print(f"  Total length: {total_length}")
    print(f"  Individual lengths: {individual_lengths}")
    print(f"  Number of trajectories: {num_trajectories}")
    print()


print_data_info(train_raw_data, "Train Raw")
print_data_info(test_raw_data, "Test Raw")

In [None]:
def visualize_data(data):
    def update_plot(index):
        plt.figure(figsize=(10, 10))
        
        # Scatter plot for all points
        plt.scatter(data[index]["x"], data[index]["y"], s=30, alpha=0.5, label='Steps')
        
        # Highlight start and end points
        plt.scatter(data[index]["x"][0], data[index]["y"][0], color='green', s=100, label='Start')
        plt.scatter(data[index]["x"][-1], data[index]["y"][-1], color='red', s=100, label='End')
        
        plt.legend()
        plt.title(f"Trajectory at index {index}")
        plt.xlabel("X coordinate")
        plt.ylabel("Y coordinate")
        plt.grid(True)
        plt.axis('equal')  # This ensures the aspect ratio is 1:1
        plt.show()

    slider = widgets.IntSlider(
        value=0,
        min=0,
        max=len(data) - 1,
        step=1,
        description='Index:',
        continuous_update=False
    )

    widget = widgets.interactive(update_plot, index=slider)
    display(widget)

visualize_data(train_raw_data)
# visualize_data(test_raw_data)

## **Process Data to Sequences**

In [3]:
def prepare_sequences(data, sequence_length):
    X, y = [], []
    for traj in data:
        input_seq = np.column_stack((traj['Bv'], traj['Bh'], traj['Bp']))
        output_seq = np.column_stack((traj['x'], traj['y']))
        
        for i in range(len(input_seq) - sequence_length):
            X.append(input_seq[i:i+sequence_length])
            y.append(output_seq[i+sequence_length])
    
    return np.array(X), np.array(y)

# Set sequence length
sequence_length = 30

# Prepare training data
X_train_val, y_train_val = prepare_sequences(train_raw_data, sequence_length)

# Split training data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=42)

# Prepare test data
X_test, y_test = prepare_sequences(test_raw_data, sequence_length)

# Normalize input data
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_val_scaled = scaler_X.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)
X_test_scaled = scaler_X.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Normalize output data
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
y_val_scaled = scaler_y.transform(y_val)
y_test_scaled = scaler_y.transform(y_test)

print("Training data shape:", X_train_scaled.shape, y_train_scaled.shape)
print("Validation data shape:", X_val_scaled.shape, y_val_scaled.shape)
print("Testing data shape:", X_test_scaled.shape, y_test_scaled.shape)


Training data shape: (122406, 30, 3) (122406, 2)
Validation data shape: (30602, 30, 3) (30602, 2)
Testing data shape: (17777, 30, 3) (17777, 2)


### *What you get at this step is sequence of data of `Bv`, `Bp` and `Bh` followed by an `x` and `y` output*

## **Plot and Visualize Sequences**

In [None]:
def plot_single_dataset(index, data_type, dataset):
    plt.figure(figsize=(15, 5))

    if data_type == 'Raw':
        X = X_train if dataset == 'Train' else X_val if dataset == 'Validation' else X_test
        y = y_train if dataset == 'Train' else y_val if dataset == 'Validation' else y_test
    else:
        X = X_train_scaled if dataset == 'Train' else X_val_scaled if dataset == 'Validation' else X_test_scaled
        y = y_train_scaled if dataset == 'Train' else y_val_scaled if dataset == 'Validation' else y_test_scaled

    # Plot input sequence
    plt.subplot(1, 2, 1)
    plt.title(f'{data_type} Input Sequence - {dataset}')
    plt.plot(X[index, :, 0], label='Bv')
    plt.plot(X[index, :, 1], label='Bh')
    plt.plot(X[index, :, 2], label='Bp')
    plt.legend()
    plt.xlabel('Steps')
    plt.ylabel('Value')

    # Plot output
    plt.subplot(1, 2, 2)
    plt.title(f'{data_type} Output - {dataset}')
    plt.scatter(y[index, 0], y[index, 1], color='red', label='Position')
    plt.legend()
    plt.xlabel('X')
    plt.ylabel('Y')

    plt.tight_layout()
    plt.show()

def create_interactive_plot(dataset):
    max_index = len(X_train)-1 if dataset == 'Train' else len(X_val)-1 if dataset == 'Validation' else len(X_test)-1
    
    index_input = widgets.BoundedIntText(
        value=0,
        min=0,
        max=max_index,
        description=f'{dataset} Index:',
        style={'description_width': 'initial'}
    )
    
    index_slider = widgets.IntSlider(
        value=0,
        min=0,
        max=max_index,
        description='Progress:',
        style={'description_width': 'initial'}
    )
    
    # Link the input and slider
    widgets.jslink((index_input, 'value'), (index_slider, 'value'))
    
    data_type_widget = widgets.RadioButtons(options=['Raw', 'Scaled'], description='Data Type:')
    
    def update_plot(index, data_type):
        plot_single_dataset(index, data_type, dataset)
    
    interactive_plot = interactive(update_plot, index=index_input, data_type=data_type_widget)
    
    return VBox([
        widgets.HTML(f"<h3>{dataset} Dataset</h3>"),
        HBox([index_input, index_slider]),
        data_type_widget,
        interactive_plot.children[-1]
    ])

# Create and display interactive plots for each dataset
train_plot = create_interactive_plot('Train')
val_plot = create_interactive_plot('Validation')
test_plot = create_interactive_plot('Test')

display(train_plot, val_plot, test_plot)

## **Define the dataset and dataloader**

In [4]:
class TrajectoryDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create datasets
train_dataset = TrajectoryDataset(X_train_scaled, y_train_scaled)
val_dataset = TrajectoryDataset(X_val_scaled, y_val_scaled)
test_dataset = TrajectoryDataset(X_test_scaled, y_test_scaled)

# Create dataloaders
batch_size = 4096
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # It's already shuffled but doesnt hurt though
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## **Define Training Functions**

In [5]:
def calculate_original_losses(loader, model, scaler, dataset_name):
    total_mse = 0
    total_mae = 0
    total_samples = 0
    mse_criterion = nn.MSELoss(reduction='mean')
    mae_criterion = nn.L1Loss(reduction='mean')
    
    with torch.no_grad():
        for batch_X, batch_y in loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            
            outputs_original = torch.from_numpy(scaler_y.inverse_transform(outputs.cpu().numpy())).to(device)
            batch_y_original = torch.from_numpy(scaler_y.inverse_transform(batch_y.cpu().numpy())).to(device)
            
            mse = mse_criterion(outputs_original, batch_y_original)
            mae = mae_criterion(outputs_original, batch_y_original)
            
            total_mse += mse.item() * batch_y.size(0)
            total_mae += mae.item() * batch_y.size(0)
            total_samples += batch_y.size(0)
    
    mse = total_mse / total_samples
    mae = total_mae / total_samples
    print(f"Original {dataset_name} MSE: {mse:.4f}, MAE: {mae:.4f}")
    return mse, mae

def train_model(model, train_loader, val_loader, optimizer, device, num_epochs, log_original_loss_every, scaler_y, patience):
    pbar = tqdm(range(num_epochs))
    best_val_loss = float('inf')
    no_improve = 0

    for epoch in pbar:
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = nn.MSELoss()(outputs, batch_y)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        train_loss /= len(train_loader)
        
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss =  nn.MSELoss()(outputs, batch_y)
                val_loss += loss.item()
        
        val_loss /= len(val_loader)
        
        pbar.set_postfix({
            'Epoch': epoch+1,
            'Train Loss': f'{train_loss:.4f}',
            'Val Loss': f'{val_loss:.4f}'
        })
        
        if (epoch + 1) % log_original_loss_every == 0:
            train_mse, train_mae = calculate_original_losses(train_loader, model, scaler_y, "Train")
            val_mse, val_mae = calculate_original_losses(val_loader, model, scaler_y, "Val")
            # test_mse, test_mae = calculate_original_losses(test_loader, model, scaler_y, "Test")
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            no_improve = 0
        else:
            no_improve += 1
            if no_improve == patience:
                print("\nEarly stopping!")
                break

    return model


## **Some Parameters**

In [6]:
max_num_epochs = 200
best_val_loss = float('inf')
patience = 6
no_improve = 0
log_original_loss_every = 3

## **Use GRU RNN model**


In [9]:
class TrajectoryRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=3):  # Changed default to 3 layers
        super(TrajectoryRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        _, hidden = self.rnn(x)
        output = self.fc(hidden[-1])  # Use the last layer's hidden state
        return output

# Initialize the model
input_size = 3  # Bv, Bh, Bp
hidden_size = 256
output_size = 2  # x, y
num_layers = 3  # Specify the number of layers you want
model = TrajectoryRNN(input_size, hidden_size, output_size, num_layers=num_layers).to(device)

# Define loss function and optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# Call the function
model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    device=device,
    num_epochs=max_num_epochs,
    log_original_loss_every=log_original_loss_every,
    scaler_y=scaler_y,
    patience=patience
)

  1%|          | 2/200 [00:11<12:11,  3.69s/it, Epoch=3, Train Loss=0.8936, Val Loss=0.8859]

Original Train MSE: 929.5074, MAE: 22.6758


  2%|▏         | 3/200 [00:13<15:33,  4.74s/it, Epoch=3, Train Loss=0.8936, Val Loss=0.8859]

Original Val MSE: 929.7922, MAE: 22.6618


  2%|▎         | 5/200 [00:24<13:17,  4.09s/it, Epoch=6, Train Loss=0.8503, Val Loss=0.8394]

Original Train MSE: 894.3743, MAE: 21.8696


  3%|▎         | 6/200 [00:26<15:27,  4.78s/it, Epoch=6, Train Loss=0.8503, Val Loss=0.8394]

Original Val MSE: 897.3463, MAE: 21.8666


  4%|▍         | 8/200 [00:38<13:26,  4.20s/it, Epoch=9, Train Loss=0.7960, Val Loss=0.7884]

Original Train MSE: 850.0376, MAE: 20.9711


  4%|▍         | 9/200 [00:40<15:14,  4.79s/it, Epoch=9, Train Loss=0.7960, Val Loss=0.7884]

Original Val MSE: 861.1381, MAE: 21.0527


  6%|▌         | 11/200 [00:51<13:29,  4.28s/it, Epoch=12, Train Loss=0.7373, Val Loss=0.7351]

Original Train MSE: 789.6988, MAE: 20.0024


  6%|▌         | 12/200 [00:54<15:09,  4.84s/it, Epoch=12, Train Loss=0.7373, Val Loss=0.7351]

Original Val MSE: 806.3640, MAE: 20.1420


  7%|▋         | 14/200 [01:05<13:16,  4.28s/it, Epoch=15, Train Loss=0.6749, Val Loss=0.6809]

Original Train MSE: 728.1016, MAE: 18.8908


  8%|▊         | 15/200 [01:07<15:02,  4.88s/it, Epoch=15, Train Loss=0.6749, Val Loss=0.6809]

Original Val MSE: 753.3644, MAE: 19.1553


  8%|▊         | 17/200 [01:19<13:08,  4.31s/it, Epoch=18, Train Loss=0.6105, Val Loss=0.6194]

Original Train MSE: 645.3095, MAE: 17.5331


  9%|▉         | 18/200 [01:21<14:41,  4.84s/it, Epoch=18, Train Loss=0.6105, Val Loss=0.6194]

Original Val MSE: 678.6678, MAE: 17.9478


 10%|█         | 20/200 [01:32<12:54,  4.30s/it, Epoch=21, Train Loss=0.5347, Val Loss=0.5523]

Original Train MSE: 563.1891, MAE: 16.0306


 10%|█         | 21/200 [01:35<14:27,  4.85s/it, Epoch=21, Train Loss=0.5347, Val Loss=0.5523]

Original Val MSE: 611.1933, MAE: 16.6759


 12%|█▏        | 23/200 [01:46<12:36,  4.28s/it, Epoch=24, Train Loss=0.4600, Val Loss=0.4906]

Original Train MSE: 483.0601, MAE: 14.6253


 12%|█▏        | 24/200 [01:48<14:18,  4.88s/it, Epoch=24, Train Loss=0.4600, Val Loss=0.4906]

Original Val MSE: 544.9678, MAE: 15.5208


 13%|█▎        | 26/200 [02:00<12:32,  4.32s/it, Epoch=27, Train Loss=0.3854, Val Loss=0.4199]

Original Train MSE: 389.3890, MAE: 12.8984


 14%|█▎        | 27/200 [02:02<14:03,  4.88s/it, Epoch=27, Train Loss=0.3854, Val Loss=0.4199]

Original Val MSE: 461.3706, MAE: 14.0666


 14%|█▍        | 29/200 [02:14<12:24,  4.35s/it, Epoch=30, Train Loss=0.3150, Val Loss=0.3733]

Original Train MSE: 342.6562, MAE: 11.9868


 15%|█▌        | 30/200 [02:16<13:52,  4.90s/it, Epoch=30, Train Loss=0.3150, Val Loss=0.3733]

Original Val MSE: 411.5064, MAE: 13.1601


 16%|█▌        | 32/200 [02:28<12:05,  4.32s/it, Epoch=33, Train Loss=0.2631, Val Loss=0.3162]

Original Train MSE: 282.1410, MAE: 10.7378


 16%|█▋        | 33/200 [02:30<13:39,  4.90s/it, Epoch=33, Train Loss=0.2631, Val Loss=0.3162]

Original Val MSE: 355.4704, MAE: 12.0632


 18%|█▊        | 35/200 [02:41<11:53,  4.33s/it, Epoch=36, Train Loss=0.2118, Val Loss=0.2581]

Original Train MSE: 222.0661, MAE: 9.4114


 18%|█▊        | 36/200 [02:44<13:18,  4.87s/it, Epoch=36, Train Loss=0.2118, Val Loss=0.2581]

Original Val MSE: 296.2789, MAE: 10.8730


 19%|█▉        | 38/200 [02:55<11:40,  4.32s/it, Epoch=39, Train Loss=0.1784, Val Loss=0.2320]

Original Train MSE: 194.4951, MAE: 8.7622


 20%|█▉        | 39/200 [02:57<12:58,  4.84s/it, Epoch=39, Train Loss=0.1784, Val Loss=0.2320]

Original Val MSE: 267.2644, MAE: 10.2883


 20%|██        | 41/200 [03:09<11:18,  4.27s/it, Epoch=42, Train Loss=0.1492, Val Loss=0.1996]

Original Train MSE: 163.3686, MAE: 7.9556


 21%|██        | 42/200 [03:11<12:50,  4.87s/it, Epoch=42, Train Loss=0.1492, Val Loss=0.1996]

Original Val MSE: 234.3871, MAE: 9.5071


 22%|██▏       | 44/200 [03:22<11:08,  4.29s/it, Epoch=45, Train Loss=0.1217, Val Loss=0.1766]

Original Train MSE: 146.3689, MAE: 7.4316


 22%|██▎       | 45/200 [03:25<12:27,  4.82s/it, Epoch=45, Train Loss=0.1217, Val Loss=0.1766]

Original Val MSE: 216.2278, MAE: 9.0133


 24%|██▎       | 47/200 [03:36<10:55,  4.28s/it, Epoch=48, Train Loss=0.1050, Val Loss=0.1599]

Original Train MSE: 122.7223, MAE: 6.8718


 24%|██▍       | 48/200 [03:38<12:07,  4.79s/it, Epoch=48, Train Loss=0.1050, Val Loss=0.1599]

Original Val MSE: 191.1164, MAE: 8.4874


 25%|██▌       | 50/200 [03:50<11:30,  4.60s/it, Epoch=51, Train Loss=0.0852, Val Loss=0.1385]


KeyboardInterrupt: 

## **Variant of GRU Model**

In [11]:
class TrajectoryRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super(TrajectoryRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.rnn = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        
        # Add batch normalization after RNN
        self.bn = nn.BatchNorm1d(hidden_size)
        
        # Add a fully connected layer with ReLU activation
        self.fc1 = nn.Linear(hidden_size, hidden_size // 2)
        self.relu = nn.ReLU()
        
        # Add another batch normalization layer
        self.bn2 = nn.BatchNorm1d(hidden_size // 2)
        
        # Output layer
        self.fc2 = nn.Linear(hidden_size // 2, output_size)
    
    def forward(self, x):
        _, hidden = self.rnn(x)
        output = hidden[-1]  # Use the last layer's hidden state
        
        # Apply batch normalization
        output = self.bn(output)
        
        # Apply first fully connected layer and ReLU
        output = self.fc1(output)
        output = self.relu(output)
        
        # Apply second batch normalization
        output = self.bn2(output)
        
        # Apply final output layer
        output = self.fc2(output)
        
        return output

# Initialize the model
input_size = 3  # Bv, Bh, Bp
hidden_size = 512
output_size = 2  # x, y
num_layers = 3  # Specify the number of layers you want
model = TrajectoryRNN(input_size, hidden_size, output_size, num_layers=num_layers).to(device)

# Define loss function and optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Call the function
model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    device=device,
    num_epochs=max_num_epochs,
    log_original_loss_every=log_original_loss_every,
    scaler_y=scaler_y,
    patience=patience
)

  1%|          | 2/200 [00:29<32:12,  9.76s/it, Epoch=3, Train Loss=0.8587, Val Loss=0.8544]

Original Train MSE: 901.3625, MAE: 22.0700


  2%|▏         | 3/200 [00:33<38:36, 11.76s/it, Epoch=3, Train Loss=0.8587, Val Loss=0.8544]

Original Val MSE: 906.3299, MAE: 22.0983


  2%|▎         | 5/200 [01:02<34:00, 10.47s/it, Epoch=6, Train Loss=0.8254, Val Loss=0.8298]

Original Train MSE: 880.3318, MAE: 21.5404


  3%|▎         | 6/200 [01:07<38:06, 11.79s/it, Epoch=6, Train Loss=0.8254, Val Loss=0.8298]

Original Val MSE: 886.7945, MAE: 21.5868


  4%|▍         | 8/200 [01:36<34:07, 10.66s/it, Epoch=9, Train Loss=0.7922, Val Loss=0.7917]

Original Train MSE: 854.0382, MAE: 20.9462


  4%|▍         | 9/200 [01:41<37:42, 11.85s/it, Epoch=9, Train Loss=0.7922, Val Loss=0.7917]

Original Val MSE: 866.1204, MAE: 21.0422


  4%|▍         | 9/200 [01:49<38:46, 12.18s/it, Epoch=9, Train Loss=0.7922, Val Loss=0.7917]


KeyboardInterrupt: 

## **Use LSTM based Model**

In [None]:
class TrajectoryLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=3):
        super(TrajectoryLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        output = self.fc(hidden[-1])  # Use the last layer's hidden state
        return output

# Initialize the LSTM model
input_size = 3  # Bv, Bh, Bp
hidden_size = 512
output_size = 2  # x, y
num_layers = 4
lstm_model = TrajectoryLSTM(input_size, hidden_size, output_size, num_layers=num_layers).to(device)

# Define loss function and optimizer for LSTM
lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.001)

# Train the LSTM model
lstm_model = train_model(
    model=lstm_model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=lstm_optimizer,
    device=device,
    num_epochs=max_num_epochs,
    log_original_loss_every=log_original_loss_every,
    scaler_y=scaler_y,
    patience=patience
)

## **Use TCN Model**

In [None]:
class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp1 = Chomp1d(padding)
        self.bn1 = nn.BatchNorm1d(n_outputs)
        self.leaky_relu1 = nn.LeakyReLU(0.01)
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp1d(padding)
        self.bn2 = nn.BatchNorm1d(n_outputs)
        self.leaky_relu2 = nn.LeakyReLU(0.01)
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.bn1, self.leaky_relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.bn2, self.leaky_relu2, self.dropout2)
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.leaky_relu = nn.LeakyReLU(0.01)
        self.init_weights()

    def init_weights(self):
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.leaky_relu(out + res)

class TCNTrajectoryModel(nn.Module):
    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout):
        super(TCNTrajectoryModel, self).__init__()
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size=kernel_size, dropout=dropout)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        # x needs to have dimension (N, C, L) in order to be passed into CNN
        y1 = self.tcn(x.transpose(1, 2)).transpose(1, 2)
        y2 = self.flatten(y1[:, -1, :].unsqueeze(1))
        return self.linear(y2)

# Initialize the TCN model
input_size = 3  # Bv, Bh, Bp
output_size = 2  # x, y
num_channels = [64, 128, 256, 128, 64]  # Increased number of channels and layers
kernel_size = 3
dropout = 0.2

tcn_model = TCNTrajectoryModel(input_size, output_size, num_channels, kernel_size, dropout).to(device)

tcn_optimizer = torch.optim.Adam(tcn_model.parameters(), lr=0.001)

# Train the TCN model
tcn_model = train_model(
    model=tcn_model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=tcn_optimizer,
    device=device,
    num_epochs=max_num_epochs,
    log_original_loss_every=log_original_loss_every,
    scaler_y=scaler_y,
    patience=patience
)


## **Transformer Based Model (todo)**

## **Evaluate the model on Test Set (todo: buggy and problematic here)**

In [None]:
# Set the model to evaluation mode
model.eval()

# Evaluate on test set
test_mse, test_mae = calculate_original_losses(test_loader, model, scaler_y, "Test")

print(f"Final Test MSE: {test_mse:.4f}")
print(f"Final Test MAE: {test_mae:.4f}")

## **Interactive Visulization (todo)**

## **Save the Model (todo)**

In [None]:
# unique_id = f"{socket.gethostname()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
# dest_path = f"saved_models/best_model_{unique_id}.pth"

# os.makedirs('saved_models', exist_ok=True)
# !cp best_model.pth {dest_path}

# print(f"Best model copied and saved as '{dest_path}'")
