In [6]:
import numpy as np
import torch
import torch.nn as nn
import wandb
import os
import yaml
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from dotenv import load_dotenv
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from src.models.classification_rnn import ClassificationRNN, DEVICE
from src.utils.seed import set_seed
from src.utils.config import flatten_config
from src.datasets.window_maker import make_past_future_windows, load_parquet_files
from src.training.train_classification_rnn import run_classification_train_rnn

# Split / reproducibility settings: `seed` is passed as random_state and
# `test_size` as the held-out fraction to train_test_split further down.
seed, test_size = 42, 0.3

# Relative location of the processed AIS data (resolved against the working directory).
PARQUET_FILE_FINAL = os.path.join(*("data", "aisdk", "processed"))


In [2]:
# Generate the past/future windows from the input dataset using the helper's defaults.
# Per the recorded output below, this walks every trajectory segment and stores the
# result under data/aisdk/processed/windows_30_30.
# NOTE(review): presumably only needs re-running when the windows must be regenerated — confirm.
make_past_future_windows()

Loading input dataset...
   → Loaded 191,668 rows.
Total trajectory segments: 359

   → Processed 50/359 segments (+2 windows, cluster 8)
   → Processed 100/359 segments (+440 windows, cluster 5)
   → Processed 150/359 segments (+1041 windows, cluster 1)
   → Processed 200/359 segments (+171 windows, cluster 5)
   → Processed 250/359 segments (+547 windows, cluster 1)
   → Processed 300/359 segments (+420 windows, cluster 1)
   → Processed 350/359 segments (+10 windows, cluster 0)

DONE!
   → Processed segments: 359
   → Total windows generated: 171,461
   → Output stored under: data/aisdk/processed/windows_30_30



In [5]:
# Load the window partitions written by make_past_future_windows(); the recorded output
# shows one partition per cluster_id under data/aisdk/processed/windows_30_30.
# NOTE(review): presumably X = past windows, Y = future windows, C = cluster labels —
# confirm against src.datasets.window_maker.
X, Y, C = load_parquet_files()

→ loading data/aisdk/processed/windows_30_30/cluster_id=4
→ loading data/aisdk/processed/windows_30_30/cluster_id=3
→ loading data/aisdk/processed/windows_30_30/cluster_id=2
→ loading data/aisdk/processed/windows_30_30/cluster_id=5
→ loading data/aisdk/processed/windows_30_30/cluster_id=0
→ loading data/aisdk/processed/windows_30_30/cluster_id=7
→ loading data/aisdk/processed/windows_30_30/cluster_id=9
→ loading data/aisdk/processed/windows_30_30/cluster_id=8
→ loading data/aisdk/processed/windows_30_30/cluster_id=6
→ loading data/aisdk/processed/windows_30_30/cluster_id=1


### 1.1 Hyperparameters

In [7]:
# Train the classification RNN on X and C (presumably window inputs and cluster labels —
# TODO confirm). No hyperparameters are passed here, so the trainer's own defaults apply.
# The recorded run below was stopped by hand (KeyboardInterrupt) during the first epoch.
run_classification_train_rnn(X, C)

Training started on device: cpu...


Epoch 1/10:   3%|▎         | 112/4286 [00:05<03:07, 22.29it/s]


KeyboardInterrupt: 

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x162c83320>> (for post_run_cell), with arguments args (<ExecutionResult object at 172b79dc0, execution_count=7 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 169fc9310, raw_cell="run_classification_train_rnn(X, C)" transformed_cell="run_classification_train_rnn(X, C)
" store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/Users/helgamariamagnusdottir/Documents/dtu/deep_learning/DL-group-63-P29/train_classifier_rnn.ipynb#X21sZmlsZQ%3D%3D> result=None>,),kwargs {}:


ConnectionResetError: Connection lost

In [3]:
# --- Classification RNN hyperparameters ---

# Model architecture
# NOTE(review): the window partitions loaded in this notebook only show cluster_id 0-9;
# confirm that 20 classes is intended.
num_classes = 20  # number of groups from hdbscan
num_layers = 8
hidden_size = 64

# Optimisation
# NOTE(review): the recorded training run logged "Epoch 1/10" and was called with (X, C)
# only, so these values may not be consumed by run_classification_train_rnn — confirm.
lr = 1e-3
weight_decay = 1e-5
batch_size = 128
num_epochs = 100

## Read the data

In [None]:
# Fetch trajectories
# NOTE(review): this path climbs two directories, while PARQUET_FILE_FINAL and the window
# loader output elsewhere in this notebook use "data/aisdk/processed" relative to the
# working directory — confirm which location is correct.
df = pd.read_parquet('../../data/aisdk/processed/aisdk_2025')

# Convert Timestamp to datetime if it's not already
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Group trajectories: one time-ordered feature array per trajectory id.
# A single groupby pass replaces re-filtering the whole frame once per id.
# sort=False keeps ids in first-appearance order (same order as .unique()), so the
# seeded split below is unchanged; observed=True avoids empty groups if the id column
# is categorical. Rows with a missing trajectory id are skipped (groupby default).
feature_cols = ['UTM_x', 'UTM_y', 'SOG', 'v_east', 'v_north']
trajectories = [
    traj_data.sort_values('Timestamp')[feature_cols].to_numpy()
    for _, traj_data in df.groupby('Trajectory', sort=False, observed=True)
]

# Split into train / validation at the trajectory level, so all points of a
# trajectory land in the same subset.
train, val = train_test_split(trajectories, test_size=test_size, random_state=seed)

# Normalize: fit the scaler on training points only, then apply it to both subsets
# so no validation statistics leak into the scaling.
train_stacked = np.vstack(train)
scaler = StandardScaler()
scaler.fit(train_stacked)

train_s = [scaler.transform(traj) for traj in train]
val_s = [scaler.transform(traj) for traj in val]
