In [87]:
import torch
import os
import numpy as np
import pandas as pd
import joblib

import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

import RAE
import classification_rnn as CRNN
import trajectory_predictor as PRNN

import plotly.express as px
from pyproj import Transformer

In [88]:
# Pick the compute device: Apple's Metal backend (MPS) when PyTorch reports it as available, CPU otherwise.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [89]:
# Reinstate scaler
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load scaler files that come from a trusted source.
scaler_path = os.path.join("..", "..", "data", "aisdk", "scaler", "scaler_aisdk_2025.pkl")
obj = joblib.load(scaler_path)

# Feature names are only known when the payload is a dict that carries them.
saved_feature_names = None

if isinstance(obj, dict):
    # Payload holds raw parameters: rebuild a scaler around the stored mean/scale vectors.
    scaler = StandardScaler()
    scaler.mean_ = np.asarray(obj['mean'])
    scaler.scale_ = np.asarray(obj['scale'])
    saved_feature_names = obj.get('feature_names')
    print("Reconstructed StandardScaler from dict params.")
elif isinstance(obj, StandardScaler):
    # Payload is already a StandardScaler instance; use it as-is (no feature names available here).
    scaler = obj
    print("Loaded StandardScaler object from joblib.")
else:
    raise TypeError(f"Unexpected scaler payload type: {type(obj)}")

# Summarise what was restored.
if saved_feature_names is None:
    feature_names_repr = "None"
else:
    feature_names_repr = list(saved_feature_names)
print("Scaler ready. n_features:", scaler.mean_.shape[0], "| feature names:", feature_names_repr)

Loaded StandardScaler object from joblib.
Scaler ready. n_features: 5 | feature names: None


In [None]:
# Reinstate autoencoder
# map_location remaps tensors that were saved on another device (e.g. MPS or a GPU) onto the
# device chosen for this session, so the checkpoint also loads on the CPU fallback.
ae_checkpoint = torch.load('../../checkpoints/rae/best_rae_model.pth', map_location=device)

# Recreate the model architecture from the hyperparameters stored inside the checkpoint
ae_hparams = ae_checkpoint['hyperparameters']
rae = RAE.RecurrentAutoencoder(
    input_dim=ae_hparams['feature_size'],
    hidden_dim=ae_hparams['hidden_size'],
    latent_dim=ae_hparams['latent_dim'],
    num_layers_encoder=ae_hparams['encoder_layers'],
    num_layers_decoder=ae_hparams['decoder_layers'],
    dropout=ae_hparams['dropout']
).to(device)

# Restore the trained weights and switch to inference mode
rae.load_state_dict(ae_checkpoint['model_state_dict'])
rae.eval()

RecurrentAutoencoder(
  (encoder): GRU(5, 64, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  (fc_latent): Linear(in_features=128, out_features=12, bias=True)
  (bn_latent): LayerNorm((12,), eps=1e-05, elementwise_affine=True)
  (fc_z_to_hidden): Linear(in_features=12, out_features=128, bias=True)
  (decoder): GRU(5, 64, num_layers=2, batch_first=True, dropout=0.2)
  (fc_output): Linear(in_features=64, out_features=5, bias=True)
)

In [98]:
# Inspect the entry names stored in the classifier checkpoint.
# NOTE(review): `c_checkpoint` is only assigned in the "Reinstate classifier" cell further down,
# so on a fresh kernel run top-to-bottom this cell raises NameError — move it below that cell.
c_checkpoint.keys()

odict_keys(['gru.weight_ih_l0', 'gru.weight_hh_l0', 'gru.bias_ih_l0', 'gru.bias_hh_l0', 'gru.weight_ih_l0_reverse', 'gru.weight_hh_l0_reverse', 'gru.bias_ih_l0_reverse', 'gru.bias_hh_l0_reverse', 'gru.weight_ih_l1', 'gru.weight_hh_l1', 'gru.bias_ih_l1', 'gru.bias_hh_l1', 'gru.weight_ih_l1_reverse', 'gru.weight_hh_l1_reverse', 'gru.bias_ih_l1_reverse', 'gru.bias_hh_l1_reverse', 'gru.weight_ih_l2', 'gru.weight_hh_l2', 'gru.bias_ih_l2', 'gru.bias_hh_l2', 'gru.weight_ih_l2_reverse', 'gru.weight_hh_l2_reverse', 'gru.bias_ih_l2_reverse', 'gru.bias_hh_l2_reverse', 'gru.weight_ih_l3', 'gru.weight_hh_l3', 'gru.bias_ih_l3', 'gru.bias_hh_l3', 'gru.weight_ih_l3_reverse', 'gru.weight_hh_l3_reverse', 'gru.bias_ih_l3_reverse', 'gru.bias_hh_l3_reverse', 'gru.weight_ih_l4', 'gru.weight_hh_l4', 'gru.bias_ih_l4', 'gru.bias_hh_l4', 'gru.weight_ih_l4_reverse', 'gru.weight_hh_l4_reverse', 'gru.bias_ih_l4_reverse', 'gru.bias_hh_l4_reverse', 'classifier.0.weight', 'classifier.0.bias', 'classifier.2.weight', '

In [None]:
# Reinstate classifier
# NOTE(review): this path is relative to the current working directory, whereas the autoencoder
# checkpoint is loaded from '../../checkpoints/...' — confirm both resolve from the same place.
# map_location remaps tensors saved on another device onto the device chosen for this session,
# so the state dict also loads on the CPU fallback.
c_checkpoint = torch.load('checkpoints/classifier/classification_rnn_model.pt', map_location=device)

# The checkpoint is a bare state dict (it is passed straight to load_state_dict below), so the
# architecture is spelled out here and has to match the one the weights were trained with.
c_rnn = CRNN.ClassificationRNN(
    input_size=5,
    hidden_size=128,
    num_layers=5,
    num_classes=10,
    bidirectional=True,
    rnn_dropout=0.1
).to(device)

# Restore the trained weights and switch to inference mode
c_rnn.load_state_dict(c_checkpoint)
c_rnn.eval()

ClassificationRNN(
  (gru): GRU(5, 128, num_layers=5, batch_first=True, dropout=0.1, bidirectional=True)
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=2, bias=True)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=5, bias=True)
    (3): ReLU()
    (4): Linear(in_features=5, out_features=10, bias=True)
  )
)

In [99]:
# Show the configuration dict stored in the predictor checkpoint.
# NOTE(review): `p_checkpoint` is only assigned in the "Reinstate the predictor" cell further down,
# so on a fresh kernel run top-to-bottom this cell raises NameError — move it below that cell.
p_checkpoint['cfg']

{'device': 'mps',
 'input_dim': 5,
 'output_dim': 5,
 'hidden_dim': 128,
 'num_layers_encoder': 1,
 'num_layers_decoder': 1,
 'attn_dim': 64,
 'batch_size': 128,
 'lr': 0.0003,
 'weight_decay': 0.0,
 'teacher_forcing': 0.7,
 'max_norm': 1.0,
 'epochs': 100}

In [None]:
# Reinstate the predictor
# map_location remaps tensors saved on another device (the stored cfg records the training
# device) onto the device chosen for this session, so the load also works on the CPU fallback.
p_checkpoint = torch.load('checkpoints/predictor/trajectory_cluster-1.pt', map_location=device)

# Rebuild the architecture from the configuration stored alongside the weights
p_cfg = p_checkpoint['cfg']
p_rnn = PRNN.TrajectoryPredictor(
    input_dim=p_cfg['input_dim'],
    hidden_dim=p_cfg['hidden_dim'],
    output_dim=p_cfg['output_dim'],
    num_layers_encoder=p_cfg['num_layers_encoder'],
    num_layers_decoder=p_cfg['num_layers_decoder'],
    attn_dim=p_cfg['attn_dim']
).to(device)

# Restore the trained weights and switch to inference mode
p_rnn.load_state_dict(p_checkpoint['model_state'])
p_rnn.eval()

TrajectoryPredictor(
  (encoder): GRU(5, 128, batch_first=True, bidirectional=True)
  (hidden_enc_to_dec): Linear(in_features=256, out_features=128, bias=True)
  (attn_mlp): Linear(in_features=384, out_features=64, bias=True)
  (attn_v): Linear(in_features=64, out_features=1, bias=False)
  (decoder): GRU(261, 128, batch_first=True)
  (hidden_to_output): Linear(in_features=389, out_features=5, bias=True)
)

In [16]:
# Root folder of the AIS dataset, relative to the notebook's working directory
BASE = "../../data/aisdk/"

# Open the archive that holds the test windows
test_windows_path = os.path.join(BASE, "windows/test_trajectories.npz")
test_traj = np.load(test_windows_path)

In [17]:
# Pull the three arrays out of the archive, in this order:
#   "past"    -> X_test   (N, T_in, 5)   input windows
#   "future"  -> Y_test   (N, T_out, 5)  target windows
#   "traj_id" -> traj_ids (N,)           trajectory id per window
X_test, Y_test, traj_ids = (test_traj[key] for key in ("past", "future", "traj_id"))

# float32 copy of the inputs on the compute device, used for classification
X_test_t = torch.tensor(
    X_test,
    dtype=torch.float32,
    device=device,
)

In [18]:
# Test-set size = length of the leading (sample) dimension of the input tensor
N = X_test_t.size(0)
print(N)

55274


In [19]:
# 1) Classification: assign every test window to a class with the classifier network
c_rnn.eval()
with torch.no_grad():
    logits = c_rnn(X_test_t)               # (N, num_classes)
    probs = logits.softmax(dim=1)          # (N, num_classes), rows sum to 1
    pred_cls = torch.argmax(probs, dim=1)  # (N,) most probable class per window

# Summary: how many windows landed in each predicted class
unique, counts = pred_cls.unique(return_counts=True)
print("Predicted class distribution on test:")
for class_id, n_windows in zip(unique.tolist(), counts.tolist()):
    print(f"  class {class_id}: {n_windows} samples")

Predicted class distribution on test:
  class 9: 55274 samples


In [None]:
# 2) Select the windows the classifier assigned to cluster 9 (noise)
cluster_id = 9
mask = (pred_cls.cpu().numpy() == cluster_id)
idx = np.where(mask)[0]  # positions of the selected windows in the full test arrays
print(f"\nSelected {len(idx)} windows with predicted class == {cluster_id}")

if len(idx) == 0:
    # Report the cluster that was actually selected (the message used to hard-code "cluster 4")
    print(f"No test windows predicted as cluster {cluster_id}. Skipping trajectory prediction.")
else:
    # The "4" suffix in these names is historical; they hold the windows of `cluster_id`.
    X4 = X_test[idx]     # (M, T_in, 5)
    Y4 = Y_test[idx]     # (M, T_out, 5)

    # DataLoader (kept unshuffled so row m of `preds` corresponds to window idx[m])
    X4_t = torch.tensor(X4, dtype=torch.float32)
    Y4_t = torch.tensor(Y4, dtype=torch.float32)
    test_loader4 = DataLoader(TensorDataset(X4_t, Y4_t), batch_size=128, shuffle=False)

    # 3) Predict the future steps of the selected windows with the loaded trajectory predictor
    p_rnn.eval()
    preds = []
    gts = []
    mses = []
    with torch.no_grad():
        for xb, yb in test_loader4:
            xb = xb.to(device)
            yb = yb.to(device)
            # No targets and teacher_forcing_ratio=0.0: the ground truth is not passed to the model
            y_pred = p_rnn(
                xb,
                target_length=yb.size(1),
                targets=None,
                teacher_forcing_ratio=0.0
            )
            preds.append(y_pred.cpu().numpy())
            gts.append(yb.cpu().numpy())
            # MSE per-batch (in scaled feature space)
            mse = ((y_pred - yb) ** 2).mean().item()
            mses.append(mse)

    preds = np.concatenate(preds, axis=0)  # (M, T_out, 5)
    gts = np.concatenate(gts, axis=0)      # (M, T_out, 5)
    # Element-wise MSE over all windows; unlike the mean of batch MSEs it is not skewed by a
    # smaller final batch.
    overall_mse = float(np.mean((preds - gts) ** 2))

    print(f"\nTrajectory prediction summary for cluster {cluster_id}:")
    print(f"  Batches: {len(mses)}")
    print(f"  Mean batch MSE: {np.mean(mses):.6f}")
    print(f"  Overall MSE: {overall_mse:.6f}")

    # Optional: save predictions together with the indices they belong to
    out_path = os.path.join("../../checkpoints/final_predictor", f"test_preds_cluster{cluster_id}.npz")
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    np.savez_compressed(out_path, idx=idx, preds=preds, gts=gts)
    print(f"Saved predictions to {out_path}")


Selected 55274 windows with predicted class == 9

Trajectory prediction summary for cluster 9:
  Batches: 432
  Mean batch MSE: 0.038720
  Overall MSE: 0.038721
Saved predictions to ../../checkpoints/final_predictor/test_preds_cluster9.npz


In [40]:
def visualize_trajectories(df, color_by='MMSI', title=None, zoom=5, height=800):
    """Draw trajectories as lines on an OpenStreetMap base map.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'UTM_x', 'UTM_y', 'Timestamp' and the `color_by` column.
        'Trajectory' and 'SOG' are carried along when present.
        Coordinates are interpreted as EPSG:25832 and converted to lon/lat.
    color_by : str
        Column used to split the points into separately coloured lines.
    title : str or None
        Figure title. When None, a title is built from the smallest and
        largest 'Timestamp' value.
    zoom : int
        Initial map zoom level passed to plotly.
    height : int
        Figure height in pixels.

    Returns
    -------
    The plotly figure created by `px.line_map`.
    """
    # Convert projected coordinates to WGS84 longitude/latitude (always_xy -> x=lon, y=lat)
    transformer = Transformer.from_crs("EPSG:25832", "EPSG:4326", always_xy=True)
    lon, lat = transformer.transform(df['UTM_x'].values, df['UTM_y'].values)

    # Columns copied into the plotting frame
    vis_cols = ['Timestamp', 'UTM_x', 'UTM_y']
    if 'Trajectory' in df.columns:
        vis_cols.insert(0, 'Trajectory')
    if color_by not in vis_cols:
        vis_cols.append(color_by)
    if 'SOG' in df.columns and 'SOG' not in vis_cols:
        vis_cols.append('SOG')

    vis_df = df[vis_cols].copy()
    vis_df['Longitude'] = lon
    vis_df['Latitude'] = lat

    # Generate title if not provided
    if title is None:
        t_min = vis_df['Timestamp'].min()
        t_max = vis_df['Timestamp'].max()
        # Datetime-like values are shortened to their date; other timestamps (e.g. integer
        # step counters) are shown as they are instead of failing on a missing .date().
        date_min = t_min.date() if hasattr(t_min, 'date') else t_min
        date_max = t_max.date() if hasattr(t_max, 'date') else t_max
        title = f"Ship Trajectories - {date_min} to {date_max}"

    # Create visualization with trajectories using lat/lon on a map;
    # rows are ordered by time so each line is drawn in chronological order.
    fig = px.line_map(
        vis_df.sort_values('Timestamp'),
        lat="Latitude",
        lon="Longitude",
        color=color_by,
        zoom=zoom,
        title=title
    )

    # px.line_map draws on the MapLibre `map` subplot, so the base-map style is set through
    # `map_style`; `mapbox_style` only configures the unused `mapbox` subplot.
    fig.update_layout(
        map_style="open-street-map",
        showlegend=False,  # Hide legend since there can be many trajectories
        height=height
    )

    print(f"✓ Visualization complete - colored by '{color_by}'")

    return fig

In [66]:
# Sanity check: number of input windows stored in the test archive
test_traj['past'].shape[0]

55274

In [56]:
# Sanity check: number of predicted windows (compare with the test-set size)
preds.shape[0]

55274

In [86]:
# Take one window and plot its observed past, true future and predicted future.
# The helpers are defined unconditionally so later cells can use them even when the
# guard below skips the plot.

# Columns in the arrays (scaled during preprocessing)
cols = ['UTM_x', 'UTM_y', 'SOG', 'v_east', 'v_north']


def inverse_scale_array(arr, feature_names, scaler, saved_feature_names=None):
    """Undo standardisation on the first ``len(feature_names)`` columns of a 2-D array.

    Parameters
    ----------
    arr : array of shape (T, n_columns)
        Scaled values; the input is not modified.
    feature_names : list of str
        Names of the leading columns of `arr`, in column order.
    scaler : object with `mean_` and `scale_` arrays
        Source of the per-feature mean and scale.
    saved_feature_names : sequence of str or None
        Feature order the scaler was stored with. When given, mean/scale are looked up by
        name (KeyError if a name is missing); when None, the first ``len(feature_names)``
        scaler entries are assumed to be in the same order as `feature_names`.

    Returns
    -------
    numpy.ndarray (float64) with the leading columns mapped back via ``x * scale + mean``.
    """
    n_feat = len(feature_names)
    # Determine indices of the features in scaler to align mean/scale correctly
    if saved_feature_names is not None:
        name_to_idx = {name: i for i, name in enumerate(saved_feature_names)}
        idxs = [name_to_idx[name] for name in feature_names]
        mean = scaler.mean_[idxs]
        scale = scaler.scale_[idxs]
    else:
        # Assume ordering matches the first len(feature_names) entries
        mean = scaler.mean_[:n_feat]
        scale = scaler.scale_[:n_feat]
    # Work on a float64 copy: writing the result back into a float32 array would round
    # coordinates in the millions (UTM northings) to roughly half a metre.
    out = np.array(arr, dtype=np.float64)
    out[:, :n_feat] = out[:, :n_feat] * scale + mean
    return out


def to_df(arr, segment, t0=0):
    """Wrap a (T, len(cols)) array in a DataFrame labelled with `segment`.

    'Timestamp' is a running integer step counter starting at `t0`; column names come from
    the notebook-level `cols` list.
    """
    df = pd.DataFrame(arr, columns=cols)
    df["Segment"] = segment
    df["Timestamp"] = pd.RangeIndex(start=t0, stop=t0 + len(df), step=1)
    return df


if len(idx) == 0 or 'preds' not in globals():
    print("No samples/predictions to visualize.")
else:
    # Position of the example window within the selected cluster, clamped so a smaller
    # selection does not raise IndexError
    m = min(1300, len(idx) - 1)
    orig_i = idx[m]

    past = X_test[orig_i]        # (T_in, 5)
    gt_future = Y_test[orig_i]   # (T_out, 5)
    pred_future = preds[m]       # (T_out, 5)

    # Unscale using the loaded StandardScaler
    past_unscaled = inverse_scale_array(past, cols, scaler, saved_feature_names)
    gt_unscaled = inverse_scale_array(gt_future, cols, scaler, saved_feature_names)
    pred_unscaled = inverse_scale_array(pred_future, cols, scaler, saved_feature_names)

    df_past = to_df(past_unscaled, "Past", t0=0)
    # Ground truth and prediction cover the same future steps, so both start right after the past
    future_t0 = df_past["Timestamp"].max() + 1
    df_gt = to_df(gt_unscaled, "Future (GT)", t0=future_t0)
    df_pred = to_df(pred_unscaled, "Future (Pred)", t0=future_t0)

    vis_df = pd.concat([df_past, df_gt, df_pred], ignore_index=True)

    # Use the visualize_trajectories helper; color by the Segment column added above
    fig = visualize_trajectories(
        vis_df,
        color_by="Segment",
        title=f"Trajectory {orig_i}: Past vs Future (GT) vs Pred",
        zoom=6,
        height=700
    )
    fig.show()

✓ Visualization complete - colored by 'Segment'


In [76]:
# Whole-trajectory plotting (all instances of the same traj_id)
def plot_full_trajectory(traj_id_target):
    """Plot the true and the predicted path of every test window belonging to one trajectory.

    Reads the notebook-level variables `preds`, `idx`, `traj_ids`, `X_test`, `Y_test`, `scaler`,
    `saved_feature_names` and `cluster_id`, and uses the helpers `inverse_scale_array`, `to_df`
    and `visualize_trajectories` defined in earlier cells.

    Parameters
    ----------
    traj_id_target
        Value to match against the entries of `traj_ids`.

    Returns
    -------
    None. Shows three figures (overlay, true only, predicted only), or prints a message and
    returns early when there is nothing to plot.
    """
    if 'preds' not in globals() or len(idx) == 0:
        print("No predictions available.")
        return

    cols = ['UTM_x', 'UTM_y', 'SOG', 'v_east', 'v_north']

    all_idxs = np.where(traj_ids == traj_id_target)[0]
    if len(all_idxs) == 0:
        print(f"No windows found for traj_id={traj_id_target}")
        return

    # Map original indices to prediction rows for this traj_id
    # (row m_pred of `preds` belongs to test window idx[m_pred])
    pred_rows = [(m_pred, orig_i) for m_pred, orig_i in enumerate(idx) if traj_ids[orig_i] == traj_id_target]
    if len(pred_rows) == 0:
        print(f"No predicted windows for traj_id={traj_id_target} in selected cluster {cluster_id}.")
        return

    all_idxs_sorted = np.sort(all_idxs)

    # NOTE: to_df takes (arr, segment, t0) and labels columns with the notebook-level `cols`;
    # passing `cols` as an extra positional argument collides with `t0` and raises TypeError.

    # Build True path: first past, then all GT futures (continuous timestamps)
    t_true = 0
    dfs_true = []
    past = X_test[all_idxs_sorted[0]]
    past_unscaled = inverse_scale_array(past, cols, scaler, saved_feature_names)
    dfs_true.append(to_df(past_unscaled, "True", t0=t_true))
    t_true += past_unscaled.shape[0]
    for i_orig in all_idxs_sorted:
        gt_future = Y_test[i_orig]
        gt_unscaled = inverse_scale_array(gt_future, cols, scaler, saved_feature_names)
        dfs_true.append(to_df(gt_unscaled, "True", t0=t_true))
        t_true += gt_unscaled.shape[0]
    df_true = pd.concat(dfs_true, ignore_index=True)

    # Build Predicted path: same past, then predicted futures for available windows
    t_pred = 0
    dfs_pred = []
    dfs_pred.append(to_df(past_unscaled, "Predicted", t0=t_pred))
    t_pred += past_unscaled.shape[0]
    for m_pred, i_orig in sorted(pred_rows, key=lambda x: x[1]):
        pred_future = preds[m_pred]
        pred_unscaled = inverse_scale_array(pred_future, cols, scaler, saved_feature_names)
        dfs_pred.append(to_df(pred_unscaled, "Predicted", t0=t_pred))
        t_pred += pred_unscaled.shape[0]
    df_pred = pd.concat(dfs_pred, ignore_index=True)

    # Combined overlay
    vis_df = pd.concat([df_true, df_pred], ignore_index=True)
    fig = visualize_trajectories(
        vis_df,
        color_by="Segment",
        title=f"Full Trajectory {traj_id_target}: True vs Predicted",
        zoom=6,
        height=800
    )
    fig.show()

    # Optional: separate GT-only and Pred-only maps
    fig_gt = visualize_trajectories(df_true, color_by="Segment", title=f"Trajectory {traj_id_target}: True", zoom=6, height=700)
    fig_gt.show()
    fig_pred = visualize_trajectories(df_pred, color_by="Segment", title=f"Trajectory {traj_id_target}: Predicted", zoom=6, height=700)
    fig_pred.show()

# Example: plot the full trajectory that contains one of the selected windows.
# To inspect a specific trajectory instead, call plot_full_trajectory(<traj_id>) directly.
if len(idx) > 0:
    # Clamp the example position: the guard above only ensures idx is non-empty, not that
    # it has more than 1313 entries.
    example_pos = min(1313, len(idx) - 1)
    plot_full_trajectory(traj_ids[idx[example_pos]])
else:
    print("No windows in selected cluster; cannot plot full trajectory.")

✓ Visualization complete - colored by 'Segment'


✓ Visualization complete - colored by 'Segment'


✓ Visualization complete - colored by 'Segment'
