In [1]:
!pip install dtw
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import csv
import os
from dtw import accelerated_dtw
from tqdm import tqdm
import math



In [2]:
# Load the query and support chunk arrays from disk. Code further down splits
# every row at index 192 into a "past" half ([:192]) and a "future" half ([192:]).
# NOTE(review): the (N, 384) row layout is taken from the inline comments below
# and the 192/192 slicing used elsewhere in this notebook -- confirm against the
# dataset-creation step.
train_query   = np.load("../DatasetCreation/training_dataset.npy")     # shape (NQ, 384)
train_support = np.load("../DatasetCreation/training_support_dataset.npy") # shape (NS, 384)

def precompute_neighbors(query_dataset, support_dataset, past_len=192):
    """
    For each query row, find the support row whose "past" window is nearest.

    Only the first `past_len` values of every row are compared (Euclidean
    distance); anything after that is ignored.

    Args:
        query_dataset:   array-like of shape (NQ, L) with L >= past_len.
        support_dataset: array-like of shape (NS, L) with L >= past_len.
        past_len:        number of leading values per row used for matching.
                         Defaults to 192, the split point used in this notebook.

    Returns:
        np.ndarray of shape (NQ,), dtype int64. Entry i is the index into
        `support_dataset` of the nearest support row for query i. Ties resolve
        to the lowest support index. Support rows whose distance is NaN or inf
        are never selected; if every distance is NaN/inf the result is 0.

    Raises:
        ValueError: if there is at least one query but `support_dataset` is
            empty (no valid neighbor index exists in that case).
    """
    n_queries = len(query_dataset)
    neighbor_array = np.zeros(n_queries, dtype=np.int64)
    if n_queries == 0:
        return neighbor_array
    if len(support_dataset) == 0:
        raise ValueError("support_dataset is empty; cannot assign nearest neighbors")

    # Slice the past windows once instead of once per (query, support) pair.
    query_past = np.asarray(query_dataset)[:, :past_len]        # (NQ, past_len)
    support_past = np.asarray(support_dataset)[:, :past_len]    # (NS, past_len)

    # One vectorised pass over all support rows per query. Looping over queries
    # keeps the temporary at (NS, past_len) rather than (NQ, NS, past_len).
    for i in range(n_queries):
        dists = np.linalg.norm(support_past - query_past[i], axis=1)  # (NS,)
        # A NaN distance never wins a strict "<" comparison; mapping it to +inf
        # keeps that rule, since argmin would otherwise return the NaN position.
        dists[np.isnan(dists)] = np.inf
        # argmin returns the first minimum, i.e. the lowest index on ties.
        neighbor_array[i] = np.argmin(dists)
    return neighbor_array

# Run the nearest-support search once, up front. Batch sampling later looks up
# each query's support row by index from this array instead of searching again.
print("Precomputing nearest support neighbors for each query sample...")
train_query_neighbors = precompute_neighbors(train_query, train_support)
print("Done. neighbor_array shape:", train_query_neighbors.shape)

class LSTMEmbed(nn.Module):
    """
    Sequence encoder: runs an LSTM over the input window and maps the final
    hidden state of the top LSTM layer to a fixed-size embedding.

    With the defaults this turns a (batch, seq_len, 1) window into a
    (batch, 64) embedding.
    """
    def __init__(self, input_dim=1, hidden_dim=64, num_layers=1, embed_dim=64):
        super().__init__()
        recurrent_cfg = dict(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.lstm = nn.LSTM(**recurrent_cfg)
        # Linear head: hidden_dim -> embed_dim
        self.fc_out = nn.Linear(hidden_dim, embed_dim)

    def forward(self, x):
        """
        x: (batch, seq_len, input_dim)
        returns: (batch, embed_dim)
        """
        # The per-step outputs and the cell state are not needed here; only the
        # final hidden states, shaped (num_layers, batch, hidden_dim).
        _, (final_hidden, _) = self.lstm(x)

        # Index -1 selects the top layer => (batch, hidden_dim)
        top_layer_state = final_hidden[-1]

        return self.fc_out(top_layer_state)

class SiameseLSTM(nn.Module):
    """
    Siamese wrapper: both inputs go through the same shared embedder and the
    element-wise difference of the two embeddings is returned.
    """
    def __init__(self, lstm_embed):
        super().__init__()
        # One embedder instance is used for both branches (shared weights).
        self.lstm_embed = lstm_embed

    def forward(self, x1, x2):
        """
        x1, x2: inputs accepted by the wrapped embedder (same shape for both)
        returns: embed(x1) - embed(x2)
        """
        return self.lstm_embed(x1) - self.lstm_embed(x2)

class LSTMForecaster(nn.Module):
    """
    Forecasts a sequence from a support "future" sequence, conditioned on a
    difference vector.

    The difference vector is projected to the LSTM hidden size and used as the
    initial hidden state; the LSTM then reads the support future and a linear
    head maps every step's output to one value.

    Args:
        input_dim:  features per time step of the support sequence.
        diff_dim:   size of the difference vector.
        hidden_dim: LSTM hidden size.
        num_layers: number of stacked LSTM layers. For num_layers > 1 the same
                    projected difference vector initialises every layer.
    """
    def __init__(self, input_dim=1, diff_dim=64, hidden_dim=64, num_layers=1):
        super().__init__()
        self.diff_to_hidden = nn.Linear(diff_dim, hidden_dim)
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )
        self.fc_out = nn.Linear(hidden_dim, 1)

    def forward(self, support_future, diff_vec):
        """
        support_future: (batch, seq_len, input_dim)
        diff_vec: (batch, diff_dim)
        => returns: (batch, seq_len, 1)
        """
        # Build the initial hidden state from the difference vector.
        h0 = self.diff_to_hidden(diff_vec)  # => (batch, hidden_dim)

        # nn.LSTM expects initial states shaped (num_layers, batch, hidden_dim).
        # A bare unsqueeze(0) only fits num_layers == 1, so broadcast the state
        # across however many layers the LSTM was built with.
        h0 = h0.unsqueeze(0).expand(self.lstm.num_layers, -1, -1).contiguous()
        # The cell state starts at zero for every layer.
        c0 = torch.zeros_like(h0)

        lstm_out, _ = self.lstm(support_future, (h0, c0))
        pred = self.fc_out(lstm_out)        # => (batch, seq_len, 1)
        return pred

def get_batch(query_dataset, support_dataset, neighbor_array, batch_size):
    """
    Sample a random batch of query chunks, each paired with its precomputed
    nearest support chunk.

    neighbor_array: shape (NQ,). neighbor_array[i] = support index for query i
    returns (x_test, y_test, x_support, y_support): float32 tensors, each of
    shape (B, 192, 1) when rows hold 384 values. The x_* tensors hold the
    first 192 values of each row, the y_* tensors hold the remainder.

    Query indices are drawn without replacement via np.random.choice, which
    raises ValueError if batch_size exceeds len(query_dataset).
    """
    def _as_column_tensor(rows):
        # Stack rows into a float32 (B, T) array, then add a trailing feature axis.
        return torch.tensor(np.array(rows, dtype=np.float32)).unsqueeze(-1)

    sampled = np.random.choice(len(query_dataset), batch_size, replace=False)

    # Gather each sampled query row and the support row it was matched to.
    query_rows = [query_dataset[q] for q in sampled]
    support_rows = [support_dataset[neighbor_array[q]] for q in sampled]

    x_test = _as_column_tensor([row[:192] for row in query_rows])
    y_test = _as_column_tensor([row[192:] for row in query_rows])
    x_support = _as_column_tensor([row[:192] for row in support_rows])
    y_support = _as_column_tensor([row[192:] for row in support_rows])

    return x_test, y_test, x_support, y_support

Precomputing nearest support neighbors for each query sample...
Done. neighbor_array shape: (16636,)


In [3]:
########################################
# 5) BUILD & TRAIN THE PLAIN LSTM MODEL
########################################
# Tunable settings, gathered in one place.
SEED = 42
EPOCHS = 10000
steps = 100            # optimisation steps per "epoch"; each step draws a fresh random batch
BATCH_SIZE = 16
CHECKPOINT_EVERY = 1   # save weights every N epochs (1 = after every epoch)
csv_filename = "lstm_baseline_training_log.csv"

# Seed NumPy (used by get_batch for sampling) and PyTorch (weight initialisation)
# so that a run can be reproduced.
np.random.seed(SEED)
torch.manual_seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"

# 1) Build LSTM embed => Siamese => Forecaster
lstm_embed = LSTMEmbed(
    input_dim=1,
    hidden_dim=64,      # LSTM hidden dimension
    num_layers=1,       # LSTM layers
    embed_dim=64        # final embedding dim for Siamese difference
)
siamese_model = SiameseLSTM(lstm_embed).to(device)
lstm_forecaster = LSTMForecaster(
    input_dim=1,
    diff_dim=64,
    hidden_dim=64,
    num_layers=1
).to(device)

# 2) Optimizer, Loss
# A single optimizer updates both the shared embedder and the forecaster.
params = list(siamese_model.parameters()) + list(lstm_forecaster.parameters())
optimizer = optim.Adam(params, lr=1e-3)
criterion_mae = nn.L1Loss()
criterion_mse = nn.MSELoss()

# 3) Training loop
# Start a fresh log with a header row; one row is appended after every epoch.
with open(csv_filename, "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["epoch", "MAE", "MSE"])

for epoch in range(EPOCHS):
    epoch_mae = 0.0
    epoch_mse = 0.0

    for step in range(steps):
        x_test, y_test, x_support, y_support = get_batch(
            query_dataset   = train_query,
            support_dataset = train_support,
            neighbor_array  = train_query_neighbors,
            batch_size      = BATCH_SIZE
        )
        x_test     = x_test.to(device)
        y_test     = y_test.to(device)
        x_support  = x_support.to(device)
        y_support  = y_support.to(device)

        optimizer.zero_grad()

        # Siamese difference vector between the query past and the support past
        diff_vec = siamese_model(x_test, x_support)        # => (B,64)

        # Forecast the query future from the support future + difference vector
        y_pred = lstm_forecaster(y_support, diff_vec)      # => (B,192,1)

        # MAE is the training objective.
        loss_mae = criterion_mae(y_pred, y_test)

        # MSE is only logged, so compute it without building an autograd graph.
        with torch.no_grad():
            loss_mse = criterion_mse(y_pred.detach(), y_test)

        # Backprop on MAE only
        loss_mae.backward()
        optimizer.step()

        epoch_mae += loss_mae.item()
        epoch_mse += loss_mse.item()

    avg_mae = epoch_mae / steps
    avg_mse = epoch_mse / steps
    print(f"Epoch {epoch+1}/{EPOCHS} => MAE: {avg_mae:.4f}, MSE: {avg_mse:.4f}")

    # Log
    with open(csv_filename, "a", newline='') as f:
        writer = csv.writer(f)
        writer.writerow([epoch+1, avg_mae, avg_mse])

    # Save weights for both models; the cadence is controlled by CHECKPOINT_EVERY.
    if (epoch + 1) % CHECKPOINT_EVERY == 0:
        torch.save(siamese_model.state_dict(),     f"lstm_embed_siamese_epoch_{epoch+1}.pth")
        torch.save(lstm_forecaster.state_dict(),  f"lstm_forecaster_epoch_{epoch+1}.pth")

Epoch 1/10000 => MAE: 667.5436, MSE: 786787.4888
Epoch 2/10000 => MAE: 648.2686, MSE: 757761.9525
Epoch 3/10000 => MAE: 648.5870, MSE: 762343.9794
Epoch 4/10000 => MAE: 651.3552, MSE: 764162.0178
Epoch 5/10000 => MAE: 661.5286, MSE: 784954.2438
Epoch 6/10000 => MAE: 646.0154, MSE: 756073.5922
Epoch 7/10000 => MAE: 644.5702, MSE: 760405.0959
Epoch 8/10000 => MAE: 631.0667, MSE: 737747.8444
Epoch 9/10000 => MAE: 605.0941, MSE: 690248.5337
Epoch 10/10000 => MAE: 615.5886, MSE: 706413.1937
Epoch 11/10000 => MAE: 629.9861, MSE: 731187.3622
Epoch 12/10000 => MAE: 615.2931, MSE: 709006.9659
Epoch 13/10000 => MAE: 614.7718, MSE: 722088.4500
Epoch 14/10000 => MAE: 604.3060, MSE: 696825.1509
Epoch 15/10000 => MAE: 615.3418, MSE: 716661.0447
Epoch 16/10000 => MAE: 599.2076, MSE: 685883.6591
Epoch 17/10000 => MAE: 603.2680, MSE: 696185.5105
Epoch 18/10000 => MAE: 610.4153, MSE: 709592.0403
Epoch 19/10000 => MAE: 580.2938, MSE: 655858.7222
Epoch 20/10000 => MAE: 583.9783, MSE: 664188.8347
Epoch 21/

KeyboardInterrupt: 