In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import math
import os
import inspect
import csv

In [2]:
class SelfAttention(nn.Module):
    """
    Single-head scaled dot-product self-attention.

    Accepts an input of shape (..., T, hidden_dim) -- the usual batched
    (B, T, hidden_dim) case, an unbatched (T, hidden_dim) sequence, or any
    number of extra leading dimensions -- and returns a tensor of the same shape.

    No mask is applied, so every position attends to every position of the
    same sequence, and there is no output projection after the weighted sum.
    """
    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Independent learned projections for queries, keys and values.
        # Attribute names are part of the checkpoint (state_dict) format.
        self.Wq = nn.Linear(hidden_dim, hidden_dim)
        self.Wk = nn.Linear(hidden_dim, hidden_dim)
        self.Wv = nn.Linear(hidden_dim, hidden_dim)
        # Raw scores are divided by sqrt(hidden_dim) before the softmax.
        self.scale = math.sqrt(hidden_dim)

    def forward(self, x):
        """
        x: (..., seq_len, hidden_dim)
        returns: (..., seq_len, hidden_dim)
        """
        Q = self.Wq(x)                            # (..., T, hidden_dim)
        K = self.Wk(x)                            # (..., T, hidden_dim)
        V = self.Wv(x)                            # (..., T, hidden_dim)

        # matmul broadcasts over leading dimensions, so the layer is not tied
        # to exactly three input dimensions the way bmm is.
        attn_scores = torch.matmul(Q, K.transpose(-2, -1)) / self.scale  # (..., T, T)
        # Normalise over the key axis: each query's weights sum to 1.
        attn_weights = torch.softmax(attn_scores, dim=-1)                # (..., T, T)

        out = torch.matmul(attn_weights, V)                              # (..., T, hidden_dim)
        return out

class LSTMAttnNoDiff(nn.Module):
    """
    Direct-forecasting baseline: LSTM encoder -> single-head self-attention
    -> per-time-step linear readout.

    Shapes:
    - Input:  (B, 192, 1)   (the 'history')
    - Output: (B, 192, 1)   (the 'forecast' for the next 48 hours)

    Notes:
    - One value is emitted per input time step, so the 192 history steps are
      mapped to 192 forecast steps in a single forward pass.
    - This baseline has no "difference vector" or "support" logic.
    """
    def __init__(self, hidden_dim=64, num_layers=1):
        super().__init__()
        # Univariate series in, hidden_dim features per time step out.
        lstm_config = dict(
            input_size=1,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.lstm = nn.LSTM(**lstm_config)
        self.attention = SelfAttention(hidden_dim)
        # Collapses each time step's feature vector to a single scalar.
        self.fc_out = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """
        x: (batch, 192, 1), the 'history' chunk
        returns: (batch, 192, 1), the 'forecast'
        """
        # Only the per-step outputs are used; the final (h, c) state is dropped.
        encoded, _ = self.lstm(x)            # (B, 192, hidden_dim)
        mixed = self.attention(encoded)      # (B, 192, hidden_dim)
        return self.fc_out(mixed)            # (B, 192, 1)


In [4]:
# Each row is one 384-sample chunk; get_batch splits it into a 192-step model
# input followed by a 192-step forecast target.
training_dataset = np.load("../DatasetCreation/training_dataset.npy")   # shape: (N, 384)
val_dataset   = np.load("../DatasetCreation/validation_dataset.npy") # shape: (M, 384)

print("train_dataset shape:", training_dataset.shape)
print("val_dataset shape:",   val_dataset.shape)

# Fail fast on a wrongly shaped file instead of training on silently
# mis-split inputs/targets further down the notebook.
assert training_dataset.ndim == 2 and training_dataset.shape[1] == 384, (
    f"training_dataset must have shape (N, 384), got {training_dataset.shape}"
)
assert val_dataset.ndim == 2 and val_dataset.shape[1] == 384, (
    f"val_dataset must have shape (M, 384), got {val_dataset.shape}"
)

train_dataset shape: (198071, 384)
val_dataset shape: (84914, 384)


In [5]:
def get_batch(dataset, batch_size, history_len=192):
    """
    Sample a random mini-batch of (history, target) pairs.

    dataset: 2-D array-like of shape (N, W); in this notebook W = 384 and each
        row is a 4-day chunk.
    batch_size: number of distinct rows to draw. Rows are sampled without
        replacement from NumPy's global RNG, so np.random.choice raises
        ValueError when batch_size > N.
    history_len: number of leading samples of each row used as model input;
        the remaining W - history_len samples are the target. Defaults to 192,
        which splits a 384-sample row into two equal halves.

    returns:
      x_tensor: (B, history_len, 1)      float32 -> model input
      y_tensor: (B, W - history_len, 1)  float32 -> ground-truth forecast

    raises:
      ValueError: if dataset is not 2-D, or history_len does not leave at
        least one sample on each side of the split.
    """
    data = np.asarray(dataset)  # no copy when dataset is already an ndarray
    if data.ndim != 2:
        raise ValueError(f"dataset must be 2-D (N, W), got shape {data.shape}")
    width = data.shape[1]
    if not 0 < history_len < width:
        raise ValueError(
            f"history_len must be in (0, {width}) for rows of width {width}, got {history_len}"
        )

    idxs = np.random.choice(len(data), batch_size, replace=False)

    # One fancy-index gather instead of a Python loop over rows.
    batch = data[idxs]                   # (B, W)
    x_arr = batch[:, :history_len]       # (B, history_len)     -> model input
    y_arr = batch[:, history_len:]       # (B, W - history_len) -> ground truth

    # Add the trailing feature dimension expected by the LSTM.
    x_tensor = torch.tensor(x_arr, dtype=torch.float).unsqueeze(-1)
    y_tensor = torch.tensor(y_arr, dtype=torch.float).unsqueeze(-1)
    return x_tensor, y_tensor


In [None]:
# train_lstm_attn_no_diff.py

import torch
import torch.nn as nn
import torch.optim as optim
import csv

# --- Training configuration -------------------------------------------------
BATCH_SIZE = 16
EPOCHS = 10000
# An "epoch" here is a fixed number of randomly sampled mini-batches, not a
# full pass over training_dataset.
STEPS_PER_EPOCH = 100
# Save the weights every N epochs; 1 writes one .pth file per epoch.
CHECKPOINT_EVERY = 1
LEARNING_RATE = 1e-3
device = "cuda" if torch.cuda.is_available() else "cpu"

model = LSTMAttnNoDiff(hidden_dim=64, num_layers=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion_mae = nn.L1Loss()   # the optimised objective
criterion_mse = nn.MSELoss()  # reported for monitoring only, never back-propagated

# Start a fresh log: mode "w" truncates any previous run's file.
csv_filename = "lstm_attn_no_diff_log.csv"
with open(csv_filename, "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["epoch", "MAE", "MSE"])

for epoch in range(EPOCHS):
    total_mae = 0.0
    total_mse = 0.0

    for step in range(STEPS_PER_EPOCH):
        x_batch, y_batch = get_batch(training_dataset, BATCH_SIZE)

        x_batch = x_batch.to(device)  # (B, 192, 1)
        y_batch = y_batch.to(device)  # (B, 192, 1)

        optimizer.zero_grad()
        y_pred = model(x_batch)       # => (B, 192, 1)

        loss_mae = criterion_mae(y_pred, y_batch)
        # MSE is only logged, so skip building an autograd graph for it.
        with torch.no_grad():
            loss_mse = criterion_mse(y_pred, y_batch)

        loss_mae.backward()
        optimizer.step()

        total_mae += loss_mae.item()
        total_mse += loss_mse.item()

    # Average over the configured step count rather than the loop variable
    # left behind by the inner loop.
    avg_mae = total_mae / STEPS_PER_EPOCH
    avg_mse = total_mse / STEPS_PER_EPOCH
    print(f"Epoch {epoch+1}/{EPOCHS} - MAE: {avg_mae:.4f}, MSE: {avg_mse:.4f}")

    # Log to CSV (re-opened in append mode each epoch so an interrupted run
    # keeps every completed row on disk).
    with open(csv_filename, "a", newline='') as f:
        writer = csv.writer(f)
        writer.writerow([epoch+1, avg_mae, avg_mse])

    # Save model checkpoint
    if (epoch + 1) % CHECKPOINT_EVERY == 0:
        torch.save(model.state_dict(), f"lstm_attn_no_diff_epoch_{epoch+1}.pth")


Epoch 1/10000 - MAE: 771.6584, MSE: 953664.0162
Epoch 2/10000 - MAE: 630.2396, MSE: 693901.1706
Epoch 3/10000 - MAE: 517.7472, MSE: 449423.8153
Epoch 4/10000 - MAE: 417.4274, MSE: 281088.7684
Epoch 5/10000 - MAE: 283.2042, MSE: 155913.9914
Epoch 6/10000 - MAE: 184.6049, MSE: 79255.1807
Epoch 7/10000 - MAE: 177.2317, MSE: 76978.0163
Epoch 8/10000 - MAE: 177.5070, MSE: 77588.7728
Epoch 9/10000 - MAE: 175.0283, MSE: 75269.7239
Epoch 10/10000 - MAE: 172.2946, MSE: 74270.7884
Epoch 11/10000 - MAE: 171.0407, MSE: 73436.9364
Epoch 12/10000 - MAE: 172.3592, MSE: 74980.3309
Epoch 13/10000 - MAE: 165.2692, MSE: 69834.3669
Epoch 14/10000 - MAE: 172.5609, MSE: 75087.3074
Epoch 15/10000 - MAE: 166.0646, MSE: 69406.5939
Epoch 16/10000 - MAE: 165.1807, MSE: 67958.7837
Epoch 17/10000 - MAE: 163.4492, MSE: 66270.2712
Epoch 18/10000 - MAE: 156.3641, MSE: 62764.5245
Epoch 19/10000 - MAE: 163.8532, MSE: 66613.2888
Epoch 20/10000 - MAE: 161.2004, MSE: 64688.8641
Epoch 21/10000 - MAE: 158.4964, MSE: 65501.2