In [3]:
!pip install dtw
import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd
import numpy as np

from dtw import accelerated_dtw
from tqdm import tqdm

import math
import os
import inspect
import csv



In [1]:
import math
import torch
import torch.nn as nn

class SelfAttention(nn.Module):
    """
    A single-head self-attention layer for sequences of shape (B, T, hidden_dim).
    Outputs the same shape (B, T, hidden_dim).
    """
    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.Wq = nn.Linear(hidden_dim, hidden_dim)
        self.Wk = nn.Linear(hidden_dim, hidden_dim)
        self.Wv = nn.Linear(hidden_dim, hidden_dim)
        self.scale = math.sqrt(hidden_dim)

    def forward(self, x):
        """
        x: (batch, seq_len, hidden_dim)
        returns: (batch, seq_len, hidden_dim)
        """
        Q = self.Wq(x)                            # (B, T, hidden_dim)
        K = self.Wk(x)                            # (B, T, hidden_dim)
        V = self.Wv(x)                            # (B, T, hidden_dim)

        attn_scores = torch.bmm(Q, K.transpose(1, 2)) / self.scale  # (B, T, T)
        attn_weights = torch.softmax(attn_scores, dim=-1)           # (B, T, T)

        out = torch.bmm(attn_weights, V)                             # (B, T, hidden_dim)
        return out

class LSTMAttnNoDiff(nn.Module):
    """
    Baseline LSTM + single-head self-attention for direct forecasting:
    - Input:  (B, 192, 1)   (the 'history')
    - Output: (B, 192, 1)   (the 'forecast' for the next 48 hours)
    
    Notes:
    - This is a simplified approach: effectively, the model sees 192 input time steps
      and produces 192 output steps in one shot.
    - There's no "difference vector" or "support" logic in this baseline.
    """
    def __init__(self, hidden_dim=64, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=1,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )
        self.attention = SelfAttention(hidden_dim)
        self.fc_out = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """
        x: (batch, 192, 1) representing the 'history' chunk
        returns: (batch, 192, 1) as the 'forecast'
        """
        lstm_out, (hn, cn) = self.lstm(x)    # (B, 192, hidden_dim)
        attn_out = self.attention(lstm_out)  # (B, 192, hidden_dim)
        pred = self.fc_out(attn_out)         # (B, 192, 1)
        return pred


In [4]:
def load_and_combine(csv_paths):
    df_list = []
    for path in csv_paths:
        temp_df = pd.read_csv(path, parse_dates=['timestamp'])
        temp_df.sort_values(by=['Station', 'timestamp'], inplace=True)
        df_list.append(temp_df)
    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df.reset_index(drop=True, inplace=True)
    return combined_df

csv_files = [
    "../Data/training_data_january_2023.csv",
    "../Data/training_data_february_2023.csv"
]

full_df = load_and_combine(csv_files)
N = len(full_df)
train_end = int(0.70 * N)
support_end = int(0.85 * N)

train_df = full_df.iloc[:train_end].copy().reset_index(drop=True)
support_df = full_df.iloc[train_end:support_end].copy().reset_index(drop=True)
test_df   = full_df.iloc[support_end:].copy().reset_index(drop=True)

print("Training samples:", len(train_df))
print("Support samples:", len(support_df))
print("Test samples:",    len(test_df))

Training samples: 3639070
Support samples: 779801
Test samples: 779801


In [5]:
def create_4day_chunks(df_single_station):
    df_single_station = df_single_station.sort_values('timestamp').copy()
    df_single_station['date'] = df_single_station['timestamp'].dt.date
    
    day_groups = []
    for day, group in df_single_station.groupby('date'):
        day_groups.append((day, group.sort_values('timestamp')))
    
    chunks = []
    for i in range(len(day_groups) - 3):
        _, df0 = day_groups[i]
        _, df1 = day_groups[i+1]
        _, df2 = day_groups[i+2]
        _, df3 = day_groups[i+3]
        
        combined_df = pd.concat([df0, df1, df2, df3], ignore_index=True)
        combined_df = combined_df.sort_values('timestamp')
        
        if len(combined_df) == 384:
            flow_4days = combined_df['Total_Flow'].to_numpy()  # shape (384,)
            chunks.append(flow_4days)
    return chunks

def create_dataset(df):
    all_chunks = []
    for sid in df['Station'].unique():
        sid_df = df[df['Station'] == sid]
        station_chunks = create_4day_chunks(sid_df)
        all_chunks.extend(station_chunks)
    return np.array(all_chunks)  # shape (N, 384)

training_dataset = create_dataset(train_df)
support_dataset  = create_dataset(support_df)
test_dataset     = create_dataset(test_df)

print("training_dataset shape:", training_dataset.shape)
print("support_dataset shape:", support_dataset.shape)
print("test_dataset shape:",    test_dataset.shape)

training_dataset shape: (12322, 384)
support_dataset shape: (6159, 384)
test_dataset shape: (6160, 384)


In [7]:
import numpy as np
import torch

def get_batch(dataset, batch_size):
    """
    dataset: shape (N, 384), each row is a 4-day chunk
    returns:
      x_tensor: (B, 192, 1) -> model input
      y_tensor: (B, 192, 1) -> ground-truth forecast
    """
    idxs = np.random.choice(len(dataset), batch_size, replace=False)

    x_list = []
    y_list = []

    for i in idxs:
        chunk = dataset[i]         # shape (384,)
        x_ = chunk[:192]           # shape (192,) -> model input
        y_ = chunk[192:]           # shape (192,) -> ground-truth next 48 hrs
        x_list.append(x_)
        y_list.append(y_)

    x_arr = np.array(x_list)       # (B, 192)
    y_arr = np.array(y_list)       # (B, 192)

    # Add final dimension
    x_tensor = torch.tensor(x_arr, dtype=torch.float).unsqueeze(-1)  # (B, 192, 1)
    y_tensor = torch.tensor(y_arr, dtype=torch.float).unsqueeze(-1)  # (B, 192, 1)
    return x_tensor, y_tensor


In [9]:
# train_lstm_attn_no_diff.py

import torch
import torch.nn as nn
import torch.optim as optim
import csv

BATCH_SIZE = 16
EPOCHS = 10000
LEARNING_RATE = 1e-3
device = "cuda" if torch.cuda.is_available() else "cpu"

model = LSTMAttnNoDiff(hidden_dim=64, num_layers=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion_mae = nn.L1Loss()
criterion_mse = nn.MSELoss()

csv_filename = "lstm_attn_no_diff_log.csv"
with open(csv_filename, "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["epoch", "MAE", "MSE"])

for epoch in range(EPOCHS):
    total_mae = 0.0
    total_mse = 0.0

    # We'll define some number of steps per epoch, e.g. 100
    for step in range(100):
        x_batch, y_batch = get_batch(training_dataset, BATCH_SIZE)
        
        x_batch = x_batch.to(device)  # (B, 192, 1)
        y_batch = y_batch.to(device)  # (B, 192, 1)

        optimizer.zero_grad()
        y_pred = model(x_batch)       # => (B, 192, 1)

        loss_mae = criterion_mae(y_pred, y_batch)
        loss_mse = criterion_mse(y_pred, y_batch)

        loss_mae.backward()
        optimizer.step()

        total_mae += loss_mae.item()
        total_mse += loss_mse.item()

    avg_mae = total_mae / (step + 1)
    avg_mse = total_mse / (step + 1)
    print(f"Epoch {epoch+1}/{EPOCHS} - MAE: {avg_mae:.4f}, MSE: {avg_mse:.4f}")

    # Log to CSV
    with open(csv_filename, "a", newline='') as f:
        writer = csv.writer(f)
        writer.writerow([epoch+1, avg_mae, avg_mse])

    # Save model checkpoint
    torch.save(model.state_dict(), f"lstm_attn_no_diff_epoch_{epoch+1}.pth")


Epoch 1/10000 - MAE: 515.5018, MSE: 551678.6198
Epoch 2/10000 - MAE: 437.2960, MSE: 430067.9641
Epoch 3/10000 - MAE: 378.5706, MSE: 315365.2441
Epoch 4/10000 - MAE: 314.0687, MSE: 208331.8585
Epoch 5/10000 - MAE: 229.1798, MSE: 111450.6405
Epoch 6/10000 - MAE: 180.8925, MSE: 76238.8901
Epoch 7/10000 - MAE: 153.4647, MSE: 65621.5994
Epoch 8/10000 - MAE: 138.0925, MSE: 54543.6004
Epoch 9/10000 - MAE: 142.1254, MSE: 57341.3011
Epoch 10/10000 - MAE: 135.2029, MSE: 52794.6509
Epoch 11/10000 - MAE: 131.1772, MSE: 50591.0337
Epoch 12/10000 - MAE: 127.8600, MSE: 48293.0005
Epoch 13/10000 - MAE: 132.5879, MSE: 51714.9632
Epoch 14/10000 - MAE: 123.7688, MSE: 46419.5821
Epoch 15/10000 - MAE: 123.0908, MSE: 44879.4824
Epoch 16/10000 - MAE: 127.7152, MSE: 48731.9045
Epoch 17/10000 - MAE: 128.2412, MSE: 50225.6046
Epoch 18/10000 - MAE: 126.3472, MSE: 47694.3481
Epoch 19/10000 - MAE: 128.4446, MSE: 49850.1471
Epoch 20/10000 - MAE: 120.3379, MSE: 44673.6384
Epoch 21/10000 - MAE: 127.2551, MSE: 49021.9

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 3883/10000 - MAE: 116.1951, MSE: 42386.9628
Epoch 3884/10000 - MAE: 120.2351, MSE: 45283.7687
Epoch 3885/10000 - MAE: 121.4536, MSE: 45519.1561
Epoch 3886/10000 - MAE: 118.3120, MSE: 43738.6356
Epoch 3887/10000 - MAE: 118.6086, MSE: 44246.9841
Epoch 3888/10000 - MAE: 119.1733, MSE: 44751.5627
Epoch 3889/10000 - MAE: 120.1216, MSE: 45840.6152
Epoch 3890/10000 - MAE: 118.2274, MSE: 43863.1262
Epoch 3891/10000 - MAE: 118.0981, MSE: 45499.9932
Epoch 3892/10000 - MAE: 119.5490, MSE: 45226.4886
Epoch 3893/10000 - MAE: 120.6397, MSE: 45204.4406
Epoch 3894/10000 - MAE: 122.7349, MSE: 45922.5675
Epoch 4096/10000 - MAE: 118.0899, MSE: 43607.4642
Epoch 4097/10000 - MAE: 120.6171, MSE: 46303.4180
Epoch 4098/10000 - MAE: 112.6344, MSE: 40884.7575
Epoch 4099/10000 - MAE: 114.0810, MSE: 41036.2581
Epoch 4100/10000 - MAE: 115.2588, MSE: 42276.5917
Epoch 4101/10000 - MAE: 115.2761, MSE: 43305.8515
Epoch 4102/10000 - MAE: 114.7420, MSE: 42318.7948
Epoch 4103/10000 - MAE: 117.9537, MSE: 44093.1553


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

