<a href="https://colab.research.google.com/github/bhuvananatrajan1508-eng/Advanced-Time-Series-Forecasting/blob/main/Advanced_time_series_forecasting%20with%20DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Imports & Configuration

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
DATA_PATH = "/content/time-series forecasting.csv"

df = pd.read_csv(DATA_PATH)

print(df.columns)  # always check once

# Parse the timestamp column. `infer_datetime_format` is deprecated (it has no
# effect in pandas >= 2.0 and only triggers a warning), so it is not passed.
# NOTE(review): with errors='coerce' any timestamp that does not match the
# inferred format becomes NaT and is dropped below. If the file uses day-first
# dates, pass an explicit `format=` here — confirm against the raw CSV.
n_rows_read = len(df)
df['Date Time'] = pd.to_datetime(df['Date Time'], errors='coerce')

# Drop rows whose timestamp could not be parsed and index by time.
df = df.dropna(subset=['Date Time']).set_index('Date Time')

# Surface how many rows were discarded so silent data loss is visible.
n_rows_dropped = n_rows_read - len(df)
if n_rows_dropped:
    print(f"Warning: dropped {n_rows_dropped} of {n_rows_read} rows with unparseable 'Date Time'")

# Select multivariate features (column 0 is the forecasting target downstream)
features = [
    'T (degC)', 'p (mbar)', 'rh (%)', 'wv (m/s)', 'wd (deg)'
]

data = df[features]

# === Data acquisition verification ===

print("Total observations:", len(df))
print("Date range:", df.index.min(), "to", df.index.max())
print("Missing values:\n", df.isnull().sum())

In [None]:
# Mount Google Drive at /content/drive. The `google.colab` package is provided
# by the Colab runtime, so this cell is expected to fail elsewhere.
# NOTE(review): DATA_PATH in the loading cell points under /content/, not under
# the mounted drive, so this mount looks unused by the cells shown — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Step 3: Chronological train / validation / test split (70% / 15% / remainder).
# Rows are sliced in their existing order with no shuffling, so each split only
# contains observations that come after the previous split.
n_obs = len(data)
train_size = int(n_obs * 0.7)
val_size = int(n_obs * 0.15)
val_end = train_size + val_size

train_data, val_data, test_data = (
    data.iloc[:train_size],
    data.iloc[train_size:val_end],
    data.iloc[val_end:],
)


In [None]:
# Step 4: Standardize features.
# The scaler's statistics come from the training split only; validation and
# test are transformed with those same statistics.
scaler = StandardScaler().fit(train_data)

train_scaled, val_scaled, test_scaled = (
    scaler.transform(train_data),
    scaler.transform(val_data),
    scaler.transform(test_data),
)

In [None]:
# Step 5: Sequence Dataset Builder
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for one-step-ahead forecasting.

    Item ``i`` is the pair ``(x, y)`` where ``x`` is the window of rows
    ``i .. i + seq_len - 1`` (all columns) and ``y`` is the value of column
    ``target_idx`` in the row immediately after the window.

    Args:
        data: 2-D array-like of shape (time, features); converted to float32.
        seq_len: Number of consecutive rows in each input window (>= 1).
        target_idx: Column to forecast. Defaults to 0, the first feature.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """

    def __init__(self, data, seq_len=48, target_idx=0):
        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")
        if isinstance(data, torch.Tensor):
            # torch.tensor(tensor) warns; copy explicitly instead.
            self.data = data.detach().clone().to(torch.float32)
        else:
            self.data = torch.tensor(data, dtype=torch.float32)
        self.seq_len = seq_len
        self.target_idx = target_idx

    def __len__(self):
        # Clamp at zero: a series shorter than one window has no samples, and
        # a negative length would make len() raise.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        n_samples = len(self)
        if idx < 0:
            # Support Python-style negative indexing instead of silently
            # returning a wrong (possibly empty) window.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"index out of range for dataset of length {n_samples}")
        window_end = idx + self.seq_len
        x = self.data[idx:window_end]
        y = self.data[window_end, self.target_idx]  # value right after the window
        return x, y

In [None]:
# Step 6: Windowed datasets and loaders.
# Each split is wrapped separately, so no window spans two splits.
SEQ_LEN = 48
BATCH_SIZE = 64

train_ds, val_ds, test_ds = (
    TimeSeriesDataset(split_arr, SEQ_LEN)
    for split_arr in (train_scaled, val_scaled, test_scaled)
)

# Batches are served in chronological order (no shuffling).
train_loader, val_loader = (
    DataLoader(split_ds, batch_size=BATCH_SIZE, shuffle=False)
    for split_ds in (train_ds, val_ds)
)

In [None]:
# Step 7: Baseline LSTM Model (No Attention)
class LSTMBaseline(nn.Module):
    """Single-layer LSTM followed by a linear head on the final time step.

    ``forward`` takes a batch-first tensor (batch, seq, input_dim) and returns
    a (batch, 1) prediction.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        hidden_seq, _state = self.lstm(x)
        # Only the hidden state of the most recent time step feeds the head.
        last_hidden = hidden_seq[:, -1, :]
        return self.fc(last_hidden)

In [None]:
# Step 8: LSTM with Temporal Attention (KEY PART)
class AttentionLSTM(nn.Module):
    """LSTM whose hidden states are pooled with learned per-time-step weights.

    ``forward`` takes a batch-first tensor (batch, seq, input_dim) and returns
    ``(prediction, attention_weights)`` with shapes (batch, 1) and
    (batch, seq); the weights are a softmax over the time axis.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.attention = nn.Linear(in_features=hidden_dim, out_features=1)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        lstm_out, _state = self.lstm(x)

        # One scalar score per time step, normalised across the sequence.
        scores = self.attention(lstm_out).squeeze(-1)
        attn_weights = torch.softmax(scores, dim=1)

        # Attention-weighted sum of the hidden states over time.
        weighted_states = lstm_out * attn_weights.unsqueeze(-1)
        context = weighted_states.sum(dim=1)

        return self.fc(context), attn_weights

In [None]:
# Step 9: Training Loop (Walk-Forward Compatible)
def train_model(model, loader, optimizer, criterion):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: Module returning either a prediction tensor or a tuple whose
            first element is the prediction (e.g. ``(pred, attn_weights)``).
        loader: Iterable of ``(x, y)`` batches.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss callable taking ``(predictions, targets)``.

    Returns:
        Mean of the per-batch losses, or NaN when ``loader`` yields no batches.
    """
    model.train()
    losses = []

    for x, y in loader:
        optimizer.zero_grad()
        # Single forward pass per batch; calling the model again just to test
        # its return type would double the compute.
        output = model(x)
        preds = output[0] if isinstance(output, tuple) else output
        # Flatten both sides so (B, 1) predictions line up with (B,) targets,
        # including a final batch of size 1 (a bare squeeze() would make the
        # prediction 0-d there).
        loss = criterion(preds.reshape(-1), y.reshape(-1))
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    if not losses:
        return float("nan")
    return np.mean(losses)

In [None]:
def evaluate(model, loader):
    """Compute RMSE and MAE of ``model`` over every batch in ``loader``.

    Args:
        model: Module returning either a prediction tensor or a tuple whose
            first element is the prediction.
        loader: Iterable of ``(x, y)`` batches.

    Returns:
        ``(rmse, mae)`` on the same scale as the targets yielded by ``loader``.
    """
    model.eval()
    preds, targets = [], []

    with torch.no_grad():
        for x, y in loader:
            output = model(x)
            out = output[0] if isinstance(output, tuple) else output
            # reshape(-1) keeps a 1-D array even for a batch of one; squeeze()
            # would yield a 0-d array there and list.extend() would raise.
            preds.extend(out.reshape(-1).cpu().numpy())
            targets.extend(y.reshape(-1).cpu().numpy())

    rmse = np.sqrt(mean_squared_error(targets, preds))
    mae = mean_absolute_error(targets, preds)
    return rmse, mae

In [None]:
# === Hyperparameter Search (Grid) ===
# Trains a fresh AttentionLSTM for a few epochs per (learning rate, hidden size)
# pair and keeps the pair with the lowest validation RMSE.

# The loss must be defined here: this cell runs before the training cell that
# also creates `criterion`, so relying on that one fails on a fresh kernel.
criterion = nn.MSELoss()
SEARCH_EPOCHS = 3  # short search

learning_rates = [0.001, 0.0005]
hidden_dims = [32, 64]
best_rmse = float("inf")
best_params = {"lr": None, "hidden_dim": None}
best_config = None

for lr in learning_rates:
    for hd in hidden_dims:
        model = AttentionLSTM(len(features), hd)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        for _ in range(SEARCH_EPOCHS):
            train_model(model, train_loader, optimizer, criterion)

        rmse, _ = evaluate(model, val_loader)

        # A NaN RMSE never compares smaller, so diverged runs are skipped.
        if rmse < best_rmse:
            best_rmse = rmse
            best_params["lr"] = lr
            best_params["hidden_dim"] = hd
            best_config = (lr, hd)

print("Best Hyperparameters:", best_config, "RMSE:", best_rmse)
print("Best Hyperparameters:", best_params)


In [None]:
# Step 10: Model Training
input_dim = len(features)

# Use the grid-search winner when the search cell has produced one; otherwise
# fall back to the previous hard-coded defaults (hidden_dim=64, lr=0.001).
# Both models share the same settings so the comparison stays like-for-like.
_tuned = globals().get("best_params") or {}
hidden_dim = _tuned.get("hidden_dim") or 64
learning_rate = _tuned.get("lr") or 0.001
print(f"Training with hidden_dim={hidden_dim}, lr={learning_rate}")

baseline = LSTMBaseline(input_dim, hidden_dim)
attention_model = AttentionLSTM(input_dim, hidden_dim)

criterion = nn.MSELoss()
optimizer_base = torch.optim.Adam(baseline.parameters(), lr=learning_rate)
optimizer_attn = torch.optim.Adam(attention_model.parameters(), lr=learning_rate)

EPOCHS = 5

for epoch in range(EPOCHS):
    # Each call is one full pass over train_loader and returns the mean loss.
    base_loss = train_model(baseline, train_loader, optimizer_base, criterion)
    attn_loss = train_model(attention_model, train_loader, optimizer_attn, criterion)

    print(f"Epoch {epoch+1} | Baseline: {base_loss:.4f} | Attention: {attn_loss:.4f}")

In [None]:
# Step 11: Validation metrics for both models (same loader, same metric code)

baseline_rmse, baseline_mae = evaluate(baseline, val_loader)
attn_rmse, attn_mae = evaluate(attention_model, val_loader)

# One print call with a newline separator; output matches line-by-line prints.
print(
    "Validation Results",
    f"Baseline LSTM  -> RMSE: {baseline_rmse:.4f}, MAE: {baseline_mae:.4f}",
    f"Attention LSTM -> RMSE: {attn_rmse:.4f}, MAE: {attn_mae:.4f}",
    sep="\n",
)


In [None]:
# ---------------------------------------------------------
# Held-out test split: final metrics for both models
# ---------------------------------------------------------

# Chronological order is kept; evaluation does not need shuffling.
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

test_rmse, test_mae = evaluate(attention_model, test_loader)
base_test_rmse, base_test_mae = evaluate(baseline, test_loader)

print(
    "\nTest Set Results",
    f"Baseline LSTM  → RMSE: {base_test_rmse:.4f}, MAE: {base_test_mae:.4f}",
    f"Attention LSTM → RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}",
    sep="\n",
)


In [None]:
# Take the test window at index 100 and run it through the attention model to
# obtain its per-time-step attention weights.
x_sample = test_ds[100][0].unsqueeze(0)  # add the batch dimension

attention_weights = attention_model(x_sample)[1]
attention_np = attention_weights.detach().squeeze().cpu().numpy()


In [None]:
# Step 12b: Rank time steps of the sampled window by attention weight

attn_np = attention_weights.detach().squeeze().cpu().numpy()

top_k = 5
# Reverse the ascending argsort, then keep the first k (largest weights first).
top_indices = np.argsort(attn_np)[::-1][:top_k]

print("Top influential time steps:")
for idx in top_indices:
    print(f"Time step {idx} -> weight {attn_np[idx]:.4f}")



In [None]:
# Ensure model exists (safety check): stop with a clear message if the
# `attention_model` global has not been created yet, instead of failing with a
# NameError inside the explainability cell.
assert 'attention_model' in globals(), "Run Step 10 (training) before Step 13"

In [None]:
# =========================================================
# Step 13: Post-hoc Explainability (Integrated Gradients)
# =========================================================

# NOTE(review): captum is not installed anywhere in the cells shown — confirm
# it is available in the runtime (or add a pinned %pip install cell up top).
from captum.attr import IntegratedGradients

attention_model.eval()

# Select one test sample (same index as the attention analysis above)
x_ig, _ = test_ds[100]
x_ig = x_ig.unsqueeze(0)

# All-zeros reference input for the attribution path. It gets its own name on
# purpose: calling it `baseline` would overwrite the trained LSTMBaseline model
# and break any later re-run of the evaluation cells.
ig_baseline = torch.zeros_like(x_ig)

# The model returns (prediction, attention_weights); attribute the prediction.
ig = IntegratedGradients(lambda x: attention_model(x)[0])

attributions, delta = ig.attribute(
    x_ig,
    ig_baseline,
    return_convergence_delta=True
)

attr_np = attributions.squeeze().detach().cpu().numpy()

# Feature-wise attribution: mean absolute attribution over the time axis
feature_importance = np.mean(np.abs(attr_np), axis=0)

ig_df = pd.DataFrame({
    "Feature": features,
    "Integrated_Gradient_Importance": feature_importance
}).sort_values(by="Integrated_Gradient_Importance", ascending=False)

print("\nIntegrated Gradients Feature Importance:")
print(ig_df)

ig_df.to_csv("integrated_gradients_feature_importance.csv", index=False)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Present the 1-D weight vector as a single-row matrix for the heatmap.
attn = attention_np.reshape(1, -1)  # shape: (1, time_steps)

heat_fig, heat_ax = plt.subplots(figsize=(12, 2))
sns.heatmap(
    attn,
    ax=heat_ax,
    cmap="viridis",
    cbar=True,
    xticklabels=10,   # label every 10th time step
    yticklabels=False,
)
heat_ax.set_title("Attention Heatmap Across Historical Time Steps")
heat_ax.set_xlabel("Time Steps (Past → Present)")
plt.show()


In [None]:
# Assuming first feature is temperature. x_sample comes from the standardized
# test split, so these values are scaled temperatures, not degrees Celsius.
temperature_series = x_sample.squeeze()[..., 0].detach().cpu().numpy()

fig, ax1 = plt.subplots(figsize=(12, 4))

# Temperature (left axis)
temp_line, = ax1.plot(temperature_series, label="Temperature", color="tab:blue")
ax1.set_ylabel("Temperature (scaled)", color="tab:blue")
# Title and x-label are set on ax1: the twin axes created below has its x-axis
# hidden, so an x-label set after twinx() would never be drawn.
ax1.set_xlabel("Historical Time Steps")
ax1.set_title("Temperature vs Attention Weights")

# Attention (right axis)
ax2 = ax1.twinx()
attn_line, = ax2.plot(attention_np, label="Attention Weight", color="tab:red", alpha=0.6)
ax2.set_ylabel("Attention Importance", color="tab:red")

# One legend covering the lines from both axes.
ax1.legend(handles=[temp_line, attn_line], loc="upper left")

plt.show()


In [None]:
import numpy as np

# Indices of the k largest attention weights, strongest first.
top_k = 5
top_indices = np.flip(np.argsort(attention_np))[:top_k]

print("Top Important Time Steps (0 = oldest):")
for idx in top_indices:
    print(f"Time step {idx} → Attention = {attention_np[idx]:.4f}")


In [None]:
# Line plot of the sampled window's attention weights, also written to disk.
attn_fig, attn_ax = plt.subplots(figsize=(10, 4))
attn_ax.plot(attention_np)
attn_ax.set_title("Temporal Attention Weights")
attn_ax.set_xlabel("Time Steps")
attn_ax.set_ylabel("Importance")
# Save before show() so the file is written from the populated figure.
attn_fig.savefig("attention_weights_plot.png", dpi=300)
plt.show()


In [None]:
# Step 14: Prediction vs Ground Truth Visualization

def get_predictions(model, dataset, n=300):
    """Collect predictions and targets for the first ``n`` samples of ``dataset``.

    Args:
        model: Module returning either a prediction tensor or a tuple whose
            first element is the prediction. Called with a batch of one.
        dataset: Indexable, sized collection of ``(x, y)`` pairs.
        n: Maximum number of samples to use; capped at ``len(dataset)``.

    Returns:
        ``(preds, targets)`` as two lists of Python floats of equal length.
    """
    model.eval()
    preds, targets = [], []

    # Never index past the end when the dataset holds fewer than n samples.
    n_samples = min(n, len(dataset))

    with torch.no_grad():
        for i in range(n_samples):
            x, y = dataset[i]
            # One forward pass per sample; unwrap (pred, attn) tuples after.
            output = model(x.unsqueeze(0))
            out = output[0] if isinstance(output, tuple) else output
            preds.append(out.item())
            targets.append(y.item())

    return preds, targets


# Predictions of the attention model on the leading test windows
preds, targets = get_predictions(attention_model, test_ds)

# Overlay prediction and ground truth; values are on the scaled temperature axis.
pred_fig, pred_ax = plt.subplots(figsize=(12, 4))
pred_ax.plot(targets, label="Ground Truth", alpha=0.8)
pred_ax.plot(preds, label="Attention LSTM Prediction", alpha=0.8)
pred_ax.set_title("Test Set: Prediction vs Ground Truth (Temperature)")
pred_ax.set_xlabel("Time Steps")
pred_ax.set_ylabel("Temperature (scaled)")
pred_ax.legend()
plt.show()
