In [1]:
# 📦 Step 1: Import Libraries
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os
import sys


In [2]:
sys.path.append('../')  # Import custom models

from models.timeseries_dataset_class import TimeSeriesDataset
from models.transformer_model_definitions import ProbSparseSelfAttention, InformerBlock, InformerForecast


# ✅ GPU Support
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using:", device)

Using: cpu


In [3]:

# 📂 Step 2: Load Train/Val/Test Data
train_df = pd.read_csv("../data/processed/etth1_train.csv", parse_dates=["date"], index_col="date")
val_df = pd.read_csv("../data/processed/etth1_val.csv", parse_dates=["date"], index_col="date")
test_df = pd.read_csv("../data/processed/etth1_test.csv", parse_dates=["date"], index_col="date")


In [4]:
#parameters

target_column = "OT"
window_size = 96
batch_size = 32
epochs = 10
learning_rate = 1e-3

In [5]:
# 🔄 Step 3: Dataloaders
train_loader = DataLoader(TimeSeriesDataset(train_df[target_column].values, window_size),
                          batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TimeSeriesDataset(val_df[target_column].values, window_size),
                        batch_size=batch_size, shuffle=False)
test_loader = DataLoader(TimeSeriesDataset(test_df[target_column].values, window_size),
                         batch_size=batch_size, shuffle=False)

In [6]:
# ⚙️ Step 4: Train Informer Model
model = InformerForecast().to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device).unsqueeze(1)
        pred = model(x)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"[Epoch {epoch+1}] Train Loss: {total_loss / len(train_loader):.4f}")

[Epoch 1] Train Loss: 0.0081
[Epoch 2] Train Loss: 0.0005
[Epoch 3] Train Loss: 0.0005
[Epoch 4] Train Loss: 0.0005
[Epoch 5] Train Loss: 0.0005
[Epoch 6] Train Loss: 0.0005
[Epoch 7] Train Loss: 0.0005
[Epoch 8] Train Loss: 0.0005
[Epoch 9] Train Loss: 0.0005
[Epoch 10] Train Loss: 0.0005


In [7]:
# 💾 Save model
os.makedirs("../models/checkpoints", exist_ok=True)
torch.save(model.state_dict(), "../models/checkpoints/informer_transformer_model.pth")
print("✅ Model saved")


✅ Model saved


In [8]:
# 📊 Step 5: Define Evaluation Function
def smape(y_true, y_pred):
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    diff = np.abs(y_true - y_pred) / denominator
    diff[denominator == 0] = 0.0
    return 100 * np.mean(diff)


In [9]:
def evaluate_model(dataloader, dataset_df, title, file_prefix):
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device).unsqueeze(1)
            pred = model(x)
            preds.append(pred.cpu().numpy())
            trues.append(y.cpu().numpy())

    predictions = np.concatenate(preds, axis=0).flatten()
    true_values = np.concatenate(trues, axis=0).flatten()

    # Inverse transform (only OT column)
    dummy_shape = (predictions.shape[0], dataset_df.shape[1])
    predictions_full = np.zeros(dummy_shape)
    true_values_full = np.zeros(dummy_shape)
    predictions_full[:, -1] = predictions
    true_values_full[:, -1] = true_values

    scaler = MinMaxScaler()
    scaler.fit(dataset_df)
    predictions = scaler.inverse_transform(predictions_full)[:, -1]
    true_values = scaler.inverse_transform(true_values_full)[:, -1]

    # Calculate metrics
    mse = mean_squared_error(true_values, predictions)
    mae = mean_absolute_error(true_values, predictions)
    smape_val = smape(true_values, predictions)

    print(f"📊 {title}")
    print(f" - MSE   : {mse:.6f}")
    print(f" - MAE   : {mae:.6f}")
    print(f" - SMAPE : {smape_val:.2f}%")

    # Plot
    os.makedirs(f"../../outputs/metrics/informer", exist_ok=True)
    plt.figure(figsize=(12, 6))
    plt.plot(true_values[:100], label="True")
    plt.plot(predictions[:100], label="Predicted")
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(f"../../outputs/metrics/informer/{file_prefix}_plot.png")
    print(f"✅ Plot saved to ../../outputs/metrics/informer/{file_prefix}_plot.png")
    plt.close()

    # Save CSV
    pd.DataFrame({
        "True Values": true_values,
        "Predictions": predictions
    }).to_csv(f"../../outputs/metrics/informer/{file_prefix}_results.csv", index=False)
    print(f"✅ Results saved to ../../outputs/metrics/informer/{file_prefix}_results.csv\n")



In [10]:
# 📈 Step 6: Run Evaluation on All Splits
evaluate_model(train_loader, train_df, "📈 Train Set Forecast vs True", "train")
evaluate_model(val_loader, val_df, "📈 Validation Set Forecast vs True", "val")
evaluate_model(test_loader, test_df, "📈 Test Set Forecast vs True", "test")

📊 📈 Train Set Forecast vs True
 - MSE   : 0.000383
 - MAE   : 0.013330
 - SMAPE : 4.35%
✅ Plot saved to ../../outputs/metrics/informer/train_plot.png
✅ Results saved to ../../outputs/metrics/informer/train_results.csv



  plt.savefig(f"../../outputs/metrics/informer/{file_prefix}_plot.png")


📊 📈 Validation Set Forecast vs True
 - MSE   : 0.000014
 - MAE   : 0.002651
 - SMAPE : 2.25%
✅ Plot saved to ../../outputs/metrics/informer/val_plot.png
✅ Results saved to ../../outputs/metrics/informer/val_results.csv



  plt.savefig(f"../../outputs/metrics/informer/{file_prefix}_plot.png")


📊 📈 Test Set Forecast vs True
 - MSE   : 0.000014
 - MAE   : 0.002511
 - SMAPE : 1.14%
✅ Plot saved to ../../outputs/metrics/informer/test_plot.png
✅ Results saved to ../../outputs/metrics/informer/test_results.csv



  plt.savefig(f"../../outputs/metrics/informer/{file_prefix}_plot.png")


In [11]:
# ✅ Next Steps?

# Would you like help with:

# Multi-step forecasting instead of 1-step?

# Multi-variate forecasting (use all features)?

# Transformer + embedding visualization?