In [8]:
import json
import os
import numpy as np

def load_existing_results(file_path="forecasting_results.json"):
    """
    Read previously stored results from a JSON file.

    Args:
    - file_path (str): Location of the JSON results file.

    Returns:
    - The object decoded from the file, or an empty dictionary when nothing
      exists at ``file_path``.
    """
    # Guard clause: no file on disk means there are no prior results yet.
    if not os.path.exists(file_path):
        return {}

    with open(file_path, "r") as results_file:
        stored = json.load(results_file)
    return stored


def save_results_to_json(data, file_path="forecasting_results.json"):
    """
    Save the results dictionary to a JSON file, handling NumPy data types.

    NumPy scalars (bool, integer, floating), NumPy arrays, and any of those
    nested inside dicts, lists or tuples are converted to plain Python values
    before serialization. NumPy scalars used as dictionary keys are converted
    as well.

    The JSON text is built completely in memory before the target file is
    opened, so a serialization error (e.g. an unsupported object type) leaves
    an existing results file untouched instead of truncating it.

    Args:
    - data: The (possibly nested) results structure to store.
    - file_path (str): Destination JSON file; it is overwritten.

    Raises:
    - TypeError: If ``data`` still contains an object that ``json`` cannot
      serialize after the NumPy conversion.
    """

    def convert_scalar(value):
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(value, np.bool_):
            return bool(value)
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, np.floating):
            return float(value)
        return value

    # Handle NumPy data types (recursive conversion)
    def convert_numpy(obj):
        if isinstance(obj, dict):
            return {convert_scalar(k): convert_numpy(v) for k, v in obj.items()}
        elif isinstance(obj, (list, tuple)):
            # Tuples are written as JSON arrays anyway; recurse into them too.
            return [convert_numpy(i) for i in obj]
        elif isinstance(obj, np.ndarray):
            # Recurse on the list form so object-dtype arrays holding NumPy
            # scalars are converted as well.
            return convert_numpy(obj.tolist())
        else:
            return convert_scalar(obj)

    # Serialize first, write second: never open (and truncate) the file
    # until we know the payload is valid.
    payload = json.dumps(convert_numpy(data), indent=4)
    with open(file_path, "w") as f:
        f.write(payload)
    print(f"✅ Results saved to {file_path}")



def store_results(dataset_name, horizons, horizon_value, experiment_type, backbone, mae_result, file_path="forecasting_results.json"):
    """
    Append MAE results for one experiment type and one horizon to the JSON results file.

    Results are grouped under the key ``f"{dataset_name}_{backbone}"``. Each
    entry holds the list of horizons plus one list of MAE values per horizon
    for every experiment type.

    Args:
    - dataset_name (str): Name of the dataset (e.g., 'Solar', 'Air Quality').
    - horizons (list): List of horizon values (e.g., [1, 2, 4, 8, 16]). Only used
      to lay out a new entry; for an existing entry the horizons stored in the
      file are authoritative.
    - horizon_value (int): The horizon corresponding to the mae_result provided.
    - experiment_type (str): One of the buckets of the entry:
      'mtl', 'global' or 'independent'.
    - backbone (str): Model backbone name (e.g., 'Deep_LSTM', 'simple_transformer').
    - mae_result (list): MAE values for the current horizon (list of floats).
      They are appended to whatever is already stored for that horizon.
    - file_path (str): JSON file to store the results.

    Returns:
    - None

    Raises:
    - KeyError: If ``experiment_type`` is not a bucket of the entry.
    - ValueError: If ``horizon_value`` is not one of the entry's horizons.
    """
    # Load existing results
    results_dict = load_existing_results(file_path)

    # Create dataset entry if it doesn't exist
    dataset_key = f"{dataset_name}_{backbone}"
    if dataset_key not in results_dict:
        results_dict[dataset_key] = {
            "horizons": list(horizons),
            "mtl": [[] for _ in horizons],
            "global": [[] for _ in horizons],
            "independent": [[] for _ in horizons]
        }
    entry = results_dict[dataset_key]

    # Validate the bucket up front so a typo yields an actionable message
    # (the exception type stays KeyError, as with a plain failed lookup).
    bucket_names = [key for key in entry if key != "horizons"]
    if experiment_type not in bucket_names:
        raise KeyError(
            f"Unknown experiment_type {experiment_type!r}; expected one of {bucket_names}."
        )

    # Index against the horizons persisted with the entry: the per-horizon
    # buckets were laid out for that list, which may differ from the
    # `horizons` argument of this particular call.
    stored_horizons = list(entry.get("horizons", horizons))
    try:
        horizon_index = stored_horizons.index(horizon_value)
    except ValueError:
        raise ValueError(f"⚠️ Horizon value {horizon_value} not found in {stored_horizons}.") from None

    # Append the mae_result to the correct horizon
    entry[experiment_type][horizon_index].extend(mae_result)

    # Save updated results
    save_results_to_json(results_dict, file_path)

In [9]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import dateutil.parser
import matplotlib.pyplot as plt

def df_to_X_y(df, features, target, window_size=32, horizon=1):
    """
    Slice a DataFrame into sliding input windows and their forecast targets.

    Args:
    - df (pd.DataFrame): Source data; rows are consumed in their existing order.
    - features (list): Column names used as model inputs. If ``target`` is not
      among them it is prepended, so it becomes the first input column.
    - target (str): Column whose future values are predicted.
    - window_size (int): Number of consecutive rows in each input window.
    - horizon (int): Number of rows following a window that form its label.

    Returns:
    - (X, y): NumPy arrays. Sample ``i`` pairs rows ``i .. i+window_size-1`` of
      the input columns with the next ``horizon`` values of ``target``. Both
      arrays are empty when ``df`` has fewer than ``window_size + horizon`` rows.
    """
    input_columns = features if target in features else [target] + features

    inputs = df[input_columns].to_numpy()
    targets = df[target].to_numpy()

    # A non-positive count simply produces no samples.
    n_samples = len(inputs) - window_size - horizon + 1
    windows = [inputs[start:start + window_size] for start in range(n_samples)]
    labels = [
        targets[start + window_size: start + window_size + horizon]
        for start in range(n_samples)
    ]

    return np.array(windows), np.array(labels)

def load_and_preprocess_site_data(site_path, features, target, window_size=32, horizon=16, min_date=None, max_date=None, batch_size=16, device='cpu', eval_drop_last=True):
    """
    Loads and preprocesses time series data for a given site.

    Pipeline: read the CSV, optionally filter rows by date, split
    chronologically into train / validation / test, standardize every split
    with the training split's mean and standard deviation, cut the splits into
    sliding windows and wrap them in DataLoaders.

    Args:
    - site_path (str): Path to the site CSV file.
    - features (list): Names of the input columns.
    - target (str): Name of the column to forecast.
    - window_size (int): Number of past time steps for input.
    - horizon (int): Number of future steps to predict.
    - min_date (str | datetime | None): Keep rows with ``date >= min_date``.
      Only applied when the CSV has a 'date' column.
    - max_date (str | datetime | None): Keep rows with ``date <= max_date``.
      Only applied when the CSV has a 'date' column.
    - batch_size (int): Batch size of all three loaders.
    - device (str): Device the tensors are moved to before being wrapped.
    - eval_drop_last (bool): Whether the validation and test loaders drop a
      trailing partial batch. Defaults to True (the historical behaviour);
      pass False to evaluate on every sample.

    Returns:
    - train_loader, val_loader, test_loader: DataLoaders for training, validation, and testing.
    """
    df = pd.read_csv(site_path)

    # Convert date column to datetime if it exists
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

        # Filter data between min_date and max_date
        if min_date:
            min_date = dateutil.parser.parse(min_date) if isinstance(min_date, str) else min_date
            df = df[df['date'] >= min_date]

        if max_date:
            max_date = dateutil.parser.parse(max_date) if isinstance(max_date, str) else max_date
            df = df[df['date'] <= max_date]

        # Drop the date column after filtering. Not in place: `df` may be a
        # filtered slice of the frame that was read from disk.
        df = df.drop(columns=['date'])

    # Keep only the columns the model consumes so that unrelated (possibly
    # non-numeric) columns in the CSV cannot break the standardization below.
    model_columns = list(dict.fromkeys([target, *features]))
    df = df[model_columns]

    # Perform an 80-20 split based on time order
    train_size = int(0.8 * len(df))
    train_val_df = df.iloc[:train_size]  # 80% for training & validation
    test_df = df.iloc[train_size:]       # 20% for final testing (future unseen data)

    # Split the first part further into Train (80%) and Validation (20%).
    # An explicit split index is used because `iloc[:-0]` / `iloc[-0:]` would
    # return an empty train set and put everything in validation when
    # val_size is 0.
    val_size = int(0.2 * len(train_val_df))  # 16% of full dataset
    split_at = len(train_val_df) - val_size
    train_df, val_df = train_val_df.iloc[:split_at], train_val_df.iloc[split_at:]

    print(f"Train size: {len(train_df)} | Validation size: {len(val_df)} | Test size: {len(test_df)}")

    # Standardize all splits with the TRAINING statistics only, so no
    # information from validation/test leaks into the scaling.
    train_mean, train_std = train_df.mean(), train_df.std()

    train_df = (train_df - train_mean) / (train_std + 1e-8)  # epsilon guards constant columns
    val_df = (val_df - train_mean) / (train_std + 1e-8)
    test_df = (test_df - train_mean) / (train_std + 1e-8)

    # Convert each split into sliding windows (NumPy arrays)
    X_train, y_train = df_to_X_y(train_df, features, target, window_size, horizon)
    X_val, y_val = df_to_X_y(val_df, features, target, window_size, horizon)
    X_test, y_test = df_to_X_y(test_df, features, target, window_size, horizon)

    # Convert to PyTorch tensors, already placed on the requested device
    train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32).to(device), torch.tensor(y_train, dtype=torch.float32).to(device))
    val_data = TensorDataset(torch.tensor(X_val, dtype=torch.float32).to(device), torch.tensor(y_val, dtype=torch.float32).to(device))
    test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float32).to(device), torch.tensor(y_test, dtype=torch.float32).to(device))

    # Create DataLoaders. Only the training loader is shuffled.
    # NOTE(review): with eval_drop_last=True the validation/test loaders skip
    # up to batch_size - 1 trailing samples (and yield nothing when a split
    # has fewer than batch_size windows), so metrics cover a subset of the data.
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)
    val_loader = DataLoader(val_data, shuffle=False, batch_size=batch_size, drop_last=eval_drop_last)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size, drop_last=eval_drop_last)

    return train_loader, val_loader, test_loader

In [10]:
# Imports
import torch, math
import numpy as np
from torch import nn, Tensor
import torch.nn.functional as F
from torch.nn.modules.transformer import TransformerEncoderLayer
from torch.optim.lr_scheduler import ExponentialLR
import os

# Positional Encoding - https://pytorch.org/tutorials/beginner/transformer_tutorial.html
class PositionalEncoding(nn.Module):
    """
    Sinusoidal positional encoding for batch-first inputs.

    A table ``pe`` of shape (max_len, 1, d_model) is precomputed and registered
    as a buffer: even feature indices hold sines and odd feature indices hold
    cosines of the position scaled by geometrically spaced frequencies.

    Args:
    - d_model (int): Size of the feature (embedding) dimension. Odd values are
      supported; the last sine column then has no cosine partner.
    - dropout (float): Dropout probability applied after adding the encoding.
    - max_len (int): Number of positions in the precomputed table.
    """
    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd index than even index, so the
        # cosine block uses only the first d_model // 2 frequencies. For even
        # d_model the slice is a no-op.
        pe[:, 0, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """Add the encoding of the first ``x.size(1)`` positions to ``x``, then apply dropout."""
        # pe[:seq] is (seq, 1, d_model); transposing gives (1, seq, d_model),
        # which broadcasts over the leading (batch) dimension of x.
        x = x + self.pe[:x.size(1)].transpose(0, 1)
        return self.dropout(x)

# A forecasting model
class ForecastingModel(torch.nn.Module):
    """
    Single-layer Transformer encoder followed by a small regression head.

    Forward pipeline: input embedding (causal Conv1d or Linear) -> positional
    encoding -> one TransformerEncoderLayer with a causal attention mask ->
    flatten over (seq_len, embed_size) -> Linear -> Dropout -> ReLU -> Linear
    producing ``horizon`` outputs per sample.

    Args:
    - seq_len (int): Length of the input windows; inputs must have exactly
      this many time steps.
    - embed_size (int): Embedding / model dimension.
    - num_features (int): Number of input features per time step.
    - horizon (int): Number of values predicted per sample.
    - nhead (int): Number of attention heads.
    - dim_feedforward (int): Width of the encoder's feed-forward block; also
      used as the hidden width of the regression head.
    - dropout (float): Dropout probability used throughout.
    - conv1d_emb (bool): Use a Conv1d embedding (True) or a Linear one (False).
    - conv1d_kernel_size (int): Kernel size of the Conv1d embedding; must be odd.
    - device (str): Stored as ``self.device``. Used as the fallback device for
      the attention mask when none is given; ``forward`` builds the mask on
      the input's device.
    """
    def __init__(self,
                 seq_len=200,
                 embed_size = 16,
                 num_features=8,
                 horizon=1,
                 nhead = 2,
                 dim_feedforward = 2048,
                 dropout = 0.1,
                 conv1d_emb = True,
                 conv1d_kernel_size = 3,
                 device = "cuda"):
        super(ForecastingModel, self).__init__()

        # Set Class-level Parameters
        self.device = device
        self.conv1d_emb = conv1d_emb
        self.conv1d_kernel_size = conv1d_kernel_size
        self.seq_len = seq_len
        self.embed_size = embed_size

        # Input Embedding Component
        if conv1d_emb:
            if conv1d_kernel_size%2==0:
                raise Exception("conv1d_kernel_size must be an odd number to preserve dimensions.")
            # Amount of left padding applied in forward() so that the
            # convolution output has the same length as the input sequence.
            self.conv1d_padding = conv1d_kernel_size - 1
            self.input_embedding  = nn.Conv1d(num_features, embed_size, kernel_size=conv1d_kernel_size)
        else: self.input_embedding  = nn.Linear(num_features, embed_size)

        # Positional Encoder Component
        self.position_encoder = PositionalEncoding(d_model=embed_size,
                                                   dropout=dropout,
                                                   max_len=seq_len)

        # Transformer Encoder Layer Component
        self.transformer_encoder = TransformerEncoderLayer(
            d_model = embed_size,
            nhead = nhead,
            dim_feedforward = dim_feedforward,
            dropout = dropout,
            batch_first = True
        )

        # Regression Component
        self.linear1 = nn.Linear(seq_len*embed_size, int(dim_feedforward))
        self.outlayer = nn.Linear(int(dim_feedforward), horizon)

        # Basic Components
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    # Model Forward Pass
    def forward(self, x):
        """
        Map a batch of windows to forecasts.

        ``x`` is indexed as (batch, time, feature); the result has one row per
        sample and ``horizon`` columns.
        """
        # Build the causal mask on the input's device. The constructor's
        # `device` string can be stale (e.g. default "cuda" on a CPU-only
        # machine, or a model moved with .to() after construction).
        src_mask = self._generate_square_subsequent_mask(device=x.device)
        if self.conv1d_emb:
            # Left-pad the time axis with the constant -1 so every output step
            # of the convolution only sees current and earlier inputs.
            x = F.pad(x, (0, 0, self.conv1d_padding, 0), "constant", -1)
            # Conv1d expects (batch, channels, time); swap back afterwards.
            x = self.input_embedding(x.transpose(1, 2))
            x = x.transpose(1, 2)
        else:
            x = self.input_embedding(x)
        x = self.position_encoder(x)
        x = self.transformer_encoder(x, src_mask=src_mask)
        # Flatten each sample's (seq_len, embed_size) output into one vector.
        x = x.reshape((-1, self.seq_len*self.embed_size))
        x = self.linear1(x)
        x = self.dropout(x)
        x = self.relu(x)
        return self.outlayer(x)

    # Upper-triangular (causal) source mask, as in the PyTorch library helper
    def _generate_square_subsequent_mask(self, device=None):
        """
        Return a (seq_len, seq_len) float mask with -inf strictly above the
        diagonal and 0 elsewhere.

        ``device`` defaults to ``self.device`` when omitted.
        """
        mask_device = self.device if device is None else device
        return torch.triu(
            torch.full((self.seq_len, self.seq_len), float('-inf'), dtype=torch.float32, device=mask_device),
            diagonal=1,
        )

In [11]:
import torch
import torch.nn as nn
from sklearn.metrics import mean_absolute_error
import numpy as np
import os
from torch.optim.lr_scheduler import ExponentialLR

# ------------------ INDEPENDENT TRANSFORMER EXPERIMENT FOR MULTIPLE DATASETS ------------------

def run_experiment_mae_independent():
    """
    Runs independent Transformer (ForecastingModel) experiments for multiple datasets and horizons.

    For every dataset and every horizon, one model is trained from scratch per
    site (at most the first 30 site folders in sorted order) and evaluated on
    that site's test split. The per-site test MAEs and their mean are appended
    to output_test.txt and stored through store_results under the
    'independent' experiment type with backbone 'simple_transformer'.

    Note: the data loader standardizes the data, so the reported MAE is
    measured on standardized values.
    """
    datasets = [
        {
            'name': 'Air Quality',
            'features': ['PM2.5', 'OT', 'PM10', 'NO2'],
            'target': 'PM2.5',
            'directory': "../processed_ds/air_quality_cluster/",
            'min_date': "2014-09-01",
            'max_date': "2014-11-12 19:00",
            'num_features': 4
        },
        {
            'name': 'Solar',
            'directory': "../processed_ds/solar/",
            'features': ['loc-1', 'loc-2', 'loc-3', 'loc-4'],
            'target': 'loc-1',
            'min_date': "2006-09-01",
            'max_date': "2006-09-08 4:50",
            'num_features': 4
        },
        {
            'name': 'Crypto',
            'directory': "../processed_ds/crypto-data/",
            'features': ['Open', 'High', 'Low', 'OT', 'Volume'],
            'target': 'OT',
            'min_date': "2018-04-01",
            'max_date': "2018-06-15",
            'num_features': 5
        },
        # Disabled: this entry has no 'features' / 'target' keys yet.
        # {
        #     'name': 'Sales',
        #     'directory': "../processed_ds/stores_data/",
        #     'min_date': "2013-01-16",
        #     'max_date': "2015-07-31",
        #     'num_features': 7
        # }
    ]

    horizons = [1, 2, 4, 8, 16]
    device = "cuda" if torch.cuda.is_available() else "cpu"
    seq_len = 32
    embed_size = 128
    batch_size = 32
    num_epochs = 30
    dim_feedforward = 512
    dropout = 0.1
    max_sites = 30  # upper bound on the number of sites processed per dataset

    for dataset in datasets:
        # os.listdir returns entries in arbitrary order; sort so the subset of
        # sites selected below is the same on every run and every machine.
        site_files = [
            os.path.join(dataset['directory'], f, f"{f}.csv")
            for f in sorted(os.listdir(dataset['directory']))
            if os.path.isdir(os.path.join(dataset['directory'], f))
        ]

        for horizon in horizons:
            site_mae_list = []
            print(f"\n==================== Dataset: {dataset['name']} | Horizon: {horizon} ====================")

            for site_path in site_files[0:max_sites]:
                print(f"\nProcessing Site: {site_path}")

                # window_size is passed explicitly: the model is built for
                # seq_len time steps, so the windows must have that length.
                train_loader, _, test_loader = load_and_preprocess_site_data(
                    site_path,
                    window_size=seq_len,
                    horizon=horizon,
                    features=dataset['features'],
                    target=dataset['target'],
                    min_date=dataset['min_date'],
                    max_date=dataset['max_date'],
                    batch_size=batch_size,
                    device=device
                )

                model = ForecastingModel(
                    seq_len=seq_len,
                    embed_size=embed_size,
                    horizon=horizon,
                    num_features=dataset['num_features'],
                    dim_feedforward=dim_feedforward,
                    dropout=dropout,
                    device=device
                )
                total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
                print("Total trainable parameters:", total_params)
                model.to(device)

                optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
                scheduler = ExponentialLR(optimizer, gamma=0.95)  # decay LR by 5% per epoch
                criterion = nn.MSELoss()

                model.train()
                for epoch in range(num_epochs):
                    epoch_train_losses = []
                    for batch_x, batch_y in train_loader:
                        optimizer.zero_grad()
                        output = model(batch_x)
                        loss = criterion(output, batch_y)
                        loss.backward()
                        optimizer.step()
                        epoch_train_losses.append(loss.item())
                    scheduler.step()
                    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {np.mean(epoch_train_losses):.4f}")

                # Evaluate the model on test data
                model.eval()
                mae_criterion = nn.L1Loss()
                test_preds, test_targets = [], []
                with torch.no_grad():
                    for batch_x, batch_y in test_loader:
                        preds = model(batch_x)
                        test_preds.append(preds.cpu())
                        test_targets.append(batch_y.cpu())

                # torch.cat fails on an empty list; skip sites whose test
                # loader produced no batches instead of aborting the sweep.
                if not test_preds:
                    print(f"Site: {site_path}, no test batches available - skipped")
                    continue

                test_preds = torch.cat(test_preds, dim=0)
                test_targets = torch.cat(test_targets, dim=0)
                test_mae = mae_criterion(test_preds, test_targets)
                print(f"Site: {site_path}, Test MAE: {test_mae.item():.4f}")
                site_mae_list.append(test_mae.item())

            # Nothing to average or store when no site produced a result.
            if not site_mae_list:
                print(f"\nNo results for {dataset['name']} at horizon {horizon}; nothing stored.")
                continue

            avg_mae = np.mean(site_mae_list)
            print(f"\nAverage MAE for {dataset['name']} at horizon {horizon}: {avg_mae:.4f}")

            # Append results to output_test.txt
            with open("output_test.txt", "a") as f:
                f.write("\n==================== Transformer INDEPENDENT FORECASTING MODEL RESULTS ====================\n")
                f.write(f"Dataset: {dataset['name']}\n")
                f.write(f"Horizon: {horizon}\n")
                f.write(f"MAE per site: {site_mae_list}\n")
                f.write(f"Mean MAE: {avg_mae:.4f}\n")
            # Pass the same horizon list that drives the loop so the stored
            # layout cannot drift from the horizons actually evaluated.
            store_results(
                dataset_name=dataset['name'],
                horizons=horizons,
                experiment_type='independent',
                mae_result=site_mae_list,
                backbone='simple_transformer',
                horizon_value=horizon
            )

    print("\n🏆 All independent Forecasting Model experiments for all datasets and horizons completed successfully!")


In [12]:
# Launch the full sweep: one model is trained and evaluated per site for every
# configured dataset and horizon, so this cell is long-running.
run_experiment_mae_independent()



Processing Site: ../processed_ds/air_quality_cluster/site-11/site-11.csv
Train size: 1119 | Validation size: 279 | Test size: 350
Total trainable parameters: 2298113
Epoch 1/30 - Train Loss: 0.4796
Epoch 2/30 - Train Loss: 0.2373
Epoch 3/30 - Train Loss: 0.1791
Epoch 4/30 - Train Loss: 0.1577
Epoch 5/30 - Train Loss: 0.1446
Epoch 6/30 - Train Loss: 0.1219
Epoch 7/30 - Train Loss: 0.1251
Epoch 8/30 - Train Loss: 0.1117
Epoch 9/30 - Train Loss: 0.1111
Epoch 10/30 - Train Loss: 0.1057
Epoch 11/30 - Train Loss: 0.0994
Epoch 12/30 - Train Loss: 0.0990
Epoch 13/30 - Train Loss: 0.0905
Epoch 14/30 - Train Loss: 0.0891
Epoch 15/30 - Train Loss: 0.0862
Epoch 16/30 - Train Loss: 0.0867
Epoch 17/30 - Train Loss: 0.0861
Epoch 18/30 - Train Loss: 0.0882
Epoch 19/30 - Train Loss: 0.0788
Epoch 20/30 - Train Loss: 0.0799
Epoch 21/30 - Train Loss: 0.0779
Epoch 22/30 - Train Loss: 0.0776
Epoch 23/30 - Train Loss: 0.0722
Epoch 24/30 - Train Loss: 0.0660
Epoch 25/30 - Train Loss: 0.0712
Epoch 26/30 - Tr