In [1]:
pip install numpy pandas matplotlib torch scikit-learn statsmodels prophet optuna

Collecting optuna
  Downloading optuna-4.7.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.7.0-py3-none-any.whl (413 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.9/413.9 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.7.0


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
import optuna
import warnings

warnings.filterwarnings('ignore')

# ==========================================
# 1. DATA GENERATION
# ==========================================
def generate_multivariate_data(n_points=2000):
    np.random.seed(42)
    t = np.arange(n_points)

    # Simulate seasonality, trend, and noise
    series1 = 10 + 0.05 * t + 10 * np.sin(2 * np.pi * t / 50) + np.random.normal(0, 1, n_points)
    series2 = 20 + 0.02 * t + 5 * np.cos(2 * np.pi * t / 100) + np.random.normal(0, 0.5, n_points)

    # Target variable influenced by both series with a lag
    target = 0.6 * series1 + 0.4 * series2 + np.random.normal(0, 2, n_points)

    df = pd.DataFrame({'feature1': series1, 'feature2': series2, 'target': target})
    return df

# Data Preparation for Deep Learning
def create_sequences(data, seq_length):
    xs, ys = [], []
    for i in range(len(data) - seq_length):
        x = data[i:(i + seq_length), :]
        y = data[i + seq_length, -1]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

# ==========================================
# 2. DEEP LEARNING MODEL
# ==========================================
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        attn_weights = torch.tanh(self.attn(lstm_output))
        attn_weights = torch.softmax(attn_weights, dim=1)
        context = torch.sum(attn_weights * lstm_output, dim=1)
        return context, attn_weights

class AttentionLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim=1):
        super(AttentionLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.attention = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        context, weights = self.attention(lstm_out)
        out = self.fc(context)
        return out

# ==========================================
# 3. BASELINE MODELS
# ==========================================
def train_baselines(train_df, test_df):
    print("Training Baselines...")
    # SARIMA
    sarima_model = SARIMAX(train_df['target'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
    sarima_res = sarima_model.fit(disp=False)
    sarima_pred = sarima_res.forecast(steps=len(test_df))

    # Prophet
    prophet_df = train_df.reset_index().rename(columns={'index': 'ds', 'target': 'y'})
    prophet_df['ds'] = pd.date_range(start='2020-01-01', periods=len(prophet_df), freq='D')
    m = Prophet(yearly_seasonality=True, daily_seasonality=False)
    m.fit(prophet_df)
    future = m.make_future_dataframe(periods=len(test_df))
    forecast = m.predict(future)
    prophet_pred = forecast['yhat'].iloc[-len(test_df):].values

    return sarima_pred, prophet_pred

# ==========================================
# 4. HYPERPARAMETER TUNING & EVALUATION
# ==========================================
def objective(trial, X_train, y_train):
    hidden_dim = trial.suggest_int('hidden_dim', 16, 64)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)

    model = AttentionLSTM(input_dim=3, hidden_dim=hidden_dim, num_layers=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Training loop for tuning
    model.train()
    for epoch in range(10):
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output.squeeze(), y_train)
        loss.backward()
        optimizer.step()
    return loss.item()

# ==========================================
# MAIN EXECUTION FLOW
# ==========================================
if __name__ == "__main__":
    # Data Prep
    df = generate_multivariate_data()
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)

    SEQ_LENGTH = 20
    X, y = create_sequences(scaled_data, SEQ_LENGTH)

    split = int(0.8 * len(X))
    X_train, X_test = torch.FloatTensor(X[:split]), torch.FloatTensor(X[split:])
    y_train, y_test = torch.FloatTensor(y[:split]), torch.FloatTensor(y[split:])

    # Optuna Tuning
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=5)

    # Final Model Training
    best_params = study.best_params
    model = AttentionLSTM(3, best_params['hidden_dim'], 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=best_params['lr'])

    print("Training Final Attention-LSTM model...")
    for epoch in range(50):
        model.train()
        optimizer.zero_grad()
        out = model(X_train)
        loss = nn.MSELoss()(out.squeeze(), y_train)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0: print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

    # Evaluation
    model.eval()
    with torch.no_grad():
        preds = model(X_test).numpy()

    # Baselines
    sarima_p, prophet_p = train_baselines(df.iloc[:split], df.iloc[split:])

    # Metrics
    print("\n--- PERFORMANCE SUMMARY ---")
    for name, p in zip(['Attention-LSTM', 'SARIMA', 'Prophet'], [preds.flatten(), sarima_p, prophet_p]):

        mae = mean_absolute_error(df['target'].iloc[-len(p):], p)
        print(f"{name} -> MAE: {mae:.4f}")

[I 2026-02-06 09:54:37,558] A new study created in memory with name: no-name-0904d355-ccd9-40dc-8e46-9fbe4fd4818c
[I 2026-02-06 09:54:46,534] Trial 0 finished with value: 0.17119081318378448 and parameters: {'hidden_dim': 50, 'lr': 0.0004494520184143668}. Best is trial 0 with value: 0.17119081318378448.
[I 2026-02-06 09:54:48,913] Trial 1 finished with value: 0.17057006061077118 and parameters: {'hidden_dim': 58, 'lr': 0.0009656702589346824}. Best is trial 1 with value: 0.17057006061077118.
[I 2026-02-06 09:54:50,752] Trial 2 finished with value: 0.3201044797897339 and parameters: {'hidden_dim': 38, 'lr': 0.0004756419905802464}. Best is trial 1 with value: 0.17057006061077118.
[I 2026-02-06 09:54:51,832] Trial 3 finished with value: 0.1034071072936058 and parameters: {'hidden_dim': 32, 'lr': 0.000892925158312943}. Best is trial 3 with value: 0.1034071072936058.
[I 2026-02-06 09:54:55,072] Trial 4 finished with value: 0.03533673658967018 and parameters: {'hidden_dim': 62, 'lr': 0.003349

Training Final Attention-LSTM model...
Epoch 0, Loss: 0.3122
Epoch 10, Loss: 0.0110
Epoch 20, Loss: 0.0106
Epoch 30, Loss: 0.0074
Epoch 40, Loss: 0.0052
Training Baselines...

--- PERFORMANCE SUMMARY ---
Attention-LSTM -> MAE: 81.5133
SARIMA -> MAE: 4.5770
Prophet -> MAE: 4.6337
