In [1]:
# ==============================
# Advanced Time Series Forecasting
# Transformer vs LSTM Baseline
# ==============================

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import math
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# ==============================
# 1. DATA GENERATION
# ==============================

def generate_time_series(n_steps=1200):
    np.random.seed(42)
    time = np.arange(n_steps)

    trend = 0.05 * time
    season1 = 10 * np.sin(2 * np.pi * time / 50)
    season2 = 5 * np.cos(2 * np.pi * time / 100)

    feature1 = trend + season1 + np.random.normal(0, 1, n_steps)
    feature2 = trend * 0.5 + season2 + np.random.normal(0, 1, n_steps)
    feature3 = season1 * 0.3 + np.random.normal(0, 0.5, n_steps)
    feature4 = season2 * 0.7 + np.random.normal(0, 0.5, n_steps)
    feature5 = trend * 0.2 + np.random.normal(0, 1, n_steps)

    data = np.vstack([feature1, feature2, feature3, feature4, feature5]).T
    return pd.DataFrame(data)


def create_sequences(data, seq_length=30):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length, 0])
    return np.array(X), np.array(y)


# ==============================
# 2. TRANSFORMER MODEL
# ==============================

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        )

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        self.pe = pe.unsqueeze(0)

    def forward(self, x):
        return x + self.pe[:, :x.size(1)].to(x.device)


class TransformerEncoderBlock(nn.Module):
    def __init__(self, d_model, n_heads, dim_ff, dropout=0.1):
        super().__init__()

        self.attn = nn.MultiheadAttention(d_model, n_heads,
                                          dropout=dropout,
                                          batch_first=True)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

        self.ff = nn.Sequential(
            nn.Linear(d_model, dim_ff),
            nn.ReLU(),
            nn.Linear(dim_ff, d_model)
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        attn_output, attn_weights = self.attn(x, x, x)
        x = self.norm1(x + self.dropout(attn_output))

        ff_output = self.ff(x)
        x = self.norm2(x + self.dropout(ff_output))

        return x, attn_weights


class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim,
                 d_model=64,
                 n_heads=4,
                 num_layers=2,
                 dim_ff=128):

        super().__init__()

        self.input_proj = nn.Linear(input_dim, d_model)
        self.pos_enc = PositionalEncoding(d_model)

        self.layers = nn.ModuleList([
            TransformerEncoderBlock(d_model, n_heads, dim_ff)
            for _ in range(num_layers)
        ])

        self.output = nn.Linear(d_model, 1)

    def forward(self, x):
        x = self.input_proj(x)
        x = self.pos_enc(x)

        for layer in self.layers:
            x, attn_weights = layer(x)

        out = self.output(x[:, -1, :])
        return out, attn_weights


# ==============================
# 3. LSTM BASELINE
# ==============================

class LSTMBaseline(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        return self.fc(hidden[-1])


# ==============================
# 4. TRAINING FUNCTION
# ==============================

def train_model(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0

    for X_batch, y_batch in loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()

        if isinstance(model, TimeSeriesTransformer):
            outputs, _ = model(X_batch)
        else:
            outputs = model(X_batch)

        loss = criterion(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(loader)


# ==============================
# 5. EVALUATION
# ==============================

def evaluate(model, loader, device):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch = X_batch.to(device)

            if isinstance(model, TimeSeriesTransformer):
                outputs, attn = model(X_batch)
            else:
                outputs = model(X_batch)

            y_true.extend(y_batch.numpy())
            y_pred.extend(outputs.cpu().numpy())

    y_true = np.array(y_true)
    y_pred = np.array(y_pred).flatten()

    mae = np.mean(np.abs(y_true - y_pred))
    rmse = np.sqrt(np.mean((y_true - y_pred)**2))
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    return mae, rmse, mape


# ==============================
# 6. MAIN EXECUTION
# ==============================

def main():

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Generate data
    df = generate_time_series()
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(df)

    # Create sequences
    X, y = create_sequences(data_scaled, seq_length=30)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False)

    # Convert to tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)

    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.float32)

    train_loader = DataLoader(
        TensorDataset(X_train, y_train),
        batch_size=32,
        shuffle=True
    )

    test_loader = DataLoader(
        TensorDataset(X_test, y_test),
        batch_size=32,
        shuffle=False
    )

    # ==========================
    # Transformer Training
    # ==========================

    transformer = TimeSeriesTransformer(input_dim=5).to(device)
    optimizer = optim.Adam(transformer.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    print("Training Transformer...")

    for epoch in range(20):
        loss = train_model(transformer, train_loader,
                           optimizer, criterion, device)
        print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

    mae, rmse, mape = evaluate(transformer, test_loader, device)

    print("\nTransformer Performance")
    print("MAE:", mae)
    print("RMSE:", rmse)
    print("MAPE:", mape)

    # ==========================
    # LSTM Training
    # ==========================

    lstm = LSTMBaseline(input_dim=5).to(device)
    optimizer_lstm = optim.Adam(lstm.parameters(), lr=0.001)

    print("\nTraining LSTM...")

    for epoch in range(20):
        loss = train_model(lstm, train_loader,
                           optimizer_lstm, criterion, device)
        print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

    mae_lstm, rmse_lstm, mape_lstm = evaluate(
        lstm, test_loader, device)

    print("\nLSTM Performance")
    print("MAE:", mae_lstm)
    print("RMSE:", rmse_lstm)
    print("MAPE:", mape_lstm)


if __name__ == "__main__":
    main()

Training Transformer...
Epoch 1, Loss: 0.1096
Epoch 2, Loss: 0.0195
Epoch 3, Loss: 0.0138
Epoch 4, Loss: 0.0125
Epoch 5, Loss: 0.0099
Epoch 6, Loss: 0.0100
Epoch 7, Loss: 0.0090
Epoch 8, Loss: 0.0079
Epoch 9, Loss: 0.0065
Epoch 10, Loss: 0.0068
Epoch 11, Loss: 0.0077
Epoch 12, Loss: 0.0072
Epoch 13, Loss: 0.0090
Epoch 14, Loss: 0.0069
Epoch 15, Loss: 0.0071
Epoch 16, Loss: 0.0085
Epoch 17, Loss: 0.0078
Epoch 18, Loss: 0.0083
Epoch 19, Loss: 0.0067
Epoch 20, Loss: 0.0061

Transformer Performance
MAE: 0.09234586
RMSE: 0.11647644
MAPE: 7.046276

Training LSTM...
Epoch 1, Loss: 0.3347
Epoch 2, Loss: 0.0498
Epoch 3, Loss: 0.0195
Epoch 4, Loss: 0.0083
Epoch 5, Loss: 0.0067
Epoch 6, Loss: 0.0062
Epoch 7, Loss: 0.0060
Epoch 8, Loss: 0.0056
Epoch 9, Loss: 0.0055
Epoch 10, Loss: 0.0054
Epoch 11, Loss: 0.0051
Epoch 12, Loss: 0.0051
Epoch 13, Loss: 0.0052
Epoch 14, Loss: 0.0052
Epoch 15, Loss: 0.0048
Epoch 16, Loss: 0.0049
Epoch 17, Loss: 0.0052
Epoch 18, Loss: 0.0046
Epoch 19, Loss: 0.0048
Epoch 