In [None]:
📘 Step 3: Transformer-Based Time Series Forecasting (Informer)

We’ll use the Hugging Face 🤗 implementation of a pretrained Transformer model adapted for time series forecasting.

Since Transformer time series models are not in Hugging Face’s core repo, we’ll use the open-source Informer2020 repository (by Haoyi Zhou).

But first, here’s a simplified PyTorch-based custom Transformer that you can use as a baseline:

In [None]:
# 📦 Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# (The original line held this assignment twice, fused together, which is a SyntaxError.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:

# 📂 Load the dataset
# Read the processed ETTh1 CSV; the "date" column is parsed as datetimes
# and becomes the frame's index.
df = pd.read_csv(
    "../data/processed/etth1_processed.csv",
    index_col="date",
    parse_dates=["date"],
)

# The "OT" column, as a NumPy array, is the univariate series to forecast.
target = df["OT"].to_numpy()


In [None]:
# 🧩 Dataset
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for one-step-ahead forecasting.

    Sample ``i`` pairs the ``window_size`` consecutive values that start at
    position ``i`` with the single value that immediately follows them.

    Args:
        series: Sequence of numeric values (list, NumPy array, or tensor).
            It is copied into a ``float32`` tensor.
        window_size: Number of past values in each input window; must be >= 1.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """

    def __init__(self, series, window_size):
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")
        if isinstance(series, torch.Tensor):
            # torch.tensor() on an existing tensor emits a UserWarning;
            # detach + clone is the recommended way to copy one.
            self.series = series.detach().clone().to(dtype=torch.float32)
        else:
            self.series = torch.tensor(series, dtype=torch.float32)
        self.window_size = window_size

    def __len__(self):
        """Return the number of (window, next value) pairs available."""
        # Clamp at zero: a negative result would make len() raise ValueError
        # when the series is shorter than one window plus its target.
        return max(0, len(self.series) - self.window_size)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``.

        ``x`` has shape ``[window_size, 1]`` and ``y`` is a 0-d tensor.
        Negative indices count from the end, as with Python sequences.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Without this, a negative index would slice a wrong (often empty) window.
            idx = idx + n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"index {idx} out of range for dataset of length {n_samples}")
        x = self.series[idx:idx + self.window_size]
        y = self.series[idx + self.window_size]
        # Add a trailing feature dimension so each time step is a length-1 vector.
        return x.unsqueeze(1), y


In [None]:

# 🔄 Create Dataloader
# Windowing and batching hyperparameters.
batch_size = 32
window_size = 48

# Wrap the target series in the sliding-window dataset, then batch it.
# shuffle=True makes the loader draw windows in random order.
dataset = TimeSeriesDataset(series=target, window_size=window_size)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# 🤖 Transformer Encoder Model (Simplified)
class TransformerForecast(nn.Module):
    """Transformer-encoder regressor that predicts one value per input window.

    Each time step is projected to ``d_model`` features, a fixed sinusoidal
    positional encoding is added, the sequence passes through a stack of
    ``nn.TransformerEncoderLayer`` blocks, and the representation of the last
    time step is mapped to a single output value.

    The positional encoding is computed on the fly and holds no parameters or
    buffers, so the module's ``state_dict`` keys are the same as without it.

    Args:
        input_size: Number of features per time step.
        d_model: Width of the Transformer representation.
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, input_size=1, d_model=64, nhead=4, num_layers=2):
        super().__init__()
        self.embedding = nn.Linear(input_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.output = nn.Linear(d_model, 1)

    def _positional_encoding(self, seq_len, like):
        """Return a ``[seq_len, d_model]`` sinusoidal encoding on ``like``'s device and dtype."""
        d_model = self.embedding.out_features
        position = torch.arange(seq_len, dtype=torch.float32, device=like.device).unsqueeze(1)
        exponent = torch.arange(0, d_model, 2, dtype=torch.float32, device=like.device) / d_model
        inv_freq = torch.pow(10000.0, -exponent)
        angles = position * inv_freq  # [seq_len, ceil(d_model / 2)]
        encoding = torch.zeros(seq_len, d_model, dtype=torch.float32, device=like.device)
        encoding[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer cosine column than sine columns.
        encoding[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        return encoding.to(like.dtype)

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq_len, input_size]`` to ``[batch, 1]``."""
        x = self.embedding(x)  # [batch, seq_len, d_model]
        # Self-attention alone cannot tell time steps apart by position; without
        # this term the last-token output is the same for any ordering of the
        # earlier steps.
        x = x + self._positional_encoding(x.size(1), x)  # broadcast over batch
        x = self.transformer(x)
        return self.output(x[:, -1, :])  # last token for prediction


In [None]:
# ⚙️ Training Loop
# Model, objective and optimizer for one-step-ahead regression.
model = TransformerForecast().to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

epochs = 10
for epoch in range(epochs):
    # Put the model in training mode at the start of every epoch.
    model.train()
    total_loss = 0
    for x, y in dataloader:
        x = x.to(device)
        # Targets come out of the loader as [batch]; add a trailing dimension
        # so they line up with the model's [batch, 1] predictions.
        y = y.to(device).unsqueeze(1)

        # Clear gradients left over from the previous step, then run
        # forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        pred = model(x)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()

        # Accumulate the batch losses; the epoch report is their mean.
        total_loss += loss.item()
    print(f"Epoch {epoch+1} | Loss: {total_loss / len(dataloader):.4f}")


In [None]:
# 📈 Plot Predictions
model.eval()

# Evaluate on the most recent windows in chronological order. Drawing one batch
# from the shuffled training loader would place unrelated time steps next to
# each other on the x-axis and cap the plot at a single batch.
# NOTE: these windows were also seen during training, so this is an in-sample check.
n_plot = min(50, len(dataset))
plot_samples = [dataset[i] for i in range(len(dataset) - n_plot, len(dataset))]
x_test = torch.stack([window for window, _ in plot_samples])
y_test = torch.stack([value for _, value in plot_samples])

x_test = x_test.to(device)
with torch.no_grad():
    y_pred = model(x_test).cpu().numpy()
    y_true = y_test.numpy()

plt.figure(figsize=(10, 5))
plt.plot(y_true, label="True")
plt.plot(y_pred, label="Predicted")
plt.title("🔮 Transformer Forecast vs True")
plt.xlabel("Time step (most recent windows)")
plt.ylabel("OT")
plt.legend()
plt.grid(True)
plt.show()

In [None]:

✅ Summary
- ✅ This model uses a basic Transformer encoder for prediction.
- 🧠 You can later replace it with more powerful variants such as Informer, Autoformer, or PatchTST.
- 🔧 It is easy to integrate with the existing dataset and preprocessing steps.
