📘 PatchTST for IoT Time Series Forecasting

PatchTST treats a time series like an image:
	•	Splits the series into small “patches” 🔲
	•	Embeds them like tokens
	•	Uses a pure Transformer (no convolutions or recurrence!)

This makes PatchTST extremely strong on long sequence forecasting.

In [1]:
# 📦 Step 1: Import Libraries
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from einops import rearrange
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os
import sys

In [2]:
sys.path.append('../')  # Handle custom imports
from models.transformer_model_definitions import PatchTST

# ✅ Enable GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using:", device)

Using: cpu


In [3]:
# 📂 Step 2: Load Preprocessed Splits
train_df = pd.read_csv("../data/processed/etth1_train.csv", parse_dates=["date"], index_col="date")
val_df = pd.read_csv("../data/processed/etth1_val.csv", parse_dates=["date"], index_col="date")
test_df = pd.read_csv("../data/processed/etth1_test.csv", parse_dates=["date"], index_col="date")


In [4]:
#parameters
target_column = "OT"
window_size = 96  # Must be divisible by patch_size
patch_size = 8
batch_size = 32
epochs = 10


In [5]:
# 🧩 Step 3: Create Dataset
class TimeSeriesDataset(Dataset):
    def __init__(self, series, window_size, patch_size):
        self.series = torch.tensor(series, dtype=torch.float32)
        self.window_size = window_size
        self.patch_size = patch_size

    def __len__(self): 
        return len(self.series) - self.window_size

    def __getitem__(self, idx):
        x = self.series[idx:idx+self.window_size]
        y = self.series[idx+self.window_size]
        patches = rearrange(x, '(p s) -> p s', s=self.patch_size)
        return patches.unsqueeze(-1), y

In [6]:
# 🔄 Step 4: Dataloaders
train_loader = DataLoader(TimeSeriesDataset(train_df[target_column].values, window_size, patch_size),
                          batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TimeSeriesDataset(val_df[target_column].values, window_size, patch_size),
                        batch_size=batch_size, shuffle=False)
test_loader = DataLoader(TimeSeriesDataset(test_df[target_column].values, window_size, patch_size),
                         batch_size=batch_size, shuffle=False)

In [7]:

# ⚙️ Step 5: Train PatchTST Model
model = PatchTST(patch_size=patch_size, num_patches=window_size // patch_size).to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device).unsqueeze(1)
        pred = model(x)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"[Epoch {epoch+1}] Train Loss: {total_loss / len(train_loader):.4f}")


[Epoch 1] Train Loss: 0.0416
[Epoch 2] Train Loss: 0.0043
[Epoch 3] Train Loss: 0.0042
[Epoch 4] Train Loss: 0.0039
[Epoch 5] Train Loss: 0.0040
[Epoch 6] Train Loss: 0.0038
[Epoch 7] Train Loss: 0.0036
[Epoch 8] Train Loss: 0.0036
[Epoch 9] Train Loss: 0.0035
[Epoch 10] Train Loss: 0.0031


In [8]:

# 💾 Save model
os.makedirs("../models/checkpoints", exist_ok=True)
torch.save(model.state_dict(), "../models/checkpoints/patchTST_transformer_model.pth")
print("✅ Model saved")

✅ Model saved


In [9]:
# 📊 Step 6: Define Evaluation
def smape(y_true, y_pred):
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    diff = np.abs(y_true - y_pred) / denominator
    diff[denominator == 0] = 0.0
    return 100 * np.mean(diff)


In [10]:
def evaluate_model(dataloader, dataset_df, title, file_prefix):
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device).unsqueeze(1)
            pred = model(x)
            preds.append(pred.cpu().numpy())
            trues.append(y.cpu().numpy())

    predictions = np.concatenate(preds).flatten()
    true_values = np.concatenate(trues).flatten()

    # Inverse transform
    dummy_shape = (predictions.shape[0], dataset_df.shape[1])
    predictions_full = np.zeros(dummy_shape)
    true_values_full = np.zeros(dummy_shape)
    predictions_full[:, -1] = predictions
    true_values_full[:, -1] = true_values

    scaler = MinMaxScaler()
    scaler.fit(dataset_df)
    predictions = scaler.inverse_transform(predictions_full)[:, -1]
    true_values = scaler.inverse_transform(true_values_full)[:, -1]

    # Metrics
    mse = mean_squared_error(true_values, predictions)
    mae = mean_absolute_error(true_values, predictions)
    smape_val = smape(true_values, predictions)

    print(f"📊 {title}")
    print(f" - MSE   : {mse:.6f}")
    print(f" - MAE   : {mae:.6f}")
    print(f" - SMAPE : {smape_val:.2f}%")

    # Plot & save
    output_dir = "../../outputs/metrics/patchTST"
    os.makedirs(output_dir, exist_ok=True)
    plt.figure(figsize=(12, 6))
    plt.plot(true_values[:100], label="True")
    plt.plot(predictions[:100], label="Predicted")
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{output_dir}/{file_prefix}_plot.png")
    print(f"✅ Plot saved to {output_dir}/{file_prefix}_plot.png")
    plt.close()

    # Save CSV
    pd.DataFrame({
        "True Values": true_values,
        "Predictions": predictions
    }).to_csv(f"{output_dir}/{file_prefix}_results.csv", index=False)
    print(f"✅ Results saved to {output_dir}/{file_prefix}_results.csv\n")


In [11]:

# 📈 Step 7: Run Evaluation on Train/Val/Test
evaluate_model(train_loader, train_df, "📈 Train Set Forecast vs True", "train")
evaluate_model(val_loader, val_df, "📈 Validation Set Forecast vs True", "val")
evaluate_model(test_loader, test_df, "📈 Test Set Forecast vs True", "test")

📊 📈 Train Set Forecast vs True
 - MSE   : 0.006373
 - MAE   : 0.067395
 - SMAPE : 20.55%
✅ Plot saved to ../../outputs/metrics/patchTST/train_plot.png
✅ Results saved to ../../outputs/metrics/patchTST/train_results.csv



  plt.savefig(f"{output_dir}/{file_prefix}_plot.png")


📊 📈 Validation Set Forecast vs True
 - MSE   : 0.000419
 - MAE   : 0.017423
 - SMAPE : 13.89%
✅ Plot saved to ../../outputs/metrics/patchTST/val_plot.png
✅ Results saved to ../../outputs/metrics/patchTST/val_results.csv



  plt.savefig(f"{output_dir}/{file_prefix}_plot.png")


📊 📈 Test Set Forecast vs True
 - MSE   : 0.000404
 - MAE   : 0.017387
 - SMAPE : 7.64%
✅ Plot saved to ../../outputs/metrics/patchTST/test_plot.png
✅ Results saved to ../../outputs/metrics/patchTST/test_results.csv



  plt.savefig(f"{output_dir}/{file_prefix}_plot.png")


In [12]:
# ✅ Next Suggestions
# Would you like help with:

# Multi-step PatchTST forecasting?

# Multivariate PatchTST support?

# Leaderboard for all 4 models (LSTM, Informer, Autoformer, PatchTST)?