https://colab.research.google.com/github/jeffheaton/app_deep_learning/blob/main/t81_558_class_10_3_transformer_timeseries.ipynb#scrollTo=hoT-VFSdOANz

In [4]:
# Make use of a GPU or MPS (Apple) if one is available.  (see module 3.2)
import torch
has_mps = torch.backends.mps.is_built()
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Note: not using Google CoLab
Using device: cuda


In [5]:
BATCH_SIZE = 32

In [6]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

 

In [7]:
df = pd.read_csv("merged.csv", usecols=["INNER_TPRT_1", "EXTN_TPRT", "INNER_HMDT_1"])
df_train = df.iloc[:8000]
df_test = df[8000:10000]

cols = ['INNER_TPRT_1', "EXTN_TPRT"]
# y_cols = ["INNER_HMDT_1"]

spots_train = df_train[cols].to_numpy().reshape(-1, len(cols))
spots_test = df_test[cols].to_numpy().reshape(-1, len(cols))

spots_train[:5], spots_test[:5]

(   INNER_TPRT_1  EXTN_TPRT  INNER_HMDT_1
 0         15.45      -1.35         87.05
 1         15.30      -1.30         87.45
 2         15.25      -1.50         88.40
 3         15.30      -1.55         87.95
 4         15.35      -1.40         87.95,
       INNER_TPRT_1  EXTN_TPRT  INNER_HMDT_1
 8000         13.87       2.67         85.37
 8001         13.80       2.57         85.17
 8002         13.53       2.50         86.20
 8003         13.83       2.37         85.20
 8004         13.50       2.47         86.43)

In [9]:
# Sequence Data Preparation
SEQUENCE_SIZE = 10

def to_sequences(seq_size, obs):
    x = []
    y = []
    for i in range(len(obs) - seq_size):
        window = obs[i:(i + seq_size)]
        after_window = obs[i + seq_size]
        x.append(window)
        y.append(after_window)
    return (
        torch.tensor(x, dtype=torch.float32).view(-1, seq_size, len(cols)),
        torch.tensor(y, dtype=torch.float32).view(-1, len(cols))
    )

x_train, y_train = to_sequences(SEQUENCE_SIZE, spots_train)
x_test, y_test = to_sequences(SEQUENCE_SIZE, spots_test)

x_train.shape, y_train.shape

(torch.Size([7990, 10, 2]), torch.Size([7990, 1]))

In [10]:
# Setup data loaders for batch
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x
    
# Model definition using Transformer
class TransformerModel(nn.Module):
    def __init__(self, input_dim=1, output_size=1, d_model=32, nhead=4, num_layers=1):
        super(TransformerModel, self).__init__()

        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, output_size)

    def forward(self, x):
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = self.decoder(x[:, -1, :])
        return x

In [12]:
# Train the model
model = TransformerModel(input_dim=len(cols), output_size=len(cols)).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3, verbose=True)



In [13]:
import matplotlib.pyplot as plt

epochs = 10
early_stop_count = 0
min_val_loss = float('inf')

for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    val_losses = []
    with torch.no_grad():
        for batch in test_loader:
            x_batch, y_batch = batch
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            val_losses.append(loss.item())

    val_loss = np.mean(val_losses)
    scheduler.step(val_loss)

    # if val_loss < min_val_loss:
    #     min_val_loss = val_loss
    #     early_stop_count = 0
    # else:
    #     early_stop_count += 1

    # if early_stop_count >= 5:
    #     print("Early stopping!")
    #     break
    print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {val_loss:.4f}")
    



RuntimeError: mat1 and mat2 shapes cannot be multiplied (320x2 and 3x32)

In [9]:
# Evaluation
model.eval()
predictions = []
with torch.no_grad():
    for batch in test_loader:
        x_batch, y_batch = batch
        x_batch = x_batch.to(device)
        outputs = model(x_batch)
        predictions.extend(outputs.squeeze().tolist())

rmse = np.sqrt(np.mean(
    (np.array(predictions).reshape(-1, 1) - y_test.numpy().reshape(-1, 1)))**2
)
print(f"Score (RMSE): {rmse:.4f}")

Score (RMSE): 0.6888


In [14]:
test_loader

32

In [18]:

x_test.shape, y_test.shape, np.array(predictions).shape

(torch.Size([40374, 10, 2]), torch.Size([40374, 2]), (40374, 2))