<a href="https://colab.research.google.com/github/amir-d2danalytics/DeepLearning/blob/main/Transformers_TS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing the libraries

In [None]:
!pip install torch
!pip install mlflow==1.20.2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Importing the necessary libraries

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.preprocessing import StandardScaler

We'll use the Airline Passengers dataset as our time series data. Let's load the data and preprocess it:

In [None]:
# Load the airline-passengers CSV with its first column as a parsed date
# index. The `squeeze=True` keyword of read_csv is deprecated (and removed in
# pandas 2.0); squeezing the single remaining column explicitly is the
# documented replacement and yields the same Series.
df = pd.read_csv(
  'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv',
  header=0,
  index_col=0,
  parse_dates=True,
).squeeze('columns')
data = df.values.astype('float32')
# StandardScaler expects a 2-D (n_samples, n_features) array, hence the
# reshape to a single column; the result is flattened back to 1-D afterwards.
# NOTE(review): the scaler is fitted on the whole series, including the final
# points that are later held out for validation.
scaler = StandardScaler()
data = scaler.fit_transform(data.reshape(-1, 1)).flatten()



  df = pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv', header=0, index_col=0, parse_dates=True, squeeze=True)


Defining our transformer model:

In [None]:
class TransformerModel(nn.Module):
  """Encoder-decoder transformer that emits one scalar per sample.

  Args:
    input_size: number of source steps per sample (the window length).
    d_model: width of the transformer embeddings.
    nhead: number of attention heads.
    num_encoder_layers: number of stacked encoder layers.
    num_decoder_layers: number of stacked decoder layers.
    dim_feedforward: hidden width of each layer's feed-forward sub-network.
    dropout: dropout probability used inside the layers.

  NOTE(review): no positional encoding is added, so for input_size > 1 the
  encoder has no information about the order of the steps in a window.
  """

  def __init__(self, input_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, dropout):
    super().__init__()
    # forward() lays tensors out as (batch, steps, d_model), so the layers must
    # be batch-first. With the default (sequence-first) layout attention would
    # run across the samples of a batch and the decoder would reject any
    # input_size other than 1 because of a batch-size mismatch.
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True)
    decoder_layer = nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=True)
    self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
    self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)
    self.linear = nn.Linear(d_model, 1)
    # Learned projections that lift raw scalar observations to d_model features.
    self.src_embedding = nn.Linear(1, d_model)
    self.tgt_embedding = nn.Linear(1, d_model)
    self.input_size = input_size
    self.d_model = d_model

  def _embed(self, values, steps, projection):
    """Return `values` as a (batch, steps, d_model) tensor.

    A tensor with at least two dimensions whose last dimension equals d_model
    is treated as already embedded and only reshaped. Anything else is treated
    as raw scalars (one value per step) and passed through `projection`.
    A raw window whose length happens to equal d_model is therefore ambiguous
    and is taken as already embedded.
    """
    if values.dim() >= 2 and values.size(-1) == self.d_model:
      return values.reshape(-1, steps, self.d_model)
    return projection(values.reshape(-1, steps, 1))

  def forward(self, src, tgt):
    """Run the encoder-decoder and project the decoder output to a scalar.

    Args:
      src: source windows, either raw scalars with `input_size` values per
        sample or pre-embedded features of width d_model.
      tgt: decoder input with one step per sample, raw or pre-embedded.

    Returns:
      Tensor of shape (batch, 1).
    """
    src = self._embed(src, self.input_size, self.src_embedding)
    tgt = self._embed(tgt, 1, self.tgt_embedding)
    memory = self.encoder(src)
    output = self.decoder(tgt, memory)
    # The decoder output is (batch, 1, d_model); drop the single step axis.
    return self.linear(output.squeeze(-2))

Defining our training loop

In [None]:
def train_model(model, train_data, val_data, epochs, learning_rate, batch_size):
  """Train `model` with Adam on an MSE loss and log progress to MLflow.

  Must be called inside an active MLflow run. Per-batch training loss is
  logged as 'train_loss', the sample-weighted mean validation loss of each
  epoch as 'val_loss', and the trained model is logged as the 'model' artifact.

  Args:
    model: module called as model(src, tgt).
    train_data: dataset whose items are (src, tgt) pairs.
    val_data: dataset whose items are (src, tgt) pairs.
    epochs: number of passes over train_data.
    learning_rate: Adam learning rate.
    batch_size: batch size for both loaders.

  NOTE(review): the ground-truth tgt is passed to the model and is also the
  regression target, so the model sees the value it is asked to predict.
  Left as in the original; confirm whether the decoder input should instead
  be built from known history.
  """
  criterion = nn.MSELoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
  val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)
  steps_per_epoch = len(train_loader)
  for epoch in range(epochs):
    model.train()
    for idx, (src, tgt) in enumerate(train_loader):
      optimizer.zero_grad()
      output = model(src, tgt)
      # Align the target with the prediction's shape: comparing e.g. (B, 1)
      # with (B,) would silently broadcast to a (B, B) loss.
      loss = criterion(output, tgt.reshape(output.shape))
      loss.backward()
      optimizer.step()
      mlflow.log_metric('train_loss', loss.item(), step=epoch * steps_per_epoch + idx)
    model.eval()
    total_loss = 0.0
    samples_seen = 0
    with torch.no_grad():
      for src, tgt in val_loader:
        output = model(src, tgt)
        loss = criterion(output, tgt.reshape(output.shape))
        # MSELoss returns the batch mean; weight by batch size so a smaller
        # final batch does not skew the epoch average.
        total_loss += loss.item() * len(src)
        samples_seen += len(src)
    # Skip the metric instead of dividing by zero on an empty validation set.
    if samples_seen:
      mlflow.log_metric('val_loss', total_loss / samples_seen, step=(epoch + 1) * steps_per_epoch)
  # The third positional parameter of log_model is `conda_env`, not an epoch
  # number, so the original call passed an invalid environment. Log the
  # trained model once, after the last epoch.
  mlflow.pytorch.log_model(model, 'model')

Running our training loop

In [None]:
if __name__ == '__main__':
  # Single source of truth for the run configuration, so the values logged to
  # MLflow cannot drift from the values actually used below.
  params = {
    'input_size': 1,
    'd_model': 128,
    'nhead': 8,
    'num_encoder_layers': 3,
    'num_decoder_layers': 3,
    'dim_feedforward': 512,
    'dropout': 0.1,
    'epochs': 100,
    'learning_rate': 0.001,
    'batch_size': 32,
    'dataset': 'airline_passengers',
  }
  # Number of final targets held out for validation.
  val_points = 12

  with mlflow.start_run():
    mlflow.log_params(params)

    # train_model unpacks every batch as (src, tgt), so the datasets must
    # yield pairs; a bare 1-D tensor yields single values and cannot be
    # unpacked. Each pair is a window of `input_size` consecutive values and
    # the value that follows it.
    window = params['input_size']
    series = torch.as_tensor(data, dtype=torch.float32)
    inputs = torch.stack([series[i:i + window] for i in range(len(series) - window)])
    targets = series[window:].unsqueeze(1)
    train_data = torch.utils.data.TensorDataset(inputs[:-val_points], targets[:-val_points])
    val_data = torch.utils.data.TensorDataset(inputs[-val_points:], targets[-val_points:])

    model = TransformerModel(
      params['input_size'],
      params['d_model'],
      params['nhead'],
      params['num_encoder_layers'],
      params['num_decoder_layers'],
      params['dim_feedforward'],
      params['dropout'],
    )
    train_model(model, train_data, val_data, params['epochs'], params['learning_rate'], params['batch_size'])