# Replicate the LSTM architecture with PyTorch + Lightning

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import lightning as L
# For Lightning version 2.x
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.loggers import TensorBoardLogger
from torch.utils.data import TensorDataset, DataLoader

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

## Setup device-agnostic code

In [2]:
# Pick the best available backend: CUDA GPU first, then Apple-silicon MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device

'mps'

## Create a training dataset

In [32]:
# Training set: two sequences of four time steps each.
# A trailing axis of size 1 is appended so the last dimension is the
# number of features per time step, giving shape [batch, seq_len, 1].
inputs = torch.tensor(
    [
        [0.0, 0.5, 0.25, 1.0],
        [1.0, 0.5, 0.25, 1.0],
    ]
)[..., None]

# One target value per sequence, shape [batch, 1].
labels = torch.tensor([0.0, 1.0]).unsqueeze(dim=-1)

inputs, labels, inputs.shape, labels.shape

(tensor([[[0.0000],
          [0.5000],
          [0.2500],
          [1.0000]],
 
         [[1.0000],
          [0.5000],
          [0.2500],
          [1.0000]]]),
 tensor([[0.],
         [1.]]),
 torch.Size([2, 4, 1]),
 torch.Size([2, 1]))

## Create an LSTM model

In [8]:
class LightningLSTM(L.LightningModule):
    """Single-layer LSTM followed by a linear head producing one value per sequence.

    The LSTM is built with ``batch_first=True``, so inputs are expected as
    ``[batch_size, seq_len, input_size]``. Only the hidden state of the last
    time step is passed to the linear layer.

    Args:
        hidden_size: Dimension of the LSTM hidden state.
        input_size: Number of features per time step.
        learning_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self,
                 hidden_size: int = 3,
                 input_size: int = 1,
                 learning_rate: float = 0.1):

        super().__init__()

        # Kept as an attribute (rather than hard-coded in configure_optimizers)
        # so it can be changed after construction, e.g. by a learning-rate finder
        # that looks for an attribute named `learning_rate`.
        self.learning_rate = learning_rate

        # input_size is the number of features; hidden_size is the dimension of
        # the hidden state (the outcome of an LSTM unit).
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            batch_first=True) # False as default

        self.linear_layer = nn.Linear(in_features=hidden_size,
                                      out_features=1)

        self.loss = nn.MSELoss()

    def forward(self, input):
        """Return one prediction per sequence, shape ``[batch_size, 1]``.

        Args:
            input: Tensor of shape ``[batch_size, seq_len, input_size]``.
        """
        # The second return value (final hidden/cell states) is not needed here.
        lstm_out, _ = self.lstm(input)

        # Only the hidden state of the final time step feeds the linear layer.
        prediction = self.linear_layer(lstm_out[:, -1, :])

        return prediction

    def configure_optimizers(self):
        """Create the Adam optimizer over all parameters using ``self.learning_rate``."""
        return Adam(self.parameters(), lr=self.learning_rate)

    def training_step(self, batch, batch_idx):
        """Compute, log and return the MSE loss for one ``(input, label)`` batch."""
        input, label = batch
        prediction = self(input)
        loss = self.loss(prediction, label)
        # Record the loss so training progress is visible in the trainer's logger.
        self.log("train_loss", loss)
        return loss

In [33]:
# Seed the RNG before the model is built so the randomly initialised weights
# (and therefore the outputs and training results) are reproducible on re-run.
torch.manual_seed(42)

# Smoke-test the untrained model on the full training set
model = LightningLSTM(hidden_size=3).to(device)
model(inputs.to(device)) # inputs must be on the same device as the model

tensor([[-0.3479],
        [-0.3493]], device='mps:0', grad_fn=<LinearBackward0>)

## Train the model with `lightning.Trainer.fit()`

In [34]:
# Wrap the training tensors in a Dataset and DataLoader.
# TensorDataset and DataLoader are already imported in the first cell.
# DataLoader's default batch_size is 1, so each sample is its own batch.
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset)
len(dataloader)

2

In [35]:
# There are only 2 batches per epoch, fewer than Lightning's default logging
# interval of 50 steps, so log on every step instead.
trainer = L.Trainer(max_epochs=100, log_every_n_steps=1)
trainer.fit(model, train_dataloaders=dataloader)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type    | Params | Mode 
-------------------------------------------------
0 | lstm         | LSTM    | 72     | train
1 | linear_layer | Linear  | 4      | train
2 | loss         | MSELoss | 0      | train
-------------------------------------------------
76        Trainable params
0         Non-trainable params
76        Total params
0.000     Total estimated model params size (MB)
/Users/edison/Git/pytorch-lightning-deep-learning/myenv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/edison/Git/pytorch-lightning-deep-learning/myenv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The nu

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.


In [52]:
# Test the model after training with both training samples.
# Switch to eval mode and disable autograd: this is inference only.
model.eval()
with torch.no_grad():
    for i, label in enumerate(labels):
        # Add the batch dimension and move the sample to wherever the model lives,
        # rather than relying on the model having been moved back to the CPU.
        sample = inputs[i].unsqueeze(dim=0).to(model.device)
        print(f"Sample {i}: {label.item()} | {model(sample).item()}")

Sample 0: 0.0 | 0.018371671438217163
Sample 1: 1.0 | 0.9914801716804504
