<a href="https://colab.research.google.com/github/alrz199/transformers/blob/main/Transformer_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Using a Transformer architecture to classify MNIST data points.

In [None]:
!pip --quiet install pytorch-lightning torchmetrics

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.2/802.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m841.5/841.5 kB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader
import math

In [None]:
# MNIST train / test splits, stored under ./data (downloaded on first use).
# Every image is passed through ToTensor before it is returned.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [None]:
# Mini-batch loaders of 64 samples each: the training split is reshuffled,
# the test split keeps its stored order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Draw a single mini-batch from the training loader and show its tensor shapes.
inputs, targets = next(iter(train_loader))
print(f"{inputs.shape} {targets.shape}")

torch.Size([64, 1, 28, 28]) torch.Size([64])


In [None]:
# Same shape check for the first mini-batch of the test loader.
inputs, targets = next(iter(test_loader))
print(f"{inputs.shape} {targets.shape}")

torch.Size([64, 1, 28, 28]) torch.Size([64])


In [None]:
#import libraries
from pytorch_lightning import LightningModule, Trainer
from torch.nn import functional as F
import torch.nn as nn
# Training hyper-parameters.
# NOTE(review): the DataLoaders built earlier in this file hard-code
# batch_size=64, so `batch_size` is not what they use -- confirm intent.
batch_size = 256
learning_rate = 1e-4  # read by nnn.configure_optimizers as the Adam step size

In [None]:
# Display the shape of the most recently fetched batch of images.
inputs.size()

torch.Size([64, 1, 28, 28])

In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a sequence-first tensor.

    Adapted from the PyTorch tutorials, extended so that an odd ``d_model``
    is supported and over-long sequences fail with a clear error.

    Args:
        d_model: Size of the embedding (last) dimension.
        dropout: Dropout probability applied after the encoding is added.
        max_len: Longest sequence length for which encodings are precomputed.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        # Even feature indices carry the sine terms, odd indices the cosines.
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer odd index than even index,
        # so drop the surplus frequency instead of failing on a shape mismatch.
        pe[:, 0, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Buffer: follows the module across devices but is not a parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``

        Returns:
            Tensor of the same shape: ``x`` plus the positional encoding of
            each sequence position, passed through dropout.

        Raises:
            ValueError: if ``seq_len`` exceeds the ``max_len`` given at
                construction time.
        """
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {self.pe.size(0)}"
            )
        # pe has a singleton batch axis, so it broadcasts over the batch.
        x = x + self.pe[:seq_len]
        return self.dropout(x)

In [None]:
# Sanity-check the positional encoder on one batch laid out as
# [seq_len = 28 image rows, batch, 28 features per row].
# squeeze + permute performs a real transpose of the [batch, 1, 28, 28] batch;
# reshape(28, 64, 28) would scramble rows across images and would also break
# for any batch that does not contain exactly 64 samples.
pos_encoder = PositionalEncoding(28, 0.1)
pos_encoder(inputs.squeeze(1).permute(1, 0, 2)).shape

torch.Size([28, 64, 28])

In [None]:
from torchmetrics import Accuracy
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
# Device string kept for any other cell that may reference it; `nnn` itself no
# longer reads it because its metrics are registered as sub-modules.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


class nnn(LightningModule):
    """Row-wise Transformer classifier for 28x28 single-channel images.

    Each image is treated as a sequence of 28 rows (tokens) with 28 features
    per row. The sequence receives sinusoidal positional encodings, passes
    through a 6-layer Transformer encoder, and the flattened encoder output is
    mapped to 10 class logits by a two-layer fully connected head.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 50)
        self.fc2 = nn.Linear(50, 10)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=28, nhead=4)
        # Fix: the stack must be built from the layer created above; the bare
        # name `encoder_layer` is not defined in this scope.
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=6)
        self.pos_encoder = PositionalEncoding(28, 0.1)
        # Metrics are created once and registered on the module instead of
        # being rebuilt (and moved to a global device) on every step.
        self.train_accuracy = Accuracy(task="multiclass", num_classes=10)
        self.val_accuracy = Accuracy(task="multiclass", num_classes=10)

    def forward(self, x):
        """Return class logits of shape ``[batch, 10]``.

        Arguments:
            x: Tensor holding ``batch`` images of 28x28 values, e.g. shape
               ``[batch, 1, 28, 28]``.
        """
        batch = x.shape[0]
        # [batch, 28, 28] -> [28 rows (sequence), batch, 28 features].
        # A real transpose is required: reshaping straight to (28, batch, 28)
        # would place rows of different images in the same sequence, so
        # attention would mix samples within the batch.
        x = x.reshape(batch, 28, 28).permute(1, 0, 2)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        # Back to batch-first so each image's 28 encoded rows are flattened
        # together.
        x = x.permute(1, 0, 2).reshape(batch, 28 * 28)
        x = F.leaky_relu(self.fc1(x))
        return self.fc2(x)

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one training batch and log it."""
        x, y = batch
        out = self(x)
        loss = F.cross_entropy(out, y)
        self.train_accuracy(out, y)
        # Calling self.log will surface up scalars for you in TensorBoard
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", self.train_accuracy, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one validation batch and log it."""
        x, y = batch
        out = self(x)
        loss = F.cross_entropy(out, y)
        self.val_accuracy(out, y)
        # "val_loss" is the quantity monitored by the EarlyStopping callback.
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", self.val_accuracy, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Return Adam plus a cosine-annealing-with-warm-restarts LR schedule.

        Author's note: the cosine scheduler is used to keep Adam from
        producing unfavorable results.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        # NOTE(review): no "interval" is given, so the scheduler presumably
        # steps once per epoch; with T_0=1000 the first restart would then be
        # far beyond a 10-epoch run. Add "interval": "step" if per-batch
        # annealing was intended -- confirm.
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=1000, T_mult=2)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler},
        }

In [None]:
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# TensorBoard event files are written under tb_logs/my_model.
logger = TensorBoardLogger("tb_logs", name="my_model")
model = nnn()
# NOTE(review): AVAIL_GPUS is computed but not passed to the Trainer below;
# kept in case another cell reads it.
AVAIL_GPUS = min(1, torch.cuda.device_count())
trainer = Trainer(
    max_epochs=10,
    # Fix: the logger created above was never handed to the Trainer.
    logger=logger,
    # Stop early once the logged "val_loss" stops improving.
    callbacks=[EarlyStopping(monitor="val_loss")],
)

# Train the model ⚡
# Note: the test split is used as the validation set here.
trainer.fit(model, train_loader, val_dataloaders=test_loader)
print('Finished Training')

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name                | Type                    | Params
----------------------------------------------------------------
0 | conv1               | Conv2d                  | 50    
1 | pool                | MaxPool2d               | 0     
2 | conv2               | Conv2d                  | 690   
3 | fc1                 | Linear                  | 37.6 K
4 | fc2                 | Linear                  | 39.2 K
5 | fc3                 | Linear                  | 510   
6 | encoder_layer       | TransformerEncoderLayer | 120 K 
7 | tran

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


Finished Training


In [None]:
# We reached about 0.97 accuracy on the validation set (the MNIST test split).