In [16]:
# !pip install torch
# !pip install pytorch_lightning
# !pip install datasets
# !pip install torchmetrics 
# !pip install tensorboard 
# !pip install torchtext
# !pip install tokenizers
# !pip install -U rich

In [4]:
import torch
import torch.optim as optim
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import ModelSummary, RichProgressBar
import pytorch_lightning as pl
import torch.nn as nn 
import random
from random import randrange


In [5]:
from torch.utils.data import Dataset, DataLoader, random_split
from datasets import load_dataset 

In [6]:
from model import build_transformer
from config import get_config 
from dataset import BilingualDataset
from train import greedy_decode_pl, get_or_build_tokenizer, get_ds 

In [7]:
class Lit_Transformer(pl.LightningModule):
    """LightningModule wrapper around the transformer returned by ``build_transformer``.

    The module owns its configuration (``get_config``), both tokenizers and both
    dataloaders (``get_ds``), so ``Trainer.fit(model)`` can be called without
    passing any data explicitly.

    Training minimises label-smoothed cross entropy over the target vocabulary.
    Validation computes no metric: it greedily decodes the first validation
    batch and prints source / target / prediction for a quick qualitative check.
    """

    def __init__(self):
        super().__init__()
        self.config = get_config()

        # Build the data pipeline exactly once and keep both loaders. Calling
        # get_ds again for every dataloader hook would redo all of its work and,
        # if get_ds splits the corpus randomly (not visible from this notebook --
        # TODO confirm in train.py), would hand the train and validation hooks
        # two *different* splits, leaking training sentences into validation.
        (
            self._train_loader,
            self._val_loader,
            self.tokenizer_src,
            self.tokenizer_tgt,
        ) = get_ds(self.config)

        self.max_len = self.config['seq_len']
        self.model = build_transformer(
            self.tokenizer_src.get_vocab_size(),
            self.tokenizer_tgt.get_vocab_size(),
            self.config["seq_len"],
            self.config["seq_len"],
            d_model=self.config["d_model"],
        )

        # Labels are target-language token ids, so the padding id to ignore must
        # come from the *target* tokenizer (the source tokenizer's id is only
        # guaranteed to match if both vocabularies assign '[PAD]' the same index).
        self.loss_fn = nn.CrossEntropyLoss(
            ignore_index=self.tokenizer_tgt.token_to_id('[PAD]'),
            label_smoothing=0.1,
        )

    def forward(self, x):
        """Run a teacher-forced pass and return the projected decoder output.

        Args:
            x: batch dict providing ``'encoder_input'``, ``'decoder_input'``,
                ``'encoder_mask'`` and ``'decoder_mask'``.

        Returns:
            The output of ``self.model.project`` for the batch.
        """
        encoder_input = x['encoder_input']  # (B, seq_len)
        decoder_input = x['decoder_input']  # (B, seq_len)
        encoder_mask = x['encoder_mask']  # (B, 1, 1, seq_len)
        decoder_mask = x['decoder_mask']  # (B, 1, seq_len, seq_len)

        # Run the tensors through the encoder, decoder and the projection layer
        encoder_output = self.model.encode(encoder_input, encoder_mask)  # (B, seq_len, d_model)
        decoder_output = self.model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)
        proj_output = self.model.project(decoder_output)  # (B, seq_len, vocab_size)

        return proj_output

    def forward_inference(self, x):
        """Greedy-decode one batch and print source, reference and prediction.

        Only the first ``src_text`` / ``tgt_text`` entry of the batch is printed.
        NOTE(review): this looks like it expects a validation batch size of 1 --
        confirm against ``greedy_decode_pl`` and the validation loader.

        Args:
            x: batch dict providing ``'encoder_input'``, ``'encoder_mask'``,
                ``'src_text'`` and ``'tgt_text'``.

        Returns:
            None; the result is only printed.
        """
        encoder_input = x["encoder_input"]  # (b, seq_len)
        encoder_mask = x["encoder_mask"]  # (b, 1, 1, seq_len)
        model_out = greedy_decode_pl(
            self.model, encoder_input, encoder_mask,
            self.tokenizer_src, self.tokenizer_tgt, self.max_len,
        )
        source_text = x["src_text"][0]
        target_text = x["tgt_text"][0]
        model_out_text = self.tokenizer_tgt.decode(model_out.detach().cpu().numpy())

        print(f'SOURCE: {source_text}')
        print(f'TARGET: {target_text}')
        print(f'PREDICTED (Lavanya Nemani & Shashank Gupta): {model_out_text}')
        return None

    def configure_optimizers(self):
        """Return an Adam optimizer over the transformer's parameters (lr from config)."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.config['lr'], eps=1e-9)
        return {"optimizer": optimizer}

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch.

        Args:
            batch: batch dict as consumed by ``forward`` plus a ``'label'`` tensor.
            batch_idx: index of the batch within the epoch (unused).

        Returns:
            The scalar loss tensor that Lightning back-propagates.
        """
        x = batch
        proj_output = self.forward(x)
        label = x['label']
        # Flatten so every target position is scored as one classification
        # over the target vocabulary.
        loss = self.loss_fn(proj_output.view(-1, self.tokenizer_tgt.get_vocab_size()), label.view(-1))
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Print a sample translation for the first validation batch only."""
        if batch_idx == 0:
            x = batch
            self.forward_inference(x)
        return None

    ## TODO: print only 1 random example from val_dataloader
    # (e.g. in on_validation_epoch_end). When picking the batch index, use
    # random.randrange(len(val_loader)): random.randint(0, len(val_loader))
    # includes the upper bound and can select an index that no batch has.

    def get_dataloaders(self):
        """Return the ``(train_loader, val_loader)`` pair built in ``__init__``."""
        return self._train_loader, self._val_loader

    def train_dataloader(self):
        """Lightning hook: the training dataloader."""
        train_loader, _ = self.get_dataloaders()
        return train_loader

    def val_dataloader(self):
        """Lightning hook: the validation dataloader (same split as training)."""
        _, val_loader = self.get_dataloaders()
        return val_loader


In [8]:
# Initial training run: build the Lightning module and fit it for `epochs` epochs.
epochs = 10
model = Lit_Transformer()

print('Logs (Lavanya Nemani & Shashank Gupta)')

## TODO: implement multi-gpu training
trainer_kwargs = dict(
    accelerator="gpu",
    devices=[1],  # single GPU, selected by index
    max_epochs=epochs,
    # Full-depth layer summary plus a rich progress bar that is left on screen.
    callbacks=[ModelSummary(max_depth=-1), RichProgressBar(leave=True)],
)
trainer = Trainer(**trainer_kwargs)
trainer.fit(model)

Max length of source sentence: 309
Max length of target sentence: 274


Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Logs (Lavanya Nemani & Shashank Gupta)


You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

    | Name                                                  | Type                    | Params
----------------------------------------------------------------------------------------------------
0   | model                                                 | Transformer             | 75.1 M
1   | model.encoder                                         | Encoder                 | 18.9 M
2   | model.encoder.layers                                  | ModuleList              | 18.9 M
3   | model.encoder.layers.0                                | EncoderBlock            | 3.1 M 
4   | model.encoder.la

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

`Trainer.fit` stopped: `max_epochs=10` reached.


In [10]:
# Second run: fit the same `model` object for 4 more epochs with a fresh Trainer.
extra_run_callbacks = [ModelSummary(max_depth=-1), RichProgressBar(leave=True)]
trainer = Trainer(
    max_epochs=4,
    accelerator="gpu",
    devices=[1],
    callbacks=extra_run_callbacks,
)
trainer.fit(model)

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

    | Name                                                  | Type                    | Params
----------------------------------------------------------------------------------------------------
0   | model                                                 | Transformer             | 75.1 M
1   | model.encoder                                         | Encoder                 | 18.9 M
2   | model.encoder.layers                                  | ModuleList              | 18.9 M
3   | model.encoder.layers.0                                | EncoderBlock            | 3.1 M 
4   | model.encoder.layers.0.self_attention_block 

Output()

Output()

Output()

Output()

`Trainer.fit` stopped: `max_epochs=4` reached.


In [11]:
# Save only the weights (state_dict) of the module trained in the cells above.
checkpoint_path = 'Transformer_epoch14_Lavanya_Shashank.pth'
torch.save(model.state_dict(), checkpoint_path)