In [1]:
!pip install pytorch_lightning
!pip install lightning
!pip install torch
!pip install torchvision
!pip install datasets
!pip install tokenizers

Collecting pytorch_lightning
  Using cached pytorch_lightning-2.0.8-py3-none-any.whl (727 kB)
Collecting fsspec[http]>2021.06.0
  Using cached fsspec-2023.9.0-py3-none-any.whl (173 kB)
Collecting lightning-utilities>=0.7.0
  Using cached lightning_utilities-0.9.0-py3-none-any.whl (23 kB)
Collecting PyYAML>=5.4
  Downloading PyYAML-6.0.1-cp310-cp310-win_amd64.whl (145 kB)
     ---------------------------------------- 0.0/145.3 kB ? eta -:--:--
     -------------------------------------- 145.3/145.3 kB 4.4 MB/s eta 0:00:00
Collecting torchmetrics>=0.7.0
  Using cached torchmetrics-1.1.1-py3-none-any.whl (763 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1
  Using cached aiohttp-3.8.5-cp310-cp310-win_amd64.whl (323 kB)
Collecting frozenlist>=1.1.1
  Using cached frozenlist-1.4.0-cp310-cp310-win_amd64.whl (44 kB)
Collecting yarl<2.0,>=1.0
  Using cached yarl-1.9.2-cp310-cp310-win_amd64.whl (61 kB)
Collecting async-timeout<5.0,>=4.0.0a3
  Using cached async_timeout-4.0.3-py3-none-any.whl (5.7 kB)

### Read the Config file from config.py

In [2]:
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import OneCycleLR
from config import get_config
from lightning_dataset import BilingualLightning
from lightning_model import TransformerLightning
from lightning_train import train_transformer

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Sanity check: confirm PyTorch can see a CUDA device before configuring the run.
torch.cuda.is_available()

True

### Edit the config to accommodate the batch size that fits on the available GPU

In [3]:
from config import get_config


In [4]:
# Load the configuration dictionary returned by config.get_config().
cfg = get_config()

In [5]:
# Display the configuration as loaded, before any overrides.
cfg

{'parameter_sharing': False,
 'd_ff': 2048,
 'batch_size': 32,
 'num_epochs': 30,
 'lr': 0.0001,
 'seq_len': 160,
 'd_model': 512,
 'lang_src': 'en',
 'lang_tgt': 'fr',
 'model_folder': 'weights',
 'model_basename': 'tmodel_',
 'preload': True,
 'tokenizer_file': 'tokenizer_{0}.json',
 'rundir': 'runs',
 'experiment_name': 'tmodel_dynamic_padding',
 'ds_mode': 'Online',
 'ds_path': None,
 'ds_name': 'opus_books',
 'save_ds_to_disk': True}

In [6]:
# Overrides for this run, applied on top of the values returned by get_config().
cfg.update(
    {
        'parameter_sharing': True,
        'd_ff': 1024,
        'batch_size': 16,
        'd_model': 512,
        'preload': None,
        'num_epochs': 40,
        'ds_mode': 'Online',
        'lang_tgt': 'fr',
        'save_ds_to_disk': False,
    }
)
# The experiment name is derived last so it reflects the overridden batch size and d_ff.
cfg['experiment_name'] = (
    f"tmodel_dynamic_pad_150_tokens_batch_{cfg['batch_size']}_amp_dff{cfg['d_ff']}"
)

In [7]:
# Display the configuration again to verify the overrides for this run took effect.
cfg

{'parameter_sharing': True,
 'd_ff': 1024,
 'batch_size': 16,
 'num_epochs': 40,
 'lr': 0.0001,
 'seq_len': 160,
 'd_model': 512,
 'lang_src': 'en',
 'lang_tgt': 'fr',
 'model_folder': 'weights',
 'model_basename': 'tmodel_',
 'preload': None,
 'tokenizer_file': 'tokenizer_{0}.json',
 'rundir': 'runs',
 'experiment_name': 'tmodel_dynamic_pad_150_tokens_batch_16_amp_dff1024',
 'ds_mode': 'Online',
 'ds_path': None,
 'ds_name': 'opus_books',
 'save_ds_to_disk': False}

### Enable mixed-precision training

In [8]:
# NOTE(review): this only constructs an autocast context manager object; it is never entered
# with a `with` block, so this statement on its own does not switch on mixed precision.
# Mixed precision for training is requested via `precision=16` on the Trainer created in
# train_transformer.
torch.cuda.amp.autocast(enabled=True)

<torch.cuda.amp.autocast_mode.autocast at 0x259011831f0>

In [9]:
# Select PyTorch's 'medium' float32 matmul precision setting, which permits faster,
# lower-precision internal formats for float32 matrix multiplications (speed over accuracy).
torch.set_float32_matmul_precision('medium')

### Train the model

In [10]:
# Build the bilingual data module from the config and run its prepare/setup steps by hand so
# the training dataloader exists up front; its length is needed to size the LR scheduler.
opus_fr = BilingualLightning(cfg)
opus_fr.prepare_data()
opus_fr.setup()
train_data_loader = opus_fr.train_dataloader()

In [11]:


tokenizer_src = opus_fr.tokenizer_src
tokenizer_tgt = opus_fr.tokenizer_tgt

# The loss is computed against target-language labels, so the padding id to ignore must come
# from the *target* tokenizer (the original used the source tokenizer, which is only correct
# when both vocabularies happen to assign the same id to '[PAD]').
# NOTE(review): assumes labels are padded with the target tokenizer's '[PAD]' id - confirm in
# lightning_dataset.
loss_criterion = nn.CrossEntropyLoss(ignore_index=tokenizer_tgt.token_to_id('[PAD]'), label_smoothing=0.1)
transformer_model = TransformerLightning(cfg, loss_criterion, tokenizer_src, tokenizer_tgt, num_validation_examples=2, epochs=cfg['num_epochs'])

# OneCycleLR (below) takes over the learning rate from the first step, starting at
# max_lr / div_factor, so cfg['lr'] only acts as the optimizer's initial placeholder value.
optimizer = torch.optim.Adam(transformer_model.model.parameters(), lr=cfg['lr'], eps=1e-9)
transformer_model.set_optimizer(optimizer)

# One-cycle schedule stepped once per batch: total steps = batches per epoch * epochs.
# pct_start is expressed as "5 epochs' worth of steps" for the rising phase, so it requires
# cfg['num_epochs'] > 5 to stay a valid fraction.
scheduler = OneCycleLR(
    optimizer,
    max_lr=1E-3,
    steps_per_epoch=len(train_data_loader),
    epochs=cfg['num_epochs'],
    pct_start=5/cfg['num_epochs'],
    div_factor=10,
    three_phase=True,
    final_div_factor=10,
    anneal_strategy='linear'
)
transformer_model.set_scheduler_dict(scheduler, freq = 'step')

Total Parameters: 61847890


In [15]:

import torch
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.callbacks import Callback
import torchmetrics
from config import get_weights_file_path
#from config import get_weights_file_path
import pytorch_lightning as pl


class PeriodicCheckpoint(ModelCheckpoint):
    """ModelCheckpoint subclass whose epoch-end hook saves a checkpoint after every epoch.

    The checkpoint path is produced by ``get_weights_file_path(config, "<epoch number>")``.
    """

    def __init__(self, config, verbose: bool = False):
        """Keep the config used to build checkpoint paths, and the verbosity flag."""
        super().__init__()
        self.verbose = verbose
        self.config = config

    def on_train_epoch_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule, *args, **kwargs):
        """Write a trainer checkpoint named after the current epoch."""
        epoch_tag = str(trainer.current_epoch)
        checkpoint_path = get_weights_file_path(self.config, epoch_tag)
        trainer.save_checkpoint(checkpoint_path)


class PrintAccuracyAndLoss(Callback):
    """Callback that reports the epoch training loss and text-quality validation metrics.

    At the end of each training epoch the latest ``train_loss`` is logged and printed.
    At the end of each validation epoch the character error rate, word error rate and
    2-gram BLEU score are computed from the module's ``predicted_list`` / ``expected_list``
    buffers, logged, and the buffers are reset for the next epoch.
    """

    def on_train_epoch_end(self, trainer, pl_module):
        """Log ``train_epoch_loss`` and print it together with the epoch index."""
        train_loss = trainer.callback_metrics['train_loss']
        # Log through the module handed to the hook; `trainer.model` may be a strategy wrapper.
        pl_module.log("train_epoch_loss", train_loss)
        print(f"Epoch {trainer.current_epoch}: train_loss={train_loss:.4f}")

    def on_validation_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        """Compute CER, WER and BLEU over the collected validation translations.

        Raises:
            AssertionError: if no predictions or references were collected this epoch.
        """
        # NOTE(review): both buffers are assumed to be parallel lists of strings filled by the
        # module's validation step - confirm in lightning_model.
        predicted = pl_module.predicted_list
        expected = pl_module.expected_list

        assert len(predicted) > 0, "Validation: predicted list is empty"
        assert len(expected) > 0, "Validation: expected list is empty"

        # compute the character error rate
        cer = torchmetrics.CharErrorRate()(predicted, expected)

        # compute word error rate
        wer = torchmetrics.WordErrorRate()(predicted, expected)

        # compute the BLEU metric
        # BLEUScore expects, for every prediction, a *sequence* of reference sentences. A bare
        # string would be iterated character by character, so wrap single references in a list.
        references = [[ref] if isinstance(ref, str) else ref for ref in expected]
        bleu = torchmetrics.BLEUScore(n_gram=2)(predicted, references)

        pl_module.log("validation_epoch_wer", wer)
        pl_module.log("validation_epoch_cer", cer)
        pl_module.log("validation_epoch_bleu", bleu)

        # Start the next validation epoch with empty buffers.
        pl_module.predicted_list = []
        pl_module.expected_list = []

def train_transformer(model, datamodule, config, ckpt_path=None, epochs=2):
    """Fit ``model`` on the datamodule's train/val loaders, then run the test loop.

    Args:
        model: LightningModule to train.
        datamodule: Object providing ``train_dataloader()``, ``val_dataloader()`` and
            ``test_dataloader()``.
        config: Configuration dict, passed to ``PeriodicCheckpoint`` to build checkpoint paths.
        ckpt_path: Optional checkpoint to resume fitting from.
        epochs: Maximum number of training epochs.

    Returns:
        The ``Trainer`` instance used for fitting and testing.
    """
    # Pick accelerator, device count and precision consistently. The original hard-coded
    # accelerator="gpu" while passing devices=None when CUDA was missing, which cannot work.
    use_gpu = torch.cuda.is_available()
    trainer = Trainer(
        enable_checkpointing=True,
        max_epochs=epochs,
        accelerator="gpu" if use_gpu else "cpu",
        devices=1,
        callbacks=[LearningRateMonitor(logging_interval="step"),
                   TQDMProgressBar(refresh_rate=10),
                   PeriodicCheckpoint(config, verbose=True),
                   PrintAccuracyAndLoss()],
        # Skip the pre-training sanity validation pass.
        num_sanity_val_steps=0,
        # 16-bit mixed precision on GPU; fall back to full precision on CPU.
        precision=16 if use_gpu else 32
    )

    trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader(), ckpt_path=ckpt_path)
    trainer.test(model, datamodule.test_dataloader())
    return trainer

In [16]:
# Confirm once more that CUDA is available right before launching training.
use_cuda = torch.cuda.is_available()
print (use_cuda)

True


In [17]:
# Smoke test: place a tiny tensor on the GPU to verify the CUDA device is actually usable.
torch.zeros(1).cuda()

tensor([0.], device='cuda:0')

In [18]:
# Launch training (and the test pass that train_transformer runs afterwards) for the
# configured number of epochs. The returned Trainer is shown as the cell's output.
train_transformer(transformer_model, opus_fr, cfg, epochs=cfg['num_epochs'])

  rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: e:\Deep Learning\Assignments\ERAV1\session16\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type             | Params
----------------------------------------------------
0 | loss_criterion | CrossEntropyLoss | 0     
1 | model          | Transformer      | 61.8 M
----------------------------------------------------
61.8 M    Trainable params
0         Non-trainable params
61.8 M    Total params
247.392   Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 4975/4975 [16:31<00:00,  5.02it/s, v_num=0, train_loss=5.040]*****************************************
    SOURCE: 'Ah, how much wiser I should be,' he said to himself, 'to remove the marks from my linen, and retire to some lonely forest, twenty leagues from Paris, there to end this accursed existence!
    TARGET: Ah ! que je serais plus sage, se disait-il, de démarquer mon linge, et d’aller dans quelque forêt solitaire, à vingt lieues de Paris, finir cette exécrable vie !
 PREDICTED: -- Ah ! si je crois , je ne puis pas se faire à la place de la route , et , à la côte , il y a de Paris , ce que ces messieurs !
*****************************************

*****************************************
    SOURCE: "Did master summon me?" he said, entering.
    TARGET: « Monsieur m'appelle ? dit-il en entrant.
 PREDICTED: -- - moi , dit - il , il me dit - il .
*****************************************

Epoch 0: 100%|██████████| 4975/4975 [17:10<00:00,  4.83it/s, v_num=




Epoch 1: 100%|██████████| 4975/4975 [18:39<00:00,  4.44it/s, v_num=0, train_loss=4.480]*****************************************
    SOURCE: He will soon find some way of making fire for us!"
    TARGET: Il trouvera bien le moyen de nous faire du feu, lui!
 PREDICTED: Il va bientôt nous faire un tour de feu pour nous !
*****************************************

*****************************************
    SOURCE: They did not think either of the danger which threatened them should the convicts return, or of the precautions to be taken for the future.
    TARGET: Ils ne s'étaient préoccupés ni du danger qui pouvait les menacer si les convicts revenaient, ni des précautions à prendre pour l'avenir.
 PREDICTED: Ils ne savaient pas de ce danger , dont ils ne pouvaient les croire , ou les précautions de l ' avenir se mirent à l ' avenir .
*****************************************

Epoch 1: 100%|██████████| 4975/4975 [19:15<00:00,  4.31it/s, v_num=0, train_loss=4.480]Epoch 1: train_loss=4.

`Trainer.fit` stopped: `max_epochs=40` reached.


Epoch 39: 100%|██████████| 4975/4975 [14:35<00:00,  5.68it/s, v_num=0, train_loss=1.680]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(
  rank_zero_warn(


Testing DataLoader 0:   0%|          | 0/8844 [00:00<?, ?it/s]*****************************************
    SOURCE: More than once, he was on the point of being arrested by the sentries for making his way into places from which the engineers of the garrison excluded the public, in order to make a profit of twelve or fifteen francs every year by the sale of the hay grown there.
    TARGET: Deux ou trois fois il fut sur le point de se faire arrêter par les sentinelles ; il pénétrait dans des endroits que le génie militaire interdit au public, afin de vendre pour douze ou quinze francs de foin tous les ans.
 PREDICTED: Plusieurs fois , il était sur le point d ’ être arrêté par les sentinelles pour faire place dans lequel les ingénieurs ont pour se garnison , afin de faire des à douze ou quinze francs par la vente de ce magasin , là - bas , y .
*****************************************

*****************************************
    SOURCE: J’éprouvai, d’abord sans m’en rendre compte, tous 

<pytorch_lightning.trainer.trainer.Trainer at 0x25901136410>

In [None]:
# !pip install tensorboard

In [19]:
def encode_input_sentence(input, tokenizer_src, max_seq_len) -> tuple[torch.Tensor, torch.Tensor]:
    """Tokenize a sentence and build the padded encoder input plus its padding mask.

    Args:
        input: Source sentence to encode.
        tokenizer_src: Tokenizer exposing ``encode(text).ids`` and ``token_to_id(token)`` with
            ``[SOS]``, ``[EOS]`` and ``[PAD]`` in its vocabulary.
        max_seq_len: Length of the returned sequence, counting ``[SOS]``, ``[EOS]`` and padding.

    Returns:
        ``(encoder_input, encoder_mask)`` where ``encoder_input`` is an int64 tensor of shape
        ``(max_seq_len,)`` laid out as ``[SOS] tokens [EOS] [PAD]...`` and ``encoder_mask`` is
        an int32 tensor of shape ``(1, 1, max_seq_len)`` holding 1 for real tokens and 0 for
        padding.

    Raises:
        ValueError: if the tokenized sentence plus ``[SOS]``/``[EOS]`` exceeds ``max_seq_len``.
    """
    token_ids = tokenizer_src.encode(input).ids
    sos_id = tokenizer_src.token_to_id("[SOS]")
    eos_id = tokenizer_src.token_to_id("[EOS]")
    pad_id = tokenizer_src.token_to_id("[PAD]")

    # Two positions are reserved for [SOS] and [EOS].
    num_padding = max_seq_len - len(token_ids) - 2
    if num_padding < 0:
        # Previously a negative count silently produced a sequence longer than max_seq_len.
        raise ValueError(
            f"Sentence is too long: {len(token_ids)} tokens plus [SOS]/[EOS] "
            f"exceed max_seq_len={max_seq_len}"
        )

    encoder_input = torch.cat(
        [
            torch.tensor([sos_id], dtype=torch.int64),
            torch.tensor(token_ids, dtype=torch.int64),
            torch.tensor([eos_id], dtype=torch.int64),
            torch.tensor([pad_id] * num_padding, dtype=torch.int64),
        ],
        dim=0)
    # Two leading singleton dimensions are added so the mask has shape (1, 1, max_seq_len).
    encoder_mask = (encoder_input != pad_id).unsqueeze(0).unsqueeze(0).int()
    return encoder_input, encoder_mask

def translate_en_to_fr(input, opus_dm, cfg, model=None):
    """Translate one source sentence with greedy decoding and return the decoded text.

    Args:
        input: Source sentence to translate.
        opus_dm: Data module exposing ``tokenizer_src`` and ``tokenizer_tgt``.
        cfg: Configuration dict; ``cfg['seq_len']`` sets the padded encoder length.
        model: Model exposing ``greedy_decode(encoder_input, encoder_mask)``. Defaults to the
            notebook-level ``transformer_model`` so existing three-argument calls keep working.

    Returns:
        The text decoded by the target tokenizer from the greedy-decoded token ids.
    """
    if model is None:
        model = transformer_model

    encoder_input, encoder_mask = encode_input_sentence(input, opus_dm.tokenizer_src, cfg['seq_len'])

    # Run inference in eval mode and without autograd, then restore the previous mode so a
    # later training call is unaffected.
    # NOTE(review): assumes `model` is an nn.Module (it is handed to Trainer.fit elsewhere).
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model.greedy_decode(encoder_input, encoder_mask)
    finally:
        model.train(was_training)

    model_out_text = opus_dm.tokenizer_tgt.decode(output.detach().cpu().numpy())
    return model_out_text

In [30]:
# Try the trained model on an example English input and print the French output.
# NOTE(review): the name `input` shadows the Python builtin `input()` for the rest of the session.
input = "Has training completed? In a few minutes the water reached 100 degrees centigrade."
output = translate_en_to_fr(input, opus_fr, cfg)

print(output)

- elle bien , depuis quelques minutes , l ' eau avait atteint jusqu ' à cent degrés au - dessous .
