In [1]:
from Trainer import PL_Trainer
from Utils import get_callbacks
from Dataset import SummarizationDataset
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from adamp import AdamP
import pandas as pd
import torch

In [2]:
model_name = 'google/pegasus-large'
tokenizer = PegasusTokenizer.from_pretrained(model_name) 
model = PegasusForConditionalGeneration.from_pretrained(model_name)

INPUT_LEN = 1024
TARGET_LEN = 100
LR = 1e-4

In [3]:
# input data format example
train_df = pd.DataFrame({
    "input": ['we are the champions. How are you? who is it? what about the incredible',
    'how are you? how is it? where is the guys? this is interesting', 'transformers bad good are you? '] * 150,
    "target": ['hey we are the champions', 'we are champions', 'this is cool '] * 150
})
val_df = pd.DataFrame({
   "input": ['we are the champions. How are you? who is it? what about the incredible',
    'how are you? how is it? where is the guys? this is interesting', 'transformers bad good are you? '],
    "target": ['hey we are the champions', 'we are champions', 'this is cool ']
})

In [4]:
train_dataset = SummarizationDataset(train_df, tokenizer, INPUT_LEN, TARGET_LEN)
val_dataset = SummarizationDataset(val_df, tokenizer, INPUT_LEN, TARGET_LEN)
train_dl = DataLoader(train_dataset,
                      shuffle=True,
                      batch_size=1,
                      num_workers=20,
                      collate_fn=train_dataset.collate_fn)
val_dl = DataLoader(val_dataset,
                    shuffle=False,
                    batch_size=1,
                    num_workers=20,
                    collate_fn=train_dataset.collate_fn)

In [5]:
optimizer = AdamP([p for p in model.parameters() if p.requires_grad],
                   lr=LR, betas=(0.9, 0.999), weight_decay=0.5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=0.5,
            patience=1,
            min_lr=1e-12,
            verbose=True,
        )

In [6]:
for_pl_trainer = PL_Trainer(model, optimizer, scheduler)
callbacks, checkpoint_callback = get_callbacks()
trainer = pl.Trainer(
        gpus=[0, 1],
        accelerator='dp',
        callbacks=[callbacks, checkpoint_callback],
        gradient_clip_val=0.1,
        precision=16,  # 32 for reproducibility, 16 for research
        auto_lr_find=True,
        max_epochs=1,
        checkpoint_callback=True)
trainer.fit(for_pl_trainer, train_dl, val_dl)

  f"Passing `Trainer(accelerator={self.distributed_backend!r})` has been deprecated"
Using 16bit native Automatic Mixed Precision (AMP)
  f"Setting `Trainer(checkpoint_callback={checkpoint_callback})` is deprecated in v1.5 and will "
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type                            | Params
----------------------------------------------------------
0 | model | PegasusForConditionalGeneration | 570 M 
----------------------------------------------------------
568 M     Trainable params
2.1 M     Non-trainable params
570 M     Total params
1,141.594 Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]