In [1]:
import warnings
warnings.filterwarnings("ignore")

from model import Summarizer
from dataset import *
import pandas as pd
from utils import load_data
from constants import *
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger
from pytorch_lightning import Trainer
from summarize import summarize

2023-08-14 22:25:57.739933: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Loading data and tokenizer
train_set, test_set = load_data()
tokenizer = T5TokenizerFast.from_pretrained(MODEL_NAME)

In [12]:
# Configuring checkpoints and criteria for saving models
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",
    filename="best",
    save_top_k=1,
    verbose=True,
    monitor='val_loss',
    mode='min'
)

# Configuring log folder
logger = CSVLogger(save_dir="log", name="summary")


In [11]:

# Configuring trainer
trainer = Trainer(
    logger=logger,
    callbacks=checkpoint_callback,
    accelerator='gpu',
    max_epochs=N_EPOCHS,
    val_check_interval=0.2
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [6]:
# Getting the summarizer model and dataloader
model = Summarizer()
datamodule = NewsDataModule(train=train_set, test=test_set, tokenizer=tokenizer, batch_size=BATCH_SIZE)

print("training started")
# Training
trainer.fit(model, datamodule)

training started


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 60.5 M
-----------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
242.026   Total estimated model params size (MB)


Epoch 0:  20%|██        | 201/1005 [00:45<03:01,  4.42it/s, loss=1.3, v_num=0, train_loss=0.842, val_loss=0.943]

Epoch 0, global step 89: 'val_loss' reached 0.94268 (best 0.94268), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 0:  40%|████      | 402/1005 [01:35<02:22,  4.22it/s, loss=0.948, v_num=0, train_loss=0.602, val_loss=0.778]

Epoch 0, global step 178: 'val_loss' reached 0.77814 (best 0.77814), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 0:  60%|██████    | 603/1005 [02:25<01:37,  4.14it/s, loss=0.912, v_num=0, train_loss=0.828, val_loss=0.719]

Epoch 0, global step 267: 'val_loss' reached 0.71858 (best 0.71858), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 0:  80%|████████  | 804/1005 [03:17<00:49,  4.07it/s, loss=0.798, v_num=0, train_loss=0.693, val_loss=0.687]

Epoch 0, global step 356: 'val_loss' reached 0.68744 (best 0.68744), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 0: 100%|██████████| 1005/1005 [04:09<00:00,  4.02it/s, loss=0.851, v_num=0, train_loss=0.759, val_loss=0.668]

Epoch 0, global step 445: 'val_loss' reached 0.66788 (best 0.66788), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 1:  20%|██        | 201/1005 [00:47<03:09,  4.24it/s, loss=0.698, v_num=0, train_loss=1.100, val_loss=0.650] 

Epoch 1, global step 534: 'val_loss' reached 0.65020 (best 0.65020), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 1:  40%|████      | 402/1005 [01:39<02:28,  4.06it/s, loss=0.655, v_num=0, train_loss=0.448, val_loss=0.636]

Epoch 1, global step 623: 'val_loss' reached 0.63575 (best 0.63575), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 1:  60%|██████    | 603/1005 [02:32<01:41,  3.96it/s, loss=0.755, v_num=0, train_loss=0.474, val_loss=0.625]

Epoch 1, global step 712: 'val_loss' reached 0.62507 (best 0.62507), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 1:  80%|████████  | 804/1005 [03:25<00:51,  3.91it/s, loss=0.685, v_num=0, train_loss=0.374, val_loss=0.615]

Epoch 1, global step 801: 'val_loss' reached 0.61517 (best 0.61517), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 1: 100%|██████████| 1005/1005 [04:18<00:00,  3.89it/s, loss=0.866, v_num=0, train_loss=0.777, val_loss=0.605]

Epoch 1, global step 890: 'val_loss' reached 0.60521 (best 0.60521), saving model to '/media/rolexx/Local Disk/fuse project/checkpoints/best-v1.ckpt' as top 1


Epoch 1: 100%|██████████| 1005/1005 [04:26<00:00,  3.77it/s, loss=0.866, v_num=0, train_loss=0.777, val_loss=0.605]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 1005/1005 [04:26<00:00,  3.77it/s, loss=0.866, v_num=0, train_loss=0.777, val_loss=0.605]


In [18]:
# Loading the the best model from checkpoint
best = Summarizer().load_from_checkpoint("checkpoints/best.ckpt")

In [14]:
index = 100
text = test_set.iloc[index]['Articles']
label = test_set.iloc[index]['Summaries']
text

'Spurs to sign Iceland U21 star\n\nTottenham are primed to snap up Iceland Under-21 international Emil Hallfredsson after he impressed on trial at White Hart Lane\n\n.The 20-year-old midfielder, who plays for FH Hafnarfjordur, also starred in the Uefa Cup match against Scottish side Dunfermline earlier this season. Spurs have agreed a fee for the player, who has yet to agree personal terms. "He had offers from two other clubs but he decided to come to Tottenham," said Spurs sporting director Frank Arnesen. "He is a left-sided player, a position we have been looking at and he showed so much talent in his time here that we decided to take him. "It\'s down the road of bringing in talent, good prospects and giving them a place at Tottenham where they can improve."'

In [19]:
summary = summarize(text, best, tokenizer, length=250)

In [20]:
summary

'Spurs to sign Iceland U21 star Tottenham are primed to snap up Iceland Under-21 international Emil Hallfredsson after he impressed on trial at White Hart Lane.The 20-year-old midfielder, who plays for FH Hafnarfjordur, also starred in the Uefa Cup match against Scottish side Dunfermline earlier this season.'

In [16]:
summary

'Spurs to sign Iceland U21 star Tottenham are primed to snap up Iceland Under-21 international Emil Hallfredsson after he impressed on trial at White Hart Lane.The 20-year-old midfielder, who plays for FH Hafnarfjordur, also starred in the Uefa Cup match against Scottish side Dunfermline earlier this season.It\'s down the road of bringing in talent, good prospects and giving them a place at Tottenham where they can improve."He is a left-sided player, a position we have been looking at and he showed so much talent in his time here that we decided to take him."'

In [17]:
label

'"He had offers from two other clubs but he decided to come to Tottenham," said Spurs sporting director Frank Arnesen."He is a left-sided player, a position we have been looking at and he showed so much talent in his time here that we decided to take him."It\'s down the road of bringing in talent, good prospects and giving them a place at Tottenham where they can improve."'