In [None]:
!pip install transformers
!pip install sentencepiece
!pip install rouge

Collecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m44.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m100.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m82.1 MB/s[0m eta [36m0:00:

In [None]:
import torch
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Dataset, DataLoader
from transformers import pipeline
from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig, AdamW, get_linear_schedule_with_warmup
import pandas as pd
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
import numpy as np
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [None]:
from sklearn.model_selection import train_test_split

# Load the spreadsheet of question/summary pairs from the mounted Drive folder.
data = pd.read_excel(r'/content/drive/MyDrive/bart/MeQSum_ACL2019_BenAbacha_Demner-Fushman.xlsx')

# Two-stage split with a fixed seed: 80% train, then the held-out 20% is halved
# into test and validation.
train_data, temp = train_test_split(data, test_size=0.2, random_state=42)
test_data, val_data = train_test_split(temp, test_size=0.5, random_state=42)

# Renumber every split from 0 so rows can be addressed by integer position.
train_data, val_data, test_data = (
    part.reset_index(drop=True) for part in (train_data, val_data, test_data)
)

In [None]:
class PegaCustomDataset(Dataset):
    """Map-style dataset that tokenizes (source, target) text pairs from a DataFrame.

    Each item is a dict with ``input_ids`` and ``attention_mask`` for the source
    text and ``labels`` for the target text. All three are 1-D tensors of length
    ``max_len`` (texts are truncated / padded to that length).
    """

    # Label value skipped by torch's cross-entropy loss (its default ignore_index),
    # used so that padded target positions do not contribute to the training loss.
    IGNORE_INDEX = -100

    def __init__(self, dataframe, source_col, target_col, tokenizer, max_len):
        """Store the frame, column names, tokenizer and fixed sequence length.

        Args:
            dataframe: pandas DataFrame holding one example per row.
            source_col: name of the column with the input text.
            target_col: name of the column with the reference text.
            tokenizer: callable tokenizer returning ``input_ids`` and
                ``attention_mask`` tensors (e.g. a Hugging Face tokenizer).
            max_len: length every encoded sequence is truncated/padded to.
        """
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_col = source_col
        self.target_col = target_col
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows (examples) in the underlying frame."""
        return len(self.data)

    def _encode(self, text):
        """Tokenize one string to exactly ``max_len`` positions as "pt" tensors."""
        return self.tokenizer(
            text,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors="pt",
        )

    def __getitem__(self, index):
        """Return the encoded example at integer position ``index``."""
        # Positional lookup: works even when the frame's index is not 0..n-1
        # (e.g. a split that was never reset).
        source = self._encode(str(self.data[self.source_col].iloc[index]))
        target = self._encode(str(self.data[self.target_col].iloc[index]))

        # Replace padded label positions with IGNORE_INDEX so the loss is only
        # computed over real target tokens.
        target_is_padding = target["attention_mask"].flatten() == 0
        labels = target["input_ids"].flatten().masked_fill(target_is_padding, self.IGNORE_INDEX)

        return {
            "input_ids": source["input_ids"].flatten(),
            "attention_mask": source["attention_mask"].flatten(),
            "labels": labels,
        }

In [None]:
from transformers import AdamW, get_linear_schedule_with_warmup

# Pretrained tokenizer and seq2seq model that will be fine-tuned.
tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

# Every example is padded to `max_len`, so it must not exceed the number of
# positions the checkpoint supports; otherwise the position-embedding lookup
# would be indexed out of range. Cap the requested 1000 at the model's limit.
max_len = min(1000, model.config.max_position_embeddings)

train_dataset_pega = PegaCustomDataset(train_data, 'CHQ', 'Summary', tokenizer, max_len)
val_dataset_pega = PegaCustomDataset(val_data, 'CHQ', 'Summary', tokenizer, max_len)
test_dataset_pega = PegaCustomDataset(test_data, 'CHQ', 'Summary', tokenizer, max_len)

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-pubmed and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)neration_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

In [None]:
import numpy as np

# Number of passes over the training set. The LR schedule below is sized from
# this same value so the linear decay reaches zero exactly when training ends.
num_epochs = 1

# Best validation result seen so far across the whole sweep.
best_loss = np.inf
best_batchsize = None
best_epoch = None

# Move the model to the target device before building the optimizer.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# NOTE: the same `model` object keeps training across batch sizes; if this sweep
# is widened beyond a single value, reload the pretrained weights per setting so
# the runs are comparable.
for batchsize in range(2,3):
    torch.cuda.empty_cache()
    # Shuffle the training examples each epoch; validation order does not matter.
    train_dataloader = DataLoader(train_dataset_pega, batch_size=batchsize, shuffle=True)
    val_dataloader = DataLoader(val_dataset_pega, batch_size=batchsize)

    # torch's AdamW replaces the deprecated transformers.AdamW; eps and
    # weight_decay are set to the values the transformers class used by default.
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)
    total_steps = len(train_dataloader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    for epoch in range(num_epochs):
        print(f'Epoch: {epoch+1}')

        # Training Phase
        model.train()
        total_train_loss = 0
        for batch in train_dataloader:
            optimizer.zero_grad()
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            # Clip the global gradient norm to 1.0 before the parameter update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_dataloader)
        print(f'Training Loss: {avg_train_loss}')

        # Validation Phase (no gradients needed)
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_dataloader:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["labels"].to(device)
                outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                loss = outputs.loss
                total_val_loss += loss.item()

        avg_val_loss = total_val_loss / len(val_dataloader)
        print(f'Validation Loss: {avg_val_loss}')

        # Keep only the checkpoint with the lowest validation loss.
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            best_batchsize = batchsize
            best_epoch = epoch + 1
            model_path = "/content/drive/MyDrive/Pegasus/best_model"
            model.save_pretrained(model_path)
            print(f'Saving model at epoch {best_epoch} with validation loss of {best_loss:.3f}')

In [None]:
# Reload the best checkpoint from the directory the training loop saves to
# ("Pegasus", not "PEGASUS" - the two are different folders on a
# case-sensitive filesystem).
model_path = "/content/drive/MyDrive/Pegasus/best_model"
model = PegasusForConditionalGeneration.from_pretrained(model_path)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.to(device)
model.eval()

# Inputs are padded to `max_len`, so keep it within the number of positions the
# loaded checkpoint supports.
max_len = min(1000, model.config.max_position_embeddings)
test_dataset_pega = PegaCustomDataset(test_data, 'CHQ', 'Summary', tokenizer, max_len)
test_dataloader = DataLoader(test_dataset_pega, batch_size=2)

# Generated summaries, in the same order as the rows of `test_data`.
predictions = []

with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask)

        preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in outputs]
        predictions.extend(preds)

from rouge import Rouge

# Coerce references to str, mirroring the str() applied to the same column in
# PegaCustomDataset, so a non-string cell cannot break the text metrics.
reference_summaries = test_data['Summary'].astype(str).tolist()
assert len(predictions) == len(reference_summaries), "predictions and references must align one-to-one"

rouge = Rouge()
# Corpus-averaged scores; keep only the ROUGE-L entry (recall / precision / F).
rouge_l_scores = rouge.get_scores(predictions, reference_summaries, avg=True)['rouge-l']

print("ROUGE-L Scores:", rouge_l_scores)

import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.meteor_score import meteor_score

# Resources needed by word_tokenize and by METEOR's WordNet matching.
nltk.download('punkt')
nltk.download('wordnet')

# Word-level tokens for every reference and every generated summary.
tokenized_reference_summaries = list(map(word_tokenize, reference_summaries))
tokenized_predictions = list(map(word_tokenize, predictions))

# One METEOR score per (reference, prediction) pair, then the plain mean.
meteor_scores = []
for ref_tokens, pred_tokens in zip(tokenized_reference_summaries, tokenized_predictions):
    meteor_scores.append(meteor_score([ref_tokens], pred_tokens))
avg_meteor_score = sum(meteor_scores) / len(meteor_scores)

print("Average METEOR Score:", avg_meteor_score)

from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu

# corpus_bleu expects, per candidate, a list of tokenized references.
references = [[ref.split()] for ref in reference_summaries]
# Whitespace-tokenize the generated predictions.
candidates = [pred.split() for pred in predictions]

# Without smoothing, a corpus with zero 4-gram overlaps collapses the whole score
# to ~0 regardless of lower-order matches; smoothing keeps the result informative.
bleu_score = corpus_bleu(references, candidates, smoothing_function=SmoothingFunction().method1)

print("BLEU Score:", bleu_score)



ROUGE-L Scores: {'r': 0.018132478632478636, 'p': 0.007101824777037, 'f': 0.009338270522074019}


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Average METEOR Score: 0.00719856666986438
BLEU Score: 2.248384924549639e-80


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
