# Evaluating Pretrained Bart

In [1]:
from transformers import BartForConditionalGeneration, BartTokenizer
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np
import evaluate
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-xsum").to(device)

print(device)

cuda


In [3]:
total_params = sum(p.numel() for p in model.parameters())

print(f"Total number of parameters: {total_params}")

Total number of parameters: 406290432


In [6]:
class sumTokenizer:
    def __init__(self, max_text_length=512, max_summary_length=128):
        self.tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-xsum')
        self.max_text_length = max_text_length
        self.max_summary_length = max_summary_length
        self.vocab_size = len(self.tokenizer)
        
    def __call__(self, text, is_target=False, padding=True, truncation=True):
        if is_target:
            return self.tokenizer(
                text,
                padding='max_length' if padding else False,
                truncation=truncation,
                max_length=self.max_summary_length,
                return_tensors='pt'
            )
        else:
            return self.tokenizer(
                text,
                padding='max_length' if padding else False,
                truncation=truncation,
                max_length=self.max_text_length,
                return_tensors='pt'
            )
            
    
    def decode(self, token_ids, skip_special_tokens=True):
        return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)

class xsumDataset(Dataset):
    def __init__(self, split="train", max_text_length=512, max_summary_length=128):
        self.dataset = load_dataset("EdinburghNLP/xsum", trust_remote_code=True)[split]
        self.tokenizer = sumTokenizer(max_text_length, max_summary_length)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, i):
        # Get text and summary from dataset
        text = self.dataset[i]["document"]
        summary = self.dataset[i]["summary"]

        # Tokenize input and target
        inputs = self.tokenizer(text ,is_target=False)
        targets = self.tokenizer(summary, is_target=True)

        return {
            "input_ids": inputs["input_ids"].squeeze(0),
            "input_mask": inputs["attention_mask"].squeeze(0),
            "target_ids": targets["input_ids"].squeeze(0),
            "target_mask": targets["attention_mask"].squeeze(0)
        }

In [7]:
max_text_length=512
max_summary_length=128
batch_size = 1


train_set = xsumDataset(split="train", max_text_length=max_text_length, max_summary_length=max_summary_length)
val_set = xsumDataset(split="validation", max_text_length=max_text_length, max_summary_length=max_summary_length)
test_set = xsumDataset(split="test", max_text_length=max_text_length, max_summary_length=max_summary_length)

train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_set, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_set, shuffle=False, batch_size=batch_size)

In [8]:
tokenizer = sumTokenizer(max_text_length=max_text_length, max_summary_length=max_summary_length)


test_batch = next(iter(test_loader))
input_ids = test_batch["input_ids"].to(device) 
src_mask = test_batch["input_mask"].to(device)
target_ids = test_batch["target_ids"].to(device)
tgt_mask = test_batch["target_mask"].to(device)

print("Test input:", tokenizer.tokenizer.decode(input_ids[0], skip_special_tokens=True), "\n")
print("Test target:", tokenizer.tokenizer.decode(target_ids[0], skip_special_tokens=True))

model.eval()
with torch.no_grad():
    outputs = model.generate(input_ids)

predicted_text = tokenizer.tokenizer.batch_decode(outputs, skip_special_tokens=True)
print("Predicted Output:", predicted_text)

Test input: Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.
Workers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.
The Welsh Government said more people than ever were getting help to address housing problems.
Changes to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.
Prison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.
However, the same could not be said for men, the charity said, because issues which often affect them, such as post traumatic stress disorder or drug dependency, were often viewed as less of a priority.
Andrew Stevens, who works in Welsh prisons trying to secure housing for prison leavers, said the need fo

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Predicted Output: ['There is a "desperate need" for housing for ex-prisoners in Wales, a charity has said.']


In [9]:
from evaluate import load

# Load the ROUGE metric
rouge = load("rouge")

test_loss = 0
all_references = []
all_hypotheses = []

with torch.no_grad():  # Disable gradient calculation
    for batch in test_loader:
        input_ids = batch["input_ids"].to(device)
        src_mask = batch["input_mask"].to(device)
        target_ids = batch["target_ids"].to(device)
        tgt_mask = batch["target_mask"].to(device)

        # Forward pass
        outputs = model(
            input_ids=input_ids,
            attention_mask=src_mask,
            labels=target_ids,
            decoder_attention_mask=tgt_mask
        )

        # Calculate cross entropy loss directly
        loss = outputs.loss
        test_loss += loss.item()

        # Convert outputs and targets to strings for ROUGE score
        predictions = tokenizer.tokenizer.batch_decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
        references = tokenizer.tokenizer.batch_decode(target_ids, skip_special_tokens=True)

        # Calculate ROUGE scores
        results = rouge.compute(predictions=predictions, references=references, tokenizer=lambda x: x.split())
        all_references.extend(references)
        all_hypotheses.extend(predictions)

# Calculate average test loss
avg_test_loss = test_loss / len(test_loader)
print(f"Test Loss: {avg_test_loss:.4f}")

# Print ROUGE scores
print(results)

Test Loss: 9.7185
{'rouge1': 0.4888888888888889, 'rouge2': 0.23255813953488372, 'rougeL': 0.4888888888888889, 'rougeLsum': 0.4888888888888889}
