In [None]:
!pip install -q transformers datasets rouge-score bert_score nltk

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU so
# this cell also works on runtimes without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "facebook/bart-base" #facebook/bart-base  t5-base  google/pegasus-xsum
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
model.eval()

# Custom Input Example (Summarization task)
# "summarize: " is a T5 task prefix. For the other checkpoints listed above it
# would simply become part of the text being summarized, so it is only
# prepended when the selected model name refers to a T5 checkpoint.
task_prefix = "summarize: " if "t5" in model_name.lower() else ""
input_text = task_prefix + "The Eiffel Tower is located in Paris and was completed in 1889. It is a major tourist attraction."

# Tokenize & Generate
encoded = tokenizer(input_text, return_tensors="pt", truncation=True)
inputs = encoded.input_ids.to(device)  # token ids, kept under the original name
input_mask = encoded.attention_mask.to(device)
with torch.no_grad():  # inference only: no gradients needed
    # The attention mask is passed explicitly rather than left for generate() to infer.
    outputs = model.generate(inputs, attention_mask=input_mask, max_length=50)

# Decode and Print Result
summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Input:", input_text)
print("Output:", summary)

In [None]:
# Import Libraries
import torch
import math
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from rouge_score import rouge_scorer
import bert_score
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Local Summarization Dataset
# Each pair is (source article, reference summary); the pairs are expanded
# below into the {"article", "highlights"} records the evaluation loop reads.
_SAMPLE_PAIRS = (
    (
        "The Eiffel Tower is located in Paris and was completed in 1889. It is a major tourist attraction.",
        "The Eiffel Tower in Paris was completed in 1889.",
    ),
    (
        "Python is a programming language known for its simplicity and readability. It is used widely in AI.",
        "Python is a simple, readable language popular in AI.",
    ),
    (
        "The Amazon rainforest is the largest tropical rainforest in the world and is home to diverse wildlife.",
        "Amazon rainforest is the world's largest and rich in biodiversity.",
    ),
)

dataset = [
    {"article": article_text, "highlights": summary_text}
    for article_text, summary_text in _SAMPLE_PAIRS
]

# Load Model
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU so the
# evaluation also runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
model.eval()

# Generate Summaries & Collect Results
# One entry per dataset sample, in dataset order:
predictions = []  # decoded model summaries
references = []   # reference summaries ("highlights")
losses = []       # model loss of the reference summary given the article

for sample in dataset:
    # T5 task prefix selecting the summarization task.
    input_text = "summarize: " + sample["article"]
    encoded = tokenizer(input_text, return_tensors="pt", truncation=True)
    input_ids = encoded.input_ids.to(device)
    input_mask = encoded.attention_mask.to(device)

    with torch.no_grad():  # inference only: no gradients needed
        # The attention mask is passed explicitly rather than left for the model to infer.
        output_ids = model.generate(input_ids, attention_mask=input_mask, max_length=100)
        pred = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Score the reference summary under the model: passing it as `labels`
        # makes the forward pass return a loss for this sample.
        target_ids = tokenizer(sample["highlights"], return_tensors="pt", truncation=True).input_ids.to(device)
        loss = model(input_ids=input_ids, attention_mask=input_mask, labels=target_ids).loss.item()

    predictions.append(pred)
    references.append(sample["highlights"])
    losses.append(loss)

# BLEU Score

def real_bleu(pred, ref):
    """Return a smoothed sentence-level BLEU score for one prediction.

    Both strings are lower-cased and split on whitespace before scoring.
    The score uses equal weights on 1- to 4-gram precision.

    Args:
        pred: Predicted text.
        ref: Single reference text to compare against.

    Returns:
        The value returned by NLTK's ``sentence_bleu``.
    """
    hypothesis = pred.lower().split()
    # sentence_bleu expects a list of tokenized references, even for just one.
    reference_set = [ref.lower().split()]
    return sentence_bleu(
        reference_set,
        hypothesis,
        weights=(0.25, 0.25, 0.25, 0.25),
        # Smoothing is applied so short outputs do not collapse to BLEU=0.
        smoothing_function=SmoothingFunction().method4,
    )

# Score every (prediction, reference) pair, then report the plain average.
bleu_scores = list(map(real_bleu, predictions, references))
avg_bleu = sum(bleu_scores) / len(bleu_scores)

print(f"BLEU Score: {avg_bleu:.4f}")

# ROUGE-1, ROUGE-2, ROUGE-L
def _mean_f1(values):
    """Arithmetic mean of a list of per-sample scores."""
    return sum(values) / len(values)


scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)

# Per-sample F-measures, one list per ROUGE variant.
rouge1, rouge2, rougeL = [], [], []
# Defined alongside the F1 lists but not populated by the code in this cell.
precision, recall = [], []

for pred, ref in zip(predictions, references):
    # The reference is passed first, the prediction second.
    scores = scorer.score(ref, pred)
    for bucket, metric in ((rouge1, "rouge1"), (rouge2, "rouge2"), (rougeL, "rougeL")):
        bucket.append(scores[metric].fmeasure)


print(f"ROUGE-1 F1 Score: {_mean_f1(rouge1):.4f}")
print(f"ROUGE-2 F1 Score: {_mean_f1(rouge2):.4f}")
print(f"ROUGE-L F1 Score: {_mean_f1(rougeL):.4f}")


# Perplexity
# exp() of the unweighted mean of the per-sample losses collected above: every
# sample counts equally, regardless of how long its reference summary is.
total_loss = sum(losses)
avg_loss = total_loss / len(losses)
perplexity = math.exp(avg_loss)
print(f"Perplexity: {perplexity:.2f}")

# BERTScore
# Predictions are scored against the references; P, R and F1 are the three
# values returned by bert_score.score, and each is averaged for reporting.
P, R, F1 = bert_score.score(
    predictions,
    references,
    lang="en",
    verbose=False,
)
mean_precision = P.mean().item()
mean_recall = R.mean().item()
mean_f1 = F1.mean().item()
print(f"BERTScore - Precision: {mean_precision:.4f}")
print(f"BERTScore - Recall: {mean_recall:.4f}")
print(f"BERTScore - F1: {mean_f1:.4f}")