In [None]:
!pip install transformers datasets evaluate torch rouge_score


In [None]:
from transformers import (
    PegasusForConditionalGeneration,
    PegasusTokenizer,
    PegasusXForConditionalGeneration,
)

# Checkpoint to load. NOTE(review): this looks like the pretrained-only base
# checkpoint rather than one fine-tuned for summarization — confirm that this
# is the intended starting point for the evaluation below.
model_name = "google/pegasus-x-base"

tokenizer = PegasusTokenizer.from_pretrained(model_name)

# PEGASUS-X checkpoints must be loaded with the PEGASUS-X architecture class.
# Loading them through `PegasusForConditionalGeneration` builds a different
# architecture than the one the checkpoint was saved from.
model = PegasusXForConditionalGeneration.from_pretrained(model_name)


In [None]:
from transformers.activations import ACT2FN
import torch.nn.functional as F

# Example: replace GELU with ReLU.
#
# Assigning `ACT2FN["gelu"] = F.relu` at this point does not do that: `model`
# has already been built, and its layers hold the activation they looked up at
# construction time, so a later edit of the registry never reaches them.
# NOTE(review): in recent transformers releases the registry also instantiates
# the stored entry on lookup, so storing a plain function there would make any
# later `ACT2FN["gelu"]` lookup fail — confirm against the installed version.
#
# Instead, swap the activation directly on the layers of the loaded model, and
# only when the checkpoint is actually configured to use GELU.
if getattr(model.config, "activation_function", None) == "gelu":
    swapped = 0
    for module in model.modules():
        # Layers that own a feed-forward activation expose it as `activation_fn`.
        if hasattr(module, "activation_fn"):
            module.activation_fn = F.relu
            swapped += 1
    print(f"Replaced GELU with ReLU in {swapped} layer(s).")
else:
    print("Model is not configured with GELU; no activation was replaced.")


In [None]:
# Sample passage to summarize. Adjacent string literals are concatenated by
# Python, so this is one single-line string with one space between sentences.
text = (
    "At the Labs, Dr. Harrison Wells holds a speech, when suddenly a thug steals Iris' laptop. "
    "Barry runs after him and catches him. "
    "But the thug won't surrender and hits Barry. "
    "He tries to escape, but he is caught by detective Eddie Thawne. "
    "At the station, Iris asks about him and Barry says he is a transfer from Keystone City. "
    "As the night goes on, a thunderstorm brews. "
    "Joe and his partner, Chyre, reach the last place on Barry's list and find the car, but get into a firefight with Clyde Mardon. "
    "Chyre is hit as Mardon boards a plane to escape. "
    "At the same time, something is happening with the particle accelerator. "
    "It explodes, causing Mardon's plane to explode. "
    "A surge of power from the accelerator escapes into the sky and affects the storm clouds. "
    "Barry is in his lab, when suddenly, all the liquid in there starts to float. "
    "Then, suddenly, he is struck by lightning from the affected storm clouds, and he hits upon all the chemicals in his lab and falls unconscious. "
    "He is immediately taken to an emergency care, still alive. "
    "However, it seems he is flatlining. "
    "Iris comes and is terrified for him."
)

# Encode the passage as PyTorch tensors.
inputs = tokenizer(text, return_tensors="pt")

# Generate a summary capped at 20 tokens.
summary_ids = model.generate(max_length=20, **inputs)

# Decode the only sequence in the batch and show it.
decoded_summaries = tokenizer.batch_decode(summary_ids, skip_special_tokens=True)
print(decoded_summaries[0])


In [None]:
from datasets import load_dataset
import evaluate

# ROUGE metric object; `evaluate_model` below reads it as a module-level name.
rouge = evaluate.load(path="rouge")

# First 1% of the test split of CNN/DailyMail, configuration "3.0.0".
dataset = load_dataset(
    path="cnn_dailymail",
    name="3.0.0",
    split="test[:1%]",
)

def _summarize_batch(model, tokenizer, articles, max_length, num_beams):
    """Return one decoded summary string per entry of ``articles``."""
    inputs = tokenizer(articles, return_tensors="pt", truncation=True, padding="longest")
    # Keep the inputs on the same device as the model; without this, generation
    # fails as soon as the model has been moved off the CPU.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = inputs.to(device)
    summary_ids = model.generate(**inputs, max_length=max_length, num_beams=num_beams)
    return tokenizer.batch_decode(summary_ids, skip_special_tokens=True)


def evaluate_model(model, tokenizer, dataset, batch_size=1, max_length=128, num_beams=4):
    """Summarize every article in ``dataset`` and score the summaries with ROUGE.

    Args:
        model: Object exposing ``generate(**inputs, max_length=..., num_beams=...)``.
            If it has a ``device`` attribute, inputs are moved to that device.
        tokenizer: Callable that encodes a list of strings and offers
            ``batch_decode(ids, skip_special_tokens=True)``.
        dataset: Iterable of mappings with an ``"article"`` entry (input text)
            and a ``"highlights"`` entry (reference summary).
        batch_size: Number of articles summarized per ``generate`` call.
            The default of 1 processes one article at a time.
        max_length: ``max_length`` forwarded to ``generate``.
        num_beams: ``num_beams`` forwarded to ``generate``.

    Returns:
        Whatever ``rouge.compute(predictions=..., references=...)`` returns,
        where ``rouge`` is the module-level metric object.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    preds, refs = [], []
    pending_articles = []
    for sample in dataset:
        pending_articles.append(sample["article"])
        refs.append(sample["highlights"])
        if len(pending_articles) == batch_size:
            preds.extend(
                _summarize_batch(model, tokenizer, pending_articles, max_length, num_beams)
            )
            pending_articles = []

    # Flush the final, possibly shorter, batch.
    if pending_articles:
        preds.extend(
            _summarize_batch(model, tokenizer, pending_articles, max_length, num_beams)
        )

    return rouge.compute(predictions=preds, references=refs)

# Score the loaded model on the dataset slice and show the ROUGE result.
results = evaluate_model(model=model, tokenizer=tokenizer, dataset=dataset)
print(results)
