In [1]:
!pip install -q --upgrade "transformers[torch]" datasets evaluate sentencepiece rouge-score tqdm


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
import evaluate
import torch
from tqdm import tqdm


In [3]:
model_name = "sshleifer/distilbart-cnn-12-6"
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)


Using device: cpu


In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
model.eval()


BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        

In [5]:
dataset = load_dataset("cnn_dailymail", "3.0.0", split="test[:1%]")  # ~300 samples
print("Loaded examples:", len(dataset))


Loaded examples: 115


In [6]:
def generate_summary(example):
    inputs = tokenizer(
        example["article"],
        max_length=1024,
        truncation=True,
        return_tensors="pt"
    ).to(device)

    with torch.no_grad():
        summary_ids = model.generate(
            inputs["input_ids"],
            num_beams=4,
            max_length=142,
            min_length=56,
            length_penalty=2.0,
            early_stopping=True
        )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return {"generated_summary": summary}

dataset_with_preds = dataset.map(generate_summary)


Map:   0%|          | 0/115 [00:00<?, ? examples/s]

In [7]:
rouge = evaluate.load("rouge")

results = rouge.compute(
    predictions=dataset_with_preds["generated_summary"],
    references=dataset_with_preds["highlights"],
    use_stemmer=True
)

for key, value in results.items():
    print(f"{key}: {value:.4f}")


Downloading builder script: 0.00B [00:00, ?B/s]

rouge1: 0.3410
rouge2: 0.1446
rougeL: 0.2502
rougeLsum: 0.2857
