**Step 1:**

If code is run in Colab, select a Runtime with GPU  
To check which GPU, run: *!nvidia-smi*

In [None]:
# Shell escape: list the NVIDIA GPU(s) visible to this runtime, with driver/CUDA versions and memory usage
!nvidia-smi

Sun Oct 12 15:27:17 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   51C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

**Step 2:**

Install required libraries

In [None]:
# Uncomment if notebook is run in Google Colab
# Install required libraries
%%capture
!pip install transformers datasets evaluate
!pip install rouge-score
!pip install nltk

**Step 3:**

Necessary imports

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration
from datasets import load_dataset
import evaluate
import torch
import nltk
from nltk import sent_tokenize

In [None]:
# sent_tokenize needs the Punkt sentence-tokenizer data. Recent NLTK releases load it
# from the 'punkt_tab' resource while older ones use 'punkt', so fetch both;
# nltk.download reports (rather than raises) when a resource is unknown to the
# installed version.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

**Step 4:**

Set device variable

In [None]:
# Choose the compute device in priority order:
# Apple-silicon MPS first, then CUDA, and finally plain CPU as the fallback.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

In [None]:
# Show which device was selected above
print(device)

cuda


**Step 5:**

Load model and tokenizer

In [None]:
# Hugging Face Hub checkpoint used for both the tokenizer and the model
model_checkpoint = 'facebook/bart-large-cnn'

tokenizer = BartTokenizer.from_pretrained(model_checkpoint)

model = BartForConditionalGeneration.from_pretrained(model_checkpoint)
# Move the weights to the selected device. The result is assigned so the cell does not
# end on a bare expression and print the full module tree as its output.
model = model.to(device)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        

In [None]:
# Parameter count of the loaded model (displayed as the cell output)
model.num_parameters()

406290432

**Step 6:**

Load dataset

In [None]:
# Load only the test split of the CNN/DailyMail dataset, configuration '3.0.0'
data_test = load_dataset('cnn_dailymail', '3.0.0', split='test')

README.md: 0.00B [00:00, ?B/s]

3.0.0/train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

3.0.0/train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

3.0.0/train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

3.0.0/validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

3.0.0/test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

**Step 7:**

Set the batch size. The maximum batch size that the GPU/CPU can support depends on the model size (***number of parameters***) and the supported sequence length (***max_length*** parameter).

In [None]:
# Number of articles summarized per generation call; lower it if the device runs out of memory
batch_size = 8

**Step 8:**

Define the function that generates summaries with the selected model

In [None]:
# Map function: adds a generated summary to every example of a batch
def generate_summary(batch):
    """Generate one summary per article in a batch (for `Dataset.map(batched=True)`).

    Args:
        batch: Mapping of column name to list of values; `batch['article']` holds
            the source texts to summarize.

    Returns:
        The same batch with an added `'pred'` entry containing one decoded summary
        string per article.
    """
    # Keep the tokenizer's default special tokens (<s> ... </s>): BART inputs are
    # built with them, so they must not be stripped from the source text.
    # Articles longer than 1024 tokens are truncated.
    inputs = tokenizer(batch['article'],
                       padding=True,
                       truncation=True,
                       max_length=1024,
                       return_tensors='pt')
    input_ids = inputs.input_ids.to(device)
    # The attention mask lets the model ignore the padding added to shorter articles
    attention_mask = inputs.attention_mask.to(device)

    # Decoding parameters set according to BART's config.json file
    outputs = model.generate(input_ids,
                             attention_mask=attention_mask,
                             min_length=56,
                             max_length=142,
                             num_beams=4,
                             no_repeat_ngram_size=3,
                             length_penalty=2.0,
                             early_stopping=True
                             )

    # All special tokens will be removed
    output_str = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    batch['pred'] = output_str

    return batch

**Step 9:**

Generate summaries for the selected dataset

In [None]:
# Summarize the whole test split batch by batch. The 'article' column is dropped
# from the result; the reference highlights and the new 'pred' column are read below.
results = data_test.map(
    generate_summary,
    batched=True,
    batch_size=batch_size,
    remove_columns=['article'],
)

# Model outputs and their reference summaries, in matching order
predictions = results['pred']
labels = results['highlights']

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [None]:
# ROUGE expects a newline after each sentence
clean_preds = ["\n".join(sent_tokenize(pred.replace('[X_SEP]', ' '))) for pred in predictions]
# Re-attach full stops that are separated from the preceding word by a space
clean_labels = [label.replace(" .", ".") for label in labels]

# Preview only the first few pairs: printing every example of the test split would
# flood the cell output. Raise this value (up to len(predictions)) to inspect more.
NUM_EXAMPLES_TO_SHOW = 5

for i, (label, pred) in enumerate(zip(clean_labels[:NUM_EXAMPLES_TO_SHOW],
                                      clean_preds[:NUM_EXAMPLES_TO_SHOW])):
    print(f"Item {i}:")
    print("Label:")
    print(label)
    print("\n")
    print("Prediction:")
    print(pred)
    print("\n")

**Step 10:**

Load ROUGE metric and evaluate generated results


In [None]:
# Load the ROUGE metric from the `evaluate` library with a fixed seed
rouge = evaluate.load('rouge', seed=42)

In [None]:
# Score the cleaned predictions against the reference highlights, with stemming enabled
rouge_output = rouge.compute(predictions=clean_preds, references=clean_labels, use_stemmer=True)
rouge_metrics = ["rouge1", "rouge2", "rougeL", "rougeLsum"]

# `evaluate`'s ROUGE returns each aggregated score as a plain float (the F-measure).
# The older `datasets.load_metric('rouge')` API returned objects exposing
# `.mid.fmeasure`; both shapes are accepted so the cell works with either.
rouge_scores = {}
for metric in rouge_metrics:
    score = rouge_output[metric]
    f1 = score.mid.fmeasure if hasattr(score, "mid") else score
    # Report as a percentage rounded to two decimals
    rouge_scores[metric] = round(float(f1) * 100, 2)

print(rouge_scores)