In [None]:
!pip install transformers
!pip install datasets
!pip install rouge-score
!pip install nltk
!pip install tqdm

In [1]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Bart_GovReport_model"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "ccdv/govreport-summarization"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["report"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

No config specified, defaulting to: govreport-summarization/document
Found cached dataset govreport-summarization (/root/.cache/huggingface/datasets/ccdv___govreport-summarization/document/1.0.0/57ca3042de9c40c218cc94084cbc80a99a161036134bfc88112c57d251443590)
Processing batches: 100%|██████████| 38/38 [08:50<00:00, 13.95s/it]


ROUGE-1: 0.2245
ROUGE-2: 0.0957
ROUGE-L: 0.1401
BLEU: 0.0021


In [2]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Pegasus_GovReport_model"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "ccdv/govreport-summarization"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["report"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.98k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/3.12k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Processing batches: 100%|██████████| 38/38 [12:19<00:00, 19.47s/it]


ROUGE-1: 0.2529
ROUGE-2: 0.1000
ROUGE-L: 0.1556
BLEU: 0.0068


In [3]:
import torch
from transformers import ProphetNetForConditionalGeneration, ProphetNetTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Prophetnet_GovReport_model"
tokenizer = ProphetNetTokenizer.from_pretrained(model_name)
model = ProphetNetForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "ccdv/govreport-summarization"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["report"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading (…)prophetnet.tokenizer:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.54k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.57G [00:00<?, ?B/s]

Processing batches: 100%|██████████| 38/38 [42:58<00:00, 67.85s/it]


ROUGE-1: 0.3255
ROUGE-2: 0.1303
ROUGE-L: 0.1767
BLEU: 0.0152
