In [1]:
!pip install transformers
!pip install datasets
!pip install rouge-score
!pip install nltk
!pip install tqdm
!pip install sentencepiece

Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2023.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (770 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m770.4/770.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/

In [1]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Bart_multiNews_model"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "multi_news"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["document"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Found cached dataset multi_news (/root/.cache/huggingface/datasets/multi_news/default/1.0.0/2f1f69a2bedc8ad1c5d8ae5148e4755ee7095f465c1c01ae8f85454342065a72)
Processing batches: 100%|██████████| 224/224 [47:36<00:00, 12.75s/it] 


ROUGE-1: 0.3776
ROUGE-2: 0.1375
ROUGE-L: 0.2042
BLEU: 0.0446


In [2]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Pegasus_multiNews_model"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "multi_news"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["document"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.98k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/3.12k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/3.83k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.82k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading and preparing dataset multi_news/default to /root/.cache/huggingface/datasets/multi_news/default/1.0.0/2f1f69a2bedc8ad1c5d8ae5148e4755ee7095f465c1c01ae8f85454342065a72...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/58.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/66.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.30M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/69.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.31M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/44972 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5622 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5622 [00:00<?, ? examples/s]

Dataset multi_news downloaded and prepared to /root/.cache/huggingface/datasets/multi_news/default/1.0.0/2f1f69a2bedc8ad1c5d8ae5148e4755ee7095f465c1c01ae8f85454342065a72. Subsequent calls will reuse this data.


Processing batches: 100%|██████████| 224/224 [46:54<00:00, 12.56s/it]


ROUGE-1: 0.3375
ROUGE-2: 0.1272
ROUGE-L: 0.1946
BLEU: 0.0355


In [2]:
import torch
from transformers import ProphetNetForConditionalGeneration, ProphetNetTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Prophetnet_multiNews_model"
tokenizer = ProphetNetTokenizer.from_pretrained(model_name)
model = ProphetNetForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "multi_news"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["document"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 50
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading (…)prophetnet.tokenizer:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.54k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.57G [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/3.83k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.82k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading and preparing dataset multi_news/default to /root/.cache/huggingface/datasets/multi_news/default/1.0.0/2f1f69a2bedc8ad1c5d8ae5148e4755ee7095f465c1c01ae8f85454342065a72...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/58.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/66.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.30M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/69.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.31M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/44972 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5622 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5622 [00:00<?, ? examples/s]

Dataset multi_news downloaded and prepared to /root/.cache/huggingface/datasets/multi_news/default/1.0.0/2f1f69a2bedc8ad1c5d8ae5148e4755ee7095f465c1c01ae8f85454342065a72. Subsequent calls will reuse this data.


Processing batches: 100%|██████████| 112/112 [2:10:23<00:00, 69.86s/it] 


ROUGE-1: 0.4355
ROUGE-2: 0.1641
ROUGE-L: 0.2265
BLEU: 0.0644
