In [1]:
!pip install transformers
!pip install datasets
!pip install rouge-score
!pip install nltk
!pip install tqdm

[0m

In [2]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Bart_MedPaper_model"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "pszemraj/scientific_lay_summarisation-plos-norm"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["article"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/3.35k [00:00<?, ?B/s]

Downloading and preparing dataset parquet/pszemraj--scientific_lay_summarisation-plos-norm to /root/.cache/huggingface/datasets/pszemraj___parquet/pszemraj--scientific_lay_summarisation-plos-norm-3d46fb74e7dd8e77/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/505M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/27.9M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset parquet downloaded and prepared to /root/.cache/huggingface/datasets/pszemraj___parquet/pszemraj--scientific_lay_summarisation-plos-norm-3d46fb74e7dd8e77/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7. Subsequent calls will reuse this data.


Processing batches: 100%|██████████| 55/55 [12:28<00:00, 13.60s/it]


ROUGE-1: 0.3747
ROUGE-2: 0.1202
ROUGE-L: 0.2095
BLEU: 0.0429


In [4]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Pegasus_MedPaper_model"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "pszemraj/scientific_lay_summarisation-plos-norm"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["article"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/3.12k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Processing batches: 100%|██████████| 55/55 [11:03<00:00, 12.07s/it]


ROUGE-1: 0.3950
ROUGE-2: 0.1349
ROUGE-L: 0.2240
BLEU: 0.0509


In [1]:
import torch
from transformers import ProphetNetForConditionalGeneration, ProphetNetTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model and tokenizer
model_name = "usakha/Prophetnet_MedPaper_model"
tokenizer = ProphetNetTokenizer.from_pretrained(model_name)
model = ProphetNetForConditionalGeneration.from_pretrained(model_name).to(device)

# Load the dataset from the Hugging Face Hub
dataset_name = "pszemraj/scientific_lay_summarisation-plos-norm"
dataset = load_dataset(dataset_name, split="test")

source_texts = [example["article"] for example in dataset]
target_texts = [example["summary"] for example in dataset]

# Summarize and calculate scores using batch processing
rouge_scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
rouge_scores = []
bleu_references = []
bleu_hypotheses = []

batch_size = 25
num_batches = len(source_texts) // batch_size

for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_sources = source_texts[start:end]
    batch_targets = target_texts[start:end]

    inputs = tokenizer(batch_sources, return_tensors="pt", padding=True, truncation=True).to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=8, max_length=250, early_stopping=True)
    summaries = [tokenizer.decode(summary_id, skip_special_tokens=True) for summary_id in summary_ids]

    for target, summary in zip(batch_targets, summaries):
        rouge_scores.append(rouge_scorer.score(target, summary))
        bleu_references.append([target.split()])
        bleu_hypotheses.append(summary.split())

rouge1_avg = sum(score["rouge1"].fmeasure for score in rouge_scores) / len(rouge_scores)
rouge2_avg = sum(score["rouge2"].fmeasure for score in rouge_scores) / len(rouge_scores)
rougeL_avg = sum(score["rougeL"].fmeasure for score in rouge_scores) / len(rouge_scores)
bleu_score = corpus_bleu(bleu_references, bleu_hypotheses)

print(f"ROUGE-1: {rouge1_avg:.4f}")
print(f"ROUGE-2: {rouge2_avg:.4f}")
print(f"ROUGE-L: {rougeL_avg:.4f}")
print(f"BLEU: {bleu_score:.4f}")

Downloading (…)prophetnet.tokenizer:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.54k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.57G [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/3.35k [00:00<?, ?B/s]

Downloading and preparing dataset parquet/pszemraj--scientific_lay_summarisation-plos-norm to /root/.cache/huggingface/datasets/pszemraj___parquet/pszemraj--scientific_lay_summarisation-plos-norm-3d46fb74e7dd8e77/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/505M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/27.9M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset parquet downloaded and prepared to /root/.cache/huggingface/datasets/pszemraj___parquet/pszemraj--scientific_lay_summarisation-plos-norm-3d46fb74e7dd8e77/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7. Subsequent calls will reuse this data.


Processing batches: 100%|██████████| 55/55 [1:02:52<00:00, 68.59s/it]


ROUGE-1: 0.4621
ROUGE-2: 0.1569
ROUGE-L: 0.2471
BLEU: 0.0719
