In [None]:
import pandas as pd
from tqdm import tqdm
import torch
from datasets import load_dataset, load_metric
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Hub identifier of the dataset to evaluate on. The recorded output of this
# cell shows validation / test / train splits with the columns
# 'extractive_notes_summ' (model input) and 'target_text' (reference summary).
dataset_name = "dmacres/mimiciii-hospitalcourse-cossim-pagerank-batched-extractive-summ-v2"
mimiciii_dataset = load_dataset(path=dataset_name)

# Bare last expression so the notebook displays the split / feature overview.
mimiciii_dataset

Downloading readme:   0%|          | 0.00/886 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/22.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/22.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/107M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating validation split:   0%|          | 0/5356 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5356 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/24993 [00:00<?, ? examples/s]

DatasetDict({
    validation: Dataset({
        features: ['subject_id', 'hadm_id', 'target_text', 'extractive_notes_summ', 'n_notes'],
        num_rows: 5356
    })
    test: Dataset({
        features: ['subject_id', 'hadm_id', 'target_text', 'extractive_notes_summ', 'n_notes'],
        num_rows: 5356
    })
    train: Dataset({
        features: ['subject_id', 'hadm_id', 'target_text', 'extractive_notes_summ', 'n_notes'],
        num_rows: 24993
    })
})

In [None]:
# ROUGE scorer used to compare generated summaries with the references.
# NOTE(review): the recorded output shows this call emitting a warning;
# `datasets.load_metric` is deprecated in newer `datasets` releases in favour
# of the separate `evaluate` package -- confirm before upgrading, because the
# score objects read later (`score[...].mid.fmeasure`) may differ there.
rouge_metric = load_metric(path="rouge")

  rouge_metric = load_metric("rouge")


Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

In [None]:
# Checkpoint to evaluate as-is (no fine-tuning happens in this notebook).
model_ckpt = "google/pegasus-large"

# Tokenizer and seq2seq model are both resolved from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSeq2SeqLM.from_pretrained(model_ckpt)

# Move the weights onto the device selected earlier in the notebook.
model = model.to(device)

(…)large/resolve/main/tokenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

(…)e/pegasus-large/resolve/main/config.json:   0%|          | 0.00/3.09k [00:00<?, ?B/s]

(…)/pegasus-large/resolve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

(…)rge/resolve/main/special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-large and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


(…)arge/resolve/main/generation_config.json:   0%|          | 0.00/260 [00:00<?, ?B/s]

In [None]:
def chunks(list_of_elements, batch_size):
    """Lazily split a sliceable sequence into consecutive batches.

    Args:
        list_of_elements: Any object supporting ``len()`` and slicing.
        batch_size: Maximum number of elements per yielded slice.

    Yields:
        Slices ``list_of_elements[start:start + batch_size]`` in order; the
        final slice may be shorter than ``batch_size``.
    """
    total = len(list_of_elements)
    starts = range(0, total, batch_size)
    yield from (list_of_elements[start:start + batch_size] for start in starts)

def evaluate_summaries_pegasus(dataset, metric, model, tokenizer,
                               batch_size=16, device=device,
                               note_text="extractive_notes_summ",
                               note_summary="target_text",
                               max_input_length=1024,
                               max_summary_length=128,
                               num_beams=8, length_penalty=0.8):
    """Generate a summary for every note in ``dataset`` and score the results.

    The notes and their reference summaries are split into aligned batches.
    Each batch of notes is tokenized, summarised with beam search via
    ``model.generate``, decoded back to text and added to ``metric`` together
    with the matching references. The aggregate score is computed at the end.

    Args:
        dataset: Object indexable by column name; ``dataset[note_text]`` and
            ``dataset[note_summary]`` must support ``len()`` and slicing.
        metric: Scorer exposing ``add_batch(predictions=..., references=...)``
            and ``compute()``. Its accumulated state is modified by this call.
        model: Seq2seq model exposing ``generate``; expected to already live
            on ``device``.
        tokenizer: Tokenizer matching ``model``.
        batch_size: Number of notes summarised per ``generate`` call.
        device: Device the tokenized inputs are moved to.
        note_text: Column holding the input text.
        note_summary: Column holding the reference summaries.
        max_input_length: Inputs are truncated to this many tokens.
        max_summary_length: ``max_length`` passed to ``model.generate``.
        num_beams: Beam width passed to ``model.generate``.
        length_penalty: Length penalty passed to ``model.generate``.

    Returns:
        Whatever ``metric.compute()`` returns.
    """
    note_batches = list(chunks(dataset[note_text], batch_size))
    target_batches = list(chunks(dataset[note_summary], batch_size))

    for note_batch, target_batch in tqdm(
            zip(note_batches, target_batches), total=len(note_batches)):

        # Pad only up to the longest note in this batch instead of always
        # padding to the truncation limit: padded positions are masked out
        # through `attention_mask`, so padding every batch to the full
        # `max_input_length` only adds encoder work.
        inputs = tokenizer(note_batch, max_length=max_input_length,
                           truncation=True, padding="longest",
                           return_tensors="pt")

        summaries = model.generate(
            input_ids=inputs["input_ids"].to(device),
            attention_mask=inputs["attention_mask"].to(device),
            length_penalty=length_penalty, num_beams=num_beams,
            max_length=max_summary_length)

        decoded_summaries = tokenizer.batch_decode(
            summaries, skip_special_tokens=True,
            clean_up_tokenization_spaces=True)
        # Replace the literal "<n>" marker left in the decoded text by a space
        # before scoring.
        decoded_summaries = [d.replace("<n>", " ") for d in decoded_summaries]
        metric.add_batch(predictions=decoded_summaries, references=target_batch)

    return metric.compute()

In [None]:
# Score the un-fine-tuned checkpoint on the held-out test split.
score = evaluate_summaries_pegasus(
    dataset=mimiciii_dataset['test'],
    metric=rouge_metric,
    model=model,
    tokenizer=tokenizer,
    batch_size=2,
    note_text="extractive_notes_summ",
    note_summary="target_text",
)

# Keep only the mid-point F-measure of each ROUGE variant.
rouge_methods = ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']
rouge_dict = {rm: score[rm].mid.fmeasure for rm in rouge_methods}

# One-row table whose index label names the evaluated model.
df = pd.DataFrame(rouge_dict, index=["pegasus-large"])
print(df)

# NOTE(review): index=False means the "pegasus-large" row label is not written
# to the CSV -- confirm that is intended. The file name spells "rogue" (sic);
# it is left unchanged because other code may read this exact path.
df.to_csv('pegasus-large-BASE-mimiciii-v2-rogue-metrics.csv', index=False)

100%|██████████| 2678/2678 [2:44:12<00:00,  3.68s/it]


                 rouge1    rouge2    rougeL  rougeLsum
pegasus-large  0.078553  0.009239  0.052366   0.052339
