In [9]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/84.0 kB ? eta -:--:--
   ---- ----------------------------------- 10.2/84.0 kB ? eta -:--:--
   ---- ----------------------------------- 10.2/84.0 kB ? eta -:--:--
   -------------- ------------------------- 30.7/84.0 kB 330.3 kB/s eta 0:00:01
   -------------- ------------------------- 30.7/84.0 kB 330.3 kB/s eta 0:00:01
  

In [13]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk
import evaluate 
import torch
import pandas as pd
from tqdm import tqdm

In [15]:
class ModelEvaluation:
    """Score a fine-tuned seq2seq summarization model with ROUGE.

    The artifact locations (dataset, model, tokenizer, metrics file) are fixed
    relative paths assigned in ``__init__``. ``initiate_evaluation`` is the
    entry point; the other methods are building blocks it relies on.
    """

    def __init__(self):
        # Directory that holds the evaluation outputs.
        self.root_dir = "../artifacts/model_evaluation"
        # Dataset saved to disk by the data-transformation stage.
        self.data_path = "../artifacts/data_transformation/samsum_dataset"
        # Fine-tuned model and tokenizer saved by the training stage.
        self.model_path = "../artifacts/model_trainer/pegasus-samsum-model"
        self.tokenizer_path = "../artifacts/model_trainer/tokenizer"
        # CSV file the ROUGE scores are written to.
        self.metric_file_name = "../artifacts/model_evaluation/metrics.csv"

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements``.

        Args:
            list_of_elements: Any sliceable sequence that supports ``len``.
            batch_size: Maximum number of elements per slice.

        Yields:
            Slices of at most ``batch_size`` elements, in order; the last one
            may be shorter. Nothing is yielded for an empty sequence.
        """
        for start in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[start:start + batch_size]

    # The helper was originally defined under this misspelled name while its
    # callers used the correct spelling; keep the old name as an alias so any
    # external caller relying on it keeps working.
    generate_batch_sized_chuncks = generate_batch_sized_chunks

    @staticmethod
    def _rouge_fmeasure(value):
        """Return the F-measure for one ROUGE entry as a plain float.

        Accepts either a bare number (what the ``evaluate`` ROUGE metric
        returns) or an aggregate object exposing ``.mid.fmeasure`` (the older
        ``datasets.load_metric`` result shape).
        """
        mid = getattr(value, "mid", None)
        if mid is not None:
            return mid.fmeasure
        return float(value)

    def calculate_metric_on_test_ds(self, dataset, metric, model, tokenizer, batch_size = 16, device="cuda" if torch.cuda.is_available() else "cpu",
                                   column_text="article",column_summary="highlights"):
        """Generate summaries batch by batch and score them with ``metric``.

        Args:
            dataset: Mapping from column name to a sequence of strings.
            metric: Object providing ``add_batch(predictions=, references=)``
                and ``compute()``.
            model: Model providing ``generate``.
            tokenizer: Callable tokenizer that also provides ``decode``.
            batch_size: Number of examples per generation batch.
            device: Device the tokenized inputs are moved to; the model is
                expected to already live on the same device.
            column_text: Column holding the texts to summarize.
            column_summary: Column holding the reference summaries.

        Returns:
            Whatever ``metric.compute()`` returns after all batches are added.
        """
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
            zip(article_batches, target_batches), total = len(article_batches)):

            inputs = tokenizer(article_batch, max_length=1024, truncation=True, padding="max_length", return_tensors="pt")
            summaries = model.generate(
                input_ids=inputs["input_ids"].to(device),
                attention_mask=inputs["attention_mask"].to(device),
                length_penalty=0.8, num_beams=8, max_length=128
            )
            decoded_summaries = [
                tokenizer.decode(s, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for s in summaries
            ]
            # Pegasus marks line breaks with the literal token "<n>"; turn it
            # into a space. (Replacing the empty string instead would insert a
            # space between every character and ruin the scores.)
            decoded_summaries = [d.replace("<n>", " ") for d in decoded_summaries]
            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        return metric.compute()

    def initiate_evaluation(self):
        """Load the artifacts, score the first 10 test examples, save a CSV.

        Loads the tokenizer, model and dataset from the configured paths,
        computes ROUGE on ``test[0:10]`` using the ``dialogue`` / ``summary``
        columns, and writes one row of F-measures to ``self.metric_file_name``.
        Loading errors from the underlying libraries (for example when the
        model directory contains no weights) are not caught here.
        """
        # Local import keeps this block self-contained; the notebook's import
        # cell does not bring in ``os``.
        import os

        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.model_path).to(device)

        dataset_samsum_pt = load_from_disk(self.data_path)

        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]

        rouge_metric = evaluate.load("rouge")

        # Pass ``device`` explicitly so inputs land where the model was moved.
        score = self.calculate_metric_on_test_ds(
            dataset_samsum_pt['test'][0:10], rouge_metric, model_pegasus, tokenizer,
            batch_size = 2, device = device, column_text = 'dialogue', column_summary = 'summary'
        )
        rouge_dict = {rn: self._rouge_fmeasure(score[rn]) for rn in rouge_names}

        # Make sure the target directory exists before writing the CSV.
        os.makedirs(os.path.dirname(self.metric_file_name) or ".", exist_ok=True)

        df = pd.DataFrame(rouge_dict, index = ['pegasus'] )
        df.to_csv(self.metric_file_name, index=False)

In [17]:
# Build the evaluator with its built-in artifact paths and run the evaluation.
# initiate_evaluation() loads the tokenizer and model from the model_trainer
# artifact directories, so the training stage must have saved its weights
# there first; on success the ROUGE scores are written to the metrics CSV.
model_evaluation_obj = ModelEvaluation()
model_evaluation_obj.initiate_evaluation()

OSError: Error no file named pytorch_model.bin, model.safetensors, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory ../artifacts/model_trainer/pegasus-samsum-model.