In [7]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py): started
  Building wheel for rouge_score (setup.py): finished with status 'done'
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24972 sha256=215637a3a69e32f87ccaf90455315615d24469f4a2e134ec2fbf52eb0181d926
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\85\9d\af\01feefbe7d55ef5468796f0c68225b6788e85d9d0a281e7a70
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [1]:
!pip install evaluate



In [33]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk
import evaluate 
import torch
import pandas as pd
from tqdm import tqdm
import os

In [17]:
class ModelEvaluation:
    """Score a saved seq2seq summarization model on a saved dataset with ROUGE.

    All artifact locations are fixed relative paths assigned in ``__init__``.
    """

    def __init__(self):
        """Record the relative paths of the evaluation inputs and outputs."""
        self.root_dir = "../artifacts/model_evaluation"
        self.data_path = "../artifacts/data_transformation/samsum_dataset"
        self.model_path = "../artifacts/model_trainer/pegasus-samsum-model"
        self.tokenizer_path = "../artifacts/model_trainer/tokenizer"
        self.metric_file_name = "../artifacts/model_evaluation/metrics.csv"

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements`` holding at most ``batch_size`` items.

        The final slice is shorter when the length is not a multiple of
        ``batch_size``.
        """
        for start in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[start:start + batch_size]

    def calculate_metric_on_test_ds(self, dataset, metric, model, tokenizer, batch_size = 16, device="cuda" if torch.cuda.is_available() else "cpu",
                                   column_text="article",column_summary="highlights"):
        """Generate summaries batch by batch and accumulate them into ``metric``.

        Args:
            dataset: Mapping indexable by ``column_text`` and ``column_summary``,
                each giving a sliceable sequence of strings.
            metric: Object exposing ``add_batch(predictions=, references=)`` and
                ``compute()``.
            model: Object exposing ``generate(input_ids=, attention_mask=, ...)``.
            tokenizer: Callable that encodes a batch of texts and exposes ``decode``.
            batch_size: Number of examples encoded and generated per step.
            device: Device the encoded inputs are moved to before generation.
            column_text: Key of the source texts in ``dataset``.
            column_summary: Key of the reference summaries in ``dataset``.

        Returns:
            Whatever ``metric.compute()`` returns once every batch has been added.
        """
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
            zip(article_batches, target_batches), total=len(article_batches)):

            inputs = tokenizer(article_batch, max_length=1024, truncation=True,
                               padding="max_length", return_tensors="pt")
            summaries = model.generate(
                input_ids=inputs["input_ids"].to(device),
                attention_mask=inputs["attention_mask"].to(device),
                length_penalty=0.8, num_beams=8, max_length=128
            )
            # Replace the "<n>" line-break marker left in decoded PEGASUS output
            # with a space. The previous code replaced the empty string instead,
            # which inserts a space between every character of the summary.
            decoded_summaries = [
                tokenizer.decode(
                    s, skip_special_tokens=True, clean_up_tokenization_spaces=True
                ).replace("<n>", " ")
                for s in summaries
            ]
            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        return metric.compute()

    def initiate_evaluation(self):
        """Load tokenizer, model and dataset from disk and return the ROUGE result.

        Only the first 10 examples of the ``'test'`` split are scored, in
        batches of 2, using the ``'dialogue'`` and ``'summary'`` columns.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.model_path).to(device)

        dataset_samsum_pt = load_from_disk(self.data_path)
        rouge_metric = evaluate.load("rouge")

        # Pass the device explicitly so the inputs always land on the same
        # device the model was just moved to.
        return self.calculate_metric_on_test_ds(
            dataset_samsum_pt['test'][0:10], rouge_metric, model_pegasus, tokenizer,
            batch_size=2, device=device, column_text='dialogue', column_summary='summary'
        )


In [19]:
# Build the evaluator and run it; `score` holds the value returned by
# initiate_evaluation() and is read by the cell that writes the metrics CSV.
model_evaluation_obj = ModelEvaluation()
score = model_evaluation_obj.initiate_evaluation()

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:58<00:00, 47.67s/it]


In [39]:

# Write the ROUGE values held in `score` to a one-row CSV file.
metric_file_name = "../artifacts/model_evaluation/metrics.csv"
rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
rouge_dict = {rn: score[rn] for rn in rouge_names}

# Create a DataFrame; the explicit index supplies the label of its single row.
df = pd.DataFrame(rouge_dict, index=['pegasus'])

# Ensure the directory exists before writing the file. exist_ok=True avoids the
# check-then-create race, and the guard skips makedirs("") for a bare file name.
directory = os.path.dirname(metric_file_name)
if directory:
    os.makedirs(directory, exist_ok=True)

# Write the DataFrame to a CSV file (index=False: the 'pegasus' row label is not written).
df.to_csv(metric_file_name, index=False)
print(f"Metrics saved to '{metric_file_name}'")


PermissionError: [Errno 13] Permission denied: '../artifacts/model_evaluation/metrics.csv'