In [None]:
!pip install --upgrade accelerator

In [None]:
!pip uninstall y transformers accelerate

In [None]:
!pip install transformers accelerate

In [None]:
import os

In [None]:
# Display the current working directory.
%pwd

In [None]:
# Step up one directory level. The move is relative to the current working
# directory, so running this cell again moves up one more level.
os.chdir(os.pardir)

In [None]:
# Display the working directory again to confirm where the notebook now runs.
%pwd

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable set of paths consumed by the model-evaluation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory belonging to the evaluation stage.
    root_dir: Path
    # Location of the dataset saved on disk.
    data_path: Path
    # Location of the trained model to evaluate.
    model_path: Path
    # Location of the tokenizer that matches the model.
    tokenizer_path: Path
    # File the computed metric is written to.
    metric_file_name: Path

In [None]:
from LangWizAI.constants import *
from LangWizAI.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Read the project's YAML files and build per-stage configuration objects.

    Args:
        config_filepath: Path of the main configuration YAML file
            (defaults to ``CONFIG_FILE_PATH``).
        params_filepath: Path of the parameters YAML file
            (defaults to ``PARAMS_FILE_PATH``).
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Hand the top-level artifacts directory to the project helper before
        # any stage-specific directory is requested.
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the configuration for the model-evaluation stage.

        Reads the ``model_evaluation`` section of the main configuration,
        passes its ``root_dir`` to ``create_directories`` and returns the
        section's paths as a ``ModelEvaluationConfig``.

        Returns:
            ModelEvaluationConfig: paths for the evaluation stage.
        """
        config = self.config.model_evaluation
        create_directories([config.root_dir])

        # Every value comes from the `model_evaluation` section itself:
        # `root_dir` is the directory created just above, and the key names
        # mirror the dataclass fields (`data_path`, `model_path`, ...).
        # NOTE(review): confirm these keys against the YAML file.
        return ModelEvaluationConfig(
            root_dir=config.root_dir,
            data_path=config.data_path,
            model_path=config.model_path,
            tokenizer_path=config.tokenizer_path,
            metric_file_name=config.metric_file_name,
        )
        

In [None]:
import pandas as pd
import torch
from datasets import load_dataset, load_from_disk, load_metric
from nltk.translate.bleu_score import sentence_bleu
from tqdm import tqdm
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [None]:
class ModelEvaluation:
    """Score a seq2seq model on a small slice of the test split.

    Args:
        config: ``ModelEvaluationConfig`` holding the dataset, model,
            tokenizer and metric-file paths.
    """

    # The annotation is a string so that defining this class does not require
    # `ModelEvaluationConfig` to be defined first.
    def __init__(self, config: "ModelEvaluationConfig"):
        self.config = config

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements``.

        Each slice holds at most ``batch_size`` items; the last one may be
        shorter. An empty input yields nothing.
        """
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i:i + batch_size]

    def calculate_metric_on_test_ds(
        self,
        dataset,
        metric,
        model,
        tokenizer,
        batch_size=16,
        device='cuda' if torch.cuda.is_available() else 'cpu',
        column_input_text='source',
        column_output_text='reference',
    ):
        """Generate predictions batch by batch and score them with ``metric``.

        Args:
            dataset: Object indexable by column name, each column being a
                sliceable sequence (e.g. the dict returned by slicing a
                ``datasets.Dataset``).
            metric: Object exposing ``add_batch(predictions=, references=)``
                and ``compute()``.
            model: Model exposing ``generate``; expected to live on ``device``.
            tokenizer: Callable tokenizer that also exposes ``decode``.
            batch_size: Number of examples per generation batch.
            device: Device the input tensors are moved to.
            column_input_text: Name of the column holding the source texts.
            column_output_text: Name of the column holding the references.

        Returns:
            Whatever ``metric.compute()`` returns after all batches were added.
        """
        input_batches = list(self.generate_batch_sized_chunks(dataset[column_input_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_output_text], batch_size))

        for input_batch, target_batch in tqdm(
                zip(input_batches, target_batches), total=len(input_batches)):

            # The keyword is lower-case `padding`.
            inputs = tokenizer(input_batch, max_length=128, truncation=True,
                               padding='max_length', return_tensors='pt')

            outputs = model.generate(input_ids=inputs['input_ids'].to(device),
                                     attention_mask=inputs['attention_mask'].to(device),
                                     length_penalty=0.8, num_beams=8, max_length=128)

            # The decoded text is used as-is. The earlier
            # `replace("", " ")` step put a space between every character
            # and corrupted each prediction before scoring.
            decoded_outputs = [
                tokenizer.decode(s, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for s in outputs
            ]

            # Accumulate on the metric object; the score is computed once at the end.
            metric.add_batch(predictions=decoded_outputs, references=target_batch)

        score = metric.compute()
        return score

    def evaluate(self):
        """Load model, tokenizer and data, score 10 test rows, write a CSV.

        The corpus BLEU score is written to ``config.metric_file_name`` as a
        one-row CSV with a single ``bleu`` column.
        """
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        fr_en_model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)

        data_pt = load_from_disk(self.config.data_path)

        # 'sentence_bleu' is not a metric id known to `datasets`; 'sacrebleu'
        # scores plain strings.
        # NOTE(review): `load_metric` is deprecated in recent `datasets`
        # releases in favour of the separate `evaluate` package.
        bleu_metric = load_metric('sacrebleu')

        # Slicing the split returns a dict of columns. sacrebleu expects a
        # list of references per prediction, so each reference is wrapped.
        test_sample = dict(data_pt['test'][0:10])
        test_sample['reference'] = [[ref] for ref in test_sample['reference']]

        # Pass the loaded objects themselves, not their names as strings.
        score = self.calculate_metric_on_test_ds(
            test_sample, bleu_metric, model=fr_en_model, tokenizer=tokenizer,
            batch_size=2, device=device,
            column_input_text='source', column_output_text='reference')

        # Keep only the scalar corpus score: the full result also contains
        # list-valued entries that do not fit a one-row frame.
        df = pd.DataFrame({'bleu': [score['score']]}, index=['fr_en'])
        df.to_csv(self.config.metric_file_name, index=False)
        
            
    

In [None]:
# Run the evaluation stage end to end. Exceptions propagate on their own: the
# former `except Exception as e: raise e` wrapper re-raised them unchanged.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
# Use a separate name for the evaluator so the config object is not overwritten.
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.evaluate()