In [1]:
!pwd

/home/gourav/ML/Text_Classification_Model_Builder/research


In [2]:
import os
# Step up from research/ to the project root so the later `src` imports and
# relative config paths resolve. Guarded on the directory name so that
# re-running this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
!pwd

/home/gourav/ML/Text_Classification_Model_Builder


In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluateModelConfig:
    """Immutable bundle of the paths needed by the model-evaluation step."""

    # Where the evaluation data is read from.
    test_data_path: Path
    # Where the saved model to be evaluated is loaded from.
    pretrained_model_path: Path

In [5]:
from src.constants import *
from src.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project's YAML config/params files and builds step configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and hand the artifacts root to create_directories.

        Args:
            config_filepath: Path of the main config YAML file.
            params_filepath: Path of the params YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_evaluate_model_config(self) -> EvaluateModelConfig:
        """Assemble the EvaluateModelConfig from the loaded config and params.

        The model location is `<saved_model_dir>/<model_name>/model`, where
        `saved_model_dir` comes from the `evaluate_model` config section and
        `model_name` from the `model_params` params section.

        Returns:
            EvaluateModelConfig holding the test-data path and the model path.
        """
        stage_settings = self.config.evaluate_model
        model_settings = self.params.model_params

        # Path(...) joins its segments the same way os.path.join does.
        saved_model_location = Path(
            stage_settings.saved_model_dir,
            model_settings.model_name,
            "model",
        )

        return EvaluateModelConfig(
            test_data_path=Path(stage_settings.test_data_path),
            pretrained_model_path=saved_model_location,
        )

In [7]:
# Smoke-check: build the evaluation config from the YAML files and print it.
# NOTE(review): the output recorded for this cell lists a
# `pretrained_tokenizer_path` field that EvaluateModelConfig does not declare;
# it looks stale -- re-run the cell to refresh it.
obj = ConfigurationManager()
evaluation_config = obj.get_evaluate_model_config()
print(evaluation_config)

[2024-06-03 17:08:58,070: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-06-03 17:08:58,090: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-03 17:08:58,094: INFO: common: already created directory: artifacts]
EvaluateModelConfig(test_data_path=PosixPath('artifacts/split_data'), pretrained_tokenizer_path=PosixPath('artifacts/models/bert-base-uncased/tokenizer'), pretrained_model_path=PosixPath('artifacts/models/bert-base-uncased/model'))


In [None]:
import torch
from src.utils.common import load_json, join_path
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

In [8]:
class EvaluateModel:
    """Evaluates a saved sequence-classification model on the test data.

    The model is loaded from ``config.pretrained_model_path`` and the test
    data from ``config.test_data_path``; the result is a dict of accuracy and
    macro-averaged precision, recall and F1.
    """

    def __init__(self, config: EvaluateModelConfig):
        """Store the evaluation configuration.

        Args:
            config: Paths of the test data and of the saved model.
        """
        self.config = config

    def get_predictions(self, input_ids):
        """Predict one class index per example.

        Args:
            input_ids: Sequence of token-id sequences, one per example. Each
                example is sent through the model on its own, as a batch of
                size one.

        Returns:
            list[int]: The argmax class index for every example, in input
            order, as plain Python ints.
        """
        pretrained_model = AutoModelForSequenceClassification.from_pretrained(
            self.config.pretrained_model_path
        )
        # Make inference mode explicit (dropout off).
        pretrained_model.eval()

        predicted_labels = []
        # No gradients are needed for evaluation; skipping autograd saves
        # memory and time on every forward pass.
        with torch.no_grad():
            for example_ids in input_ids:
                model_input = torch.tensor([example_ids])
                logits = pretrained_model(model_input).logits
                # .item() yields a Python int rather than a 0-dim tensor, so
                # the label list can be passed straight to the metric functions.
                predicted_labels.append(logits.argmax(dim=-1).item())

        return predicted_labels

    def get_evaluation_report(self, actual_labels, predicted_labels):
        """Compute accuracy and macro-averaged precision / recall / F1.

        Args:
            actual_labels: Ground-truth class labels.
            predicted_labels: Predicted class labels, aligned with
                ``actual_labels``.

        Returns:
            dict with the keys 'Accuracy', 'F1', 'Precision' and 'Recall'.
        """
        precision, recall, f1, _ = precision_recall_fscore_support(
            actual_labels, predicted_labels, average='macro'
        )
        acc = accuracy_score(actual_labels, predicted_labels)

        report = {
            'Accuracy': acc,
            'F1': f1,
            'Precision': precision,
            'Recall': recall
        }

        return report

    def get_model_evaluation(self):
        """Load the test data, predict its labels and build the metric report.

        Returns:
            dict: The report produced by ``get_evaluation_report``.
        """
        # NOTE(review): the config output recorded earlier in this notebook
        # shows test_data_path as 'artifacts/split_data', which looks like a
        # directory; confirm load_json receives the path of the actual test
        # JSON file.
        test_data = load_json(self.config.test_data_path)
        input_ids = test_data["input_ids"]
        actual_labels = test_data["labels"]

        predicted_labels = self.get_predictions(input_ids)

        model_evaluation_result = self.get_evaluation_report(actual_labels, predicted_labels)

        return model_evaluation_result
        

In [None]:
# Run the evaluation end to end: resolve the config, evaluate the saved model
# on the test data, and show the metrics as the cell output.
obj = ConfigurationManager()
evaluation_config = obj.get_evaluate_model_config()
evaluate_model = EvaluateModel(evaluation_config)
evaluation_report = evaluate_model.get_model_evaluation()
evaluation_report