In [1]:
import os

# Relative paths used below (e.g. "config/config.yaml", as reported in the run
# log) are resolved against the project root, which is assumed to be the parent
# of the directory this notebook is started in.
# Only step up when that marker file is not already visible from the current
# directory: an unconditional chdir("../") climbs one more level every time the
# cell is re-run without restarting the kernel.
if not os.path.isfile(os.path.join("config", "config.yaml")):
    os.chdir("../")
os.getcwd()

'/home/izam/coding/Loan-Prediction-System'

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of settings consumed by the model-evaluation stage.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction. Values are stored exactly as passed in; the annotations
    are not enforced at runtime.
    """

    # Directory that receives the evaluation artifacts (plot images).
    root_dir: Path
    # CSV file holding the test set, including the target column.
    test_data_path: Path
    # Serialized trained model to be loaded for scoring.
    model_path: Path
    # Destination file for the computed metrics.
    metric_file_name: Path
    # Name of the label column inside the test CSV.
    target_column: str

In [3]:
from loanPrediction.constants import *
from loanPrediction.utils.common import read_yaml, create_directories, save_json

In [4]:
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the config, params and schema YAML files.

        Each file is parsed with ``read_yaml`` and kept on the instance as
        ``config``, ``params`` and ``schema``. The directory named by
        ``config.artifacts_root`` is then created via ``create_directories``.

        Args:
            config_filepath: Path to the main configuration YAML.
            params_filepath: Path to the parameters YAML.
            schema_filepath: Path to the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # The artifacts root has to exist before any stage writes below it.
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Reads the ``model_evaluation`` section of the main config and the
        ``TARGET_COLUMN`` entry of the schema, creates the stage's
        ``root_dir``, and packs everything into a ``ModelEvaluationConfig``.

        Returns:
            ModelEvaluationConfig: The populated, frozen stage configuration.
        """
        evaluation_section = self.config.model_evaluation
        target_entry = self.schema.TARGET_COLUMN

        create_directories([evaluation_section.root_dir])

        return ModelEvaluationConfig(
            root_dir=evaluation_section.root_dir,
            test_data_path=evaluation_section.test_data_path,
            model_path=evaluation_section.model_path,
            metric_file_name=evaluation_section.metric_file_name,
            target_column=target_entry.name,
        )

In [5]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc
from sklearn.metrics import confusion_matrix
from loanPrediction.utils.common import load_bin, save_json, round_batch
from loanPrediction import logger
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
class ModelEvaluation:
    """Score a trained binary classifier on the test CSV and save the results.

    The stage computes scalar metrics, writes a ROC-curve image and a
    confusion-matrix image into ``config.root_dir``, and persists the metrics
    to ``config.metric_file_name``.
    """

    def __init__(self, config: ModelEvaluationConfig):
        """Keep the stage configuration for later use.

        Args:
            config: Paths and target-column name for this evaluation run.
        """
        self.config = config

    def _eval_metrics(self, actual, pred, pred_proba=None):
        """Compute accuracy, precision, recall, F1 and ROC AUC.

        Args:
            actual: True labels.
            pred: Predicted class labels.
            pred_proba: Optional positive-class scores/probabilities. When
                given, ROC AUC is computed from these scores; when omitted,
                the hard labels in ``pred`` are used (the previous behavior).

        Returns:
            Whatever ``round_batch`` returns for the five metric values, in
            the order accuracy, precision, recall, F1, ROC AUC.
        """
        acc = accuracy_score(actual, pred)
        precision = precision_score(actual, pred)
        recall = recall_score(actual, pred)
        f1 = f1_score(actual, pred)

        # ROC AUC is a ranking metric: feeding it hard 0/1 labels collapses the
        # curve to a single operating point and disagrees with the AUC shown
        # on the ROC plot, which is built from probabilities.
        auc_scores = pred if pred_proba is None else pred_proba
        rocauc_score = roc_auc_score(actual, auc_scores)

        return round_batch(acc, precision, recall, f1, rocauc_score)

    def _eval_pics(self, actual, pred_proba, pred=None):
        """Save the ROC-curve and confusion-matrix images into ``root_dir``.

        Args:
            actual: True labels.
            pred_proba: Positive-class scores/probabilities.
            pred: Optional predicted class labels for the confusion matrix.
                When omitted, labels are derived as ``pred_proba > 0.5`` (the
                previous behavior).
        """
        logger.info(actual.shape)

        roc_path = os.path.join(self.config.root_dir, "roc_curve.png")
        cm_path = os.path.join(self.config.root_dir, "confusion_matrix.png")

        # ROC curve
        fpr, tpr, _ = roc_curve(actual, pred_proba)
        roc_auc = auc(fpr, tpr)

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
        ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Guessing')
        ax.set_xlabel('False Positive Rate (FPR)')
        ax.set_ylabel('True Positive Rate (TPR)')
        ax.set_title('Receiver Operating Characteristic (ROC) Curve')
        ax.legend(loc='lower right')

        fig.savefig(roc_path)
        # Close explicitly so figures do not pile up in the kernel.
        plt.close(fig)
        logger.info(f"Saved the ROC curve image at {roc_path}")

        # Confusion matrix: prefer the labels the metrics were computed from so
        # the picture and the numbers describe the same predictions.
        y_pred = (pred_proba > 0.5).astype(int) if pred is None else pred
        cm = confusion_matrix(actual, y_pred)

        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 16}, ax=ax)
        ax.set_xlabel('Predicted Labels')
        ax.set_ylabel('True Labels')
        ax.set_title('Confusion Matrix')

        fig.savefig(cm_path)
        plt.close(fig)
        logger.info(f"Saved the cunfusion matrix image at {cm_path}")

    def evaluate(self):
        """Run the full evaluation and persist metrics and images.

        Reads the test CSV, splits off the target column, loads the model,
        predicts labels and positive-class probabilities (column 1 of
        ``predict_proba``), then computes metrics, saves the plots and writes
        the metrics via ``save_json``.

        Returns:
            dict: The metrics that were saved, keyed by display name.
        """
        df = pd.read_csv(self.config.test_data_path)
        X = df.drop([self.config.target_column], axis=1)
        y = df[self.config.target_column]

        model = load_bin(path=Path(self.config.model_path))
        pred = model.predict(X)
        pred_proba = model.predict_proba(X)[:, 1]

        logger.info(f"predicted {pred.shape[0]} data points")

        acc, precision, recall, f1, rocauc_score = self._eval_metrics(y, pred, pred_proba)
        self._eval_pics(y, pred_proba, pred)

        metric = {
            "Accuracy": acc,
            "Precision": precision,
            "Recall": recall,
            "F1 score": f1,
            "roc auc score": rocauc_score,
        }

        logger.info(f"metrics are - {metric}")

        save_json(path=Path(self.config.metric_file_name), data=metric)

        return metric

In [7]:
# Build the stage configuration, then run the evaluation end to end.
# The evaluator gets its own name instead of overwriting the config object,
# and errors propagate on their own: wrapping the calls in
# `except Exception as e: raise e` added nothing.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.evaluate()

[2023-12-11 18:03:25,758: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-12-11 18:03:25,764: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-11 18:03:25,767: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-12-11 18:03:25,767: INFO: common: created directory at: artifacts]
[2023-12-11 18:03:25,768: INFO: common: created directory at: artifacts/model_evaluation]
[2023-12-11 18:03:25,809: INFO: common: binary file loaded from: artifacts/model_trainer/model.joblib]
[2023-12-11 18:03:25,812: INFO: 2867332010: predicted 123 data points]
[2023-12-11 18:03:25,821: INFO: 2867332010: (123,)]
[2023-12-11 18:03:25,961: INFO: 2867332010: Saved the ROC curve image at artifacts/model_evaluation/roc_curve.png]
[2023-12-11 18:03:26,067: INFO: 2867332010: Saved the cunfusion matrix image at artifacts/model_evaluation/confusion_matrix.png]
[2023-12-11 18:03:26,068: INFO: 2867332010: metrics are - {'Accuracy': 0.772, 'Precision': 0.759, 'Recall': 