## Model Evaluation

Next, we simply need to run our best model on the test data and calculate and log some evaluation metrics. 

In [1]:
import os

# Move the working directory up one level (presumably to the project root so the
# `src` package and relative config paths resolve — confirm against repo layout).
# The start directory is remembered on first execution so that re-running this
# cell lands in the same place instead of climbing one level higher every time.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

Let's define our standard entity configuration class.

In [2]:
## setup entity

from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelEvaluationConfig:
    """
    Configuration class for model evaluation operations.

    Attributes:
        root_dir: Directory for evaluation artifacts; the metrics JSON is written here.
        test_data_path: CSV file holding the test set.
        model_path: Serialized model file loaded with joblib.
        experiment_name: Name of the MLflow experiment the evaluation run is logged under.
        target_column: Name of the label column in the test set.
        train_run_id_path: Text file expected to contain the id of the training run.
    """
    root_dir: Path
    test_data_path: Path
    model_path: Path
    experiment_name: str  # previously declared twice; one declaration is enough
    target_column: str
    train_run_id_path: Path

In [3]:
from src.datascience.constants import * 
from src.datascience.utils.common import read_yaml, create_directories, save_json
from src.datascience import logger
from src.datascience.config.configuration import DataIngestionConfig


class ConfigurationManager:
    """
    Configuration manager for handling YAML configuration files.

    This class loads configuration, parameters, and schema files and provides
    methods to retrieve specific configuration objects.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH, schema_filepath=SCHEMA_FILE_PATH):
        """
        Initialize the ConfigurationManager.

        Args:
            config_filepath (Path): Path to the main configuration file
            params_filepath (Path): Path to the parameters file
            schema_filepath (Path): Path to the schema file

        Raises:
            Exception: Any error raised while reading the files or creating the
                artifacts directory is logged and then re-raised unchanged.
        """
        try:
            self.config = read_yaml(config_filepath)
            self.params = read_yaml(params_filepath)
            self.schema = read_yaml(schema_filepath)

            # Create artifacts root directory
            create_directories([self.config.artifacts_root])
            logger.info("ConfigurationManager initialized successfully")

        except Exception as e:
            logger.error(f"Error initializing ConfigurationManager: {e}")
            raise

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """
        Build the model evaluation configuration from the `model_evaluation`
        section of the main configuration file.

        The evaluation root directory is created as a side effect.

        Returns:
            ModelEvaluationConfig: Settings for the evaluation stage, with all
                path fields converted to `Path` objects.
        """
        config = self.config.model_evaluation
        create_directories([config.root_dir])

        # The dataclass declares these fields as Path, so convert the raw config
        # values instead of passing them through as plain strings.
        return ModelEvaluationConfig(
            root_dir=Path(config.root_dir),
            test_data_path=Path(config.test_data_path),
            model_path=Path(config.model_path),
            # The experiment name is fixed here rather than read from the YAML.
            experiment_name="rain-prediction",
            target_column=config.target_column,
            train_run_id_path=Path(config.train_run_id_path),
        )

[2025-08-10 14:09:05,268: INFO: __init__: Logger initialized for the datascience package.]


The ModelEvaluation class is responsible for loading a trained model, evaluating it on the test set, logging performance metrics and plots to MLflow, and saving them locally as artifacts.

In [None]:
import os
import pandas as pd
from sklearn.metrics import (
    classification_report,
    roc_auc_score,
    average_precision_score,
    RocCurveDisplay,
    PrecisionRecallDisplay,
    ConfusionMatrixDisplay,
)
import matplotlib.pyplot as plt
from urllib.parse import urlparse
import mlflow
from pathlib import Path
import mlflow.sklearn
import numpy as np
import joblib
import json
from dotenv import load_dotenv
load_dotenv()

class ModelEvaluation:
    """
    Handles model evaluation after training.

    Methods:
        _init_mlflow():
            Sets the MLflow tracking URI and experiment name.

        evaluate():
            - Loads the trained model and test dataset.
            - Tags the run with the training run id when one can be read.
            - Generates predictions and, where the model supports it, scores.
            - Computes accuracy and weighted precision, recall and F1, plus
              ROC AUC when scores are available.
            - Logs metrics and plots (ROC, PR curve, confusion matrix) to MLflow.
            - Saves metrics locally as JSON and logs the file as an artifact.
            - Logs the evaluated model to MLflow.
            - Returns the metrics dictionary.
    """

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation settings; no I/O happens here."""
        self.config = config

    def _init_mlflow(self):
        """Select the MLflow tracking server and experiment for this run."""
        # MLflow picks MLFLOW_TRACKING_USERNAME / MLFLOW_TRACKING_PASSWORD up from
        # the process environment on its own, so they are not re-assigned here:
        # `os.environ[name] = os.getenv(name)` raises TypeError when the variable
        # is unset (e.g. against an unauthenticated local server).
        credential_vars = ("MLFLOW_TRACKING_USERNAME", "MLFLOW_TRACKING_PASSWORD")
        missing = [name for name in credential_vars if not os.getenv(name)]
        if missing:
            logger.info(f"MLflow credentials not set ({', '.join(missing)}); connecting without them")

        mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000"))
        mlflow.set_experiment(self.config.experiment_name)

    def _read_train_run_id(self):
        """Return the stored training run id, or "" when it cannot be read."""
        try:
            return Path(self.config.train_run_id_path).read_text().strip()
        except Exception as exc:
            # Linking to the training run is best-effort, so any read failure is
            # reported and evaluation carries on without the link.
            logger.warning(f"Could not read training run id from {self.config.train_run_id_path}: {exc}")
            return ""

    @staticmethod
    def _continuous_scores(model, X_test):
        """
        Return per-sample scores for threshold-free metrics, or None when the
        model exposes neither `predict_proba` nor `decision_function`.
        """
        if hasattr(model, "predict_proba"):
            # Column 1 is used as the positive-class probability — assumes a
            # binary classifier; TODO confirm.
            return model.predict_proba(X_test)[:, 1]
        if hasattr(model, "decision_function"):
            return model.decision_function(X_test)
        return None

    @staticmethod
    def _log_plots(y_test, y_pred, y_score):
        """Log ROC / PR curves (when scores exist) and the confusion matrix to MLflow."""

        def _log_and_close(display, artifact_file):
            # Use the figure owned by the display rather than pyplot's "current
            # figure", and close it explicitly so figures do not accumulate.
            mlflow.log_figure(display.figure_, artifact_file)
            plt.close(display.figure_)

        if y_score is not None:
            _log_and_close(RocCurveDisplay.from_predictions(y_test, y_score), "roc_curve.png")
            _log_and_close(PrecisionRecallDisplay.from_predictions(y_test, y_score), "pr_curve.png")

        _log_and_close(ConfusionMatrixDisplay.from_predictions(y_test, y_pred), "confusion_matrix.png")

    def evaluate(self):
        """
        Evaluate the trained model on the test set inside an MLflow run.

        Returns:
            dict: Metric name -> float value, as logged to MLflow. Contains
                "accuracy", "precision_weighted", "recall_weighted",
                "f1_weighted" and, when scores are available, "roc_auc".
        """
        self._init_mlflow()

        # Load the model and data.
        # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
        # only load model artifacts produced by a trusted pipeline.
        model = joblib.load(self.config.model_path)
        test_df = pd.read_csv(self.config.test_data_path)

        X_test = test_df.drop(columns=[self.config.target_column])
        y_test = test_df[self.config.target_column].astype(int)

        with mlflow.start_run(run_name="evaluation"):
            # Link to training run if available
            train_run_id = self._read_train_run_id()
            if train_run_id:
                mlflow.set_tag("train_run_id", train_run_id)

            # Predictions
            y_score = self._continuous_scores(model, X_test)
            y_pred = model.predict(X_test)
            report = classification_report(y_test, y_pred, output_dict=True)

            weighted = report["weighted avg"]
            metrics = {
                "accuracy": float(report["accuracy"]),
                "precision_weighted": float(weighted["precision"]),
                "recall_weighted": float(weighted["recall"]),
                "f1_weighted": float(weighted["f1-score"]),
            }

            # ROC AUC needs continuous scores, not hard class predictions.
            if y_score is not None:
                metrics["roc_auc"] = float(roc_auc_score(y_test, y_score))

            # Log the metrics to mlflow
            mlflow.log_metrics(metrics)

            # Save the metrics locally and attach the file to the run.
            metrics_path = Path(self.config.root_dir) / "test_metrics.json"
            metrics_blob = {"metrics": metrics, "classification_report": report}
            metrics_path.write_text(json.dumps(metrics_blob, indent=2))
            mlflow.log_artifact(str(metrics_path))

            # Plots
            self._log_plots(y_test, y_pred, y_score)

            # log evaluated model
            mlflow.sklearn.log_model(model, artifact_path="evaluated_model")

        return metrics




    

In [5]:
# Run the evaluation stage end to end: load configuration, build the evaluator,
# and evaluate the trained model on the test set.
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    model_evaluation.evaluate()

except Exception:
    # Record the failure (with traceback) in the project log, then re-raise with
    # a bare `raise` so the original traceback is preserved.
    logger.exception("Model evaluation failed")
    raise

[2025-08-10 14:09:06,303: INFO: common: YAML file: config/config.yaml loaded successfully]
[2025-08-10 14:09:06,307: INFO: common: YAML file: params.yaml loaded successfully]
[2025-08-10 14:09:06,308: INFO: common: YAML file: schema.yaml loaded successfully]
[2025-08-10 14:09:06,309: INFO: common: Created directory at artifacts]
[2025-08-10 14:09:06,309: INFO: 1362631697: ConfigurationManager initialized successfully]
[2025-08-10 14:09:06,310: INFO: common: Created directory at artifacts/model_evaluation]


