In [2]:
import os

In [3]:
%pwd

'd:\\Repositories\\ml-churn\\research'

In [4]:
# Move from the notebook folder to the project root so that relative paths
# (config/, artifacts/, ...) resolve. The guard keeps the cell idempotent:
# re-running it will not climb further up the tree, and no machine-specific
# absolute path is baked into the notebook.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("..")

In [5]:
## Here comes the keys for mlflow

In [6]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Read-only bundle of settings for the model-evaluation stage.

    Instances are frozen: fields are assigned once at construction and any
    later attribute assignment raises ``dataclasses.FrozenInstanceError``.
    """

    # --- filesystem locations -------------------------------------------
    root_dir: Path          # output directory of the evaluation stage
    test_data_path: Path    # CSV file holding the held-out test set
    model_path: Path        # directory that contains the serialized model
    # --- model / data identifiers ---------------------------------------
    model_name: str         # file name of the serialized model inside model_path
    target_column: str      # name of the label column in the test data
    # --- experiment tracking --------------------------------------------
    mlflow_uri: str         # URI of the MLflow server used for this project
    all_params: dict        # model hyper-parameters to record with the run


In [7]:
from Churn_analysis.constants import *
from Churn_analysis.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Load the project's YAML files and build per-stage configuration objects.

    The three YAML documents (config, params, schema) are read once in
    ``__init__`` and kept on the instance; the objects returned by
    ``read_yaml`` are accessed with attribute syntax throughout.
    """

    # Fallback MLflow server, used only when MLFLOW_TRACKING_URI is not set
    # (or is empty) in the environment.
    DEFAULT_MLFLOW_URI = "https://dagshub.com/FBrownp/ml-churn.mlflow"

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH):
        """Read the YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the data-schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Creates the stage's output directory as a side effect.

        The MLflow URI is taken from the ``MLFLOW_TRACKING_URI`` environment
        variable when it is set and non-empty, otherwise from
        ``DEFAULT_MLFLOW_URI``, so the notebook can be pointed at another
        server without editing code.

        Returns:
            A frozen ``ModelEvaluationConfig`` whose path fields are
            ``pathlib.Path`` objects, as the dataclass annotations declare.
        """
        config = self.config.model_evaluation
        params = self.params.XGBoost
        target_columns = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        mlflow_uri = os.environ.get("MLFLOW_TRACKING_URI") or self.DEFAULT_MLFLOW_URI

        return ModelEvaluationConfig(
            root_dir=Path(config.root_dir),
            test_data_path=Path(config.test_data_path),
            model_path=Path(config.model_path),
            model_name=config.model_name_1,
            target_column=target_columns.target_1,
            mlflow_uri=mlflow_uri,
            all_params=params,
        )

In [9]:
import os

from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc


import joblib
import pandas as pd


from Churn_analysis.utils.common import save_json, calculate_metrics

import mlflow
import mlflow.xgboost
from  urllib.parse import urlparse


In [12]:
class ModelEvaluation():
    """Score a saved classifier on the test set and record the run in MLflow."""

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation settings.

        Args:
            config: Paths, target column, MLflow URI and hyper-parameters
                used by ``get_model_evaluation_object``.
        """
        self.config = config

    def get_model_evaluation_object(self):
        """Evaluate the saved model and log parameters, metrics and the model.

        Steps:
            1. Read the test CSV and split it into features and target.
            2. Load the serialized model from ``model_path/model_name``.
            3. Build the confusion matrix from hard predictions and the
               ROC AUC from the predicted probability of the second class.
            4. Derive the metrics with ``calculate_metrics`` and save them to
               ``<root_dir>/scores.json``.
            5. Log parameters, metrics and the model inside one MLflow run.

        Returns:
            The metrics mapping produced by ``calculate_metrics`` (the ROC AUC
            is logged to MLflow but is not part of this mapping).
        """
        test_data_df = pd.read_csv(self.config.test_data_path)

        y_test = test_data_df[self.config.target_column]
        X_test = test_data_df.drop(columns=self.config.target_column)

        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code; only load model artifacts produced by this project.
        model = joblib.load(os.path.join(self.config.model_path, self.config.model_name))

        conf_matrix = confusion_matrix(y_test, model.predict(X_test))

        # ROC curve / AUC from the probability assigned to the second class.
        y_proba = model.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, y_proba)
        roc_auc = auc(fpr, tpr)

        # Compute and persist the metrics before opening a run, so a failure
        # here does not leave an empty run behind on the tracking server.
        scores = calculate_metrics(conf_matrix)
        save_json(path=Path(os.path.join(self.config.root_dir, "scores.json")), data=scores)

        # Point both tracking and the registry at the configured server.
        # Setting only the registry URI would leave tracking on whatever the
        # environment provides (a local file store when nothing is set).
        mlflow.set_tracking_uri(self.config.mlflow_uri)
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)

            for key, value in scores.items():
                mlflow.log_metric(key, value)
                print(key, value)
            mlflow.log_metric("ROC_AUC", roc_auc)

            # The model registry is not available with a file-based store,
            # so only register the model when tracking is remote.
            if tracking_url_type_store != "file":
                mlflow.xgboost.log_model(model, "Churn_model", registered_model_name="Churn_model")
            else:
                mlflow.xgboost.log_model(model, "Churn_model")

        return scores



In [13]:
# Run the evaluation stage end to end. There is deliberately no try/except:
# catching an exception only to re-raise it handles nothing and just adds a
# frame to the traceback, so errors are left to propagate as they are.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config = model_evaluation_config)
model_evaluation.get_model_evaluation_object()

[2024-01-20 13:23:55,114: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-01-20 13:23:55,117: INFO: common: yaml file: params.yaml loaded successfully]
[2024-01-20 13:23:55,119: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-01-20 13:23:55,121: INFO: common: created directory at: artifacts]
[2024-01-20 13:23:55,122: INFO: common: created directory at: artifacts/model_evaluation]
[2024-01-20 13:23:55,278: INFO: common: json file saved at: artifacts\model_evaluation\scores.json]
Accuracy 0.7924170616113744
Precision 0.600326264274062
Recall 0.6559714795008913
Specificity 0.841833440929632
F1 Score 0.6269165247018741
FPR 0.15816655907036797
FDR 0.399673735725938
FNR 0.34402852049910876
MCC 0.48442348202857055


