In [1]:
from collections import namedtuple
import os
import json
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score,accuracy_score,mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
# Work from the parent of the directory the kernel started in.
# The start directory is remembered the first time this cell runs so that
# re-executing the cell does not climb one level higher on every run.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [2]:
pwd

'd:\\projects\\credit_default_ml_project'

In [3]:
# Plain record holding the three settings of the model-tuning stage.
# NOTE: the frozen dataclass of the same name declared in the following cell
# rebinds this name, so this namedtuple is shadowed once that cell has run.
ModelTuningConfig = namedtuple(
    "ModelTuningConfig",
    ("root_dir", "model_scores", "model_name"),
)

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTuningConfig:
    """Immutable settings for the model-tuning stage.

    Instances are built by ``ConfigurationManager.get_model_tuning_config``
    and consumed by ``ModelTuning``.
    """
    # Output directory of the stage; the two file names below are joined onto it.
    root_dir: Path
    # File name of the JSON score report written inside root_dir.
    model_scores: str
    # File name of the pickled model written inside root_dir.
    model_name: str

In [5]:
from Credit_Default.constants import *
from Credit_Default.utils import *

In [6]:
class ConfigurationManager:
    """Load the project YAML files and hand out per-stage configuration objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the main configuration and the schema with ``read_yaml``.

        Args:
            config_filepath: Location of the main configuration YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

    def get_model_tuning_config(self) -> ModelTuningConfig:
        """Build the ``ModelTuningConfig`` from the ``model_tuning`` section.

        The section's ``root_dir`` is passed to ``create_directories`` before
        the configuration object is assembled.

        Returns:
            ModelTuningConfig with ``root_dir`` as a ``Path`` and both file
            names coerced to ``str``.
        """
        # Attribute access implies read_yaml returns an attribute-style mapping.
        tuning_section = self.config.model_tuning

        create_directories([tuning_section.root_dir])

        return ModelTuningConfig(
            root_dir=Path(tuning_section.root_dir),
            model_scores=str(tuning_section.model_scores),
            model_name=str(tuning_section.model_name),
        )

In [7]:
import os

from pathlib import Path
import pandas as pd
import numpy as np
from Credit_Default.entity import *
import pickle

class ModelTuning:
    """Grid-search a random forest, then persist its parameters, scores and model.

    The train and test arrays are loaded from ``TRAIN_ARRAY_FILE_PATH`` and
    ``TEST_ARRAY_FILE_PATH``; the last column of each array is used as the
    target and every other column as a feature.
    """

    def __init__(self, config: "ModelTuningConfig", random_state=None):
        """Load the arrays and the model comparison table.

        Args:
            config: Stage settings (``root_dir``, ``model_scores``, ``model_name``).
            random_state: Optional seed forwarded to every
                ``RandomForestClassifier`` built by this class. The default
                ``None`` keeps the unseeded behaviour.
        """
        self.config = config
        self.random_state = random_state
        self.train_array = np.load(TRAIN_ARRAY_FILE_PATH)
        self.test_array = np.load(TEST_ARRAY_FILE_PATH)
        self.model_list = pd.read_csv(MODEL_LIST_FILE_PATH)

    def best_model_csv(self):
        """Print and return the model with the highest ``Accucracy_score``.

        The column name keeps the spelling used in the model list CSV.
        When several rows tie for the maximum, the first one is reported.

        Returns:
            The ``Model`` value of the best-scoring row.

        Raises:
            ValueError: If no row carries a comparable score (empty table or
                all scores missing).
        """
        scores = self.model_list.Accucracy_score
        top_rows = self.model_list[scores == scores.max()]
        if top_rows.empty:
            raise ValueError("Model list has no row with a valid Accucracy_score.")
        best_model = top_rows.Model.values[0]
        print(f"Best Model among model csv : {best_model}")
        return best_model

    def model_tuning_and_saving_parameters(self):
        """Run a 5-fold grid search and write the best parameters to ``PARAMS_FILE_PATH``."""
        rfc = RandomForestClassifier(random_state=self.random_state)
        parameters = {
            "n_estimators": [5, 10, 50, 100, 120],
            "max_depth": [2, 4, 8, 16, 18],
            "criterion": ['gini', 'entropy'],
        }
        # Only the training split is used for the search.
        x_train, y_train = self.train_array[:, :-1], self.train_array[:, -1]
        CV_rfc = GridSearchCV(estimator=rfc, param_grid=parameters, cv=5)
        CV_rfc.fit(x_train, y_train)
        write_yaml(PARAMS_FILE_PATH, CV_rfc.best_params_)

    def _build_tuned_model(self):
        """Create an unfitted random forest from the parameters stored in ``PARAMS_FILE_PATH``."""
        param = read_yaml(PARAMS_FILE_PATH)
        return RandomForestClassifier(
            criterion=param['criterion'],
            max_depth=param['max_depth'],
            n_estimators=param['n_estimators'],
            random_state=self.random_state,
        )

    def saving_model_scores(self):
        """Score the tuned model with ``model_score`` and write the report as JSON.

        The report is written to ``root_dir / model_scores``.
        """
        # NOTE(review): presumably model_score fits the estimator it receives;
        # if so, the scores only describe the model pickled by saving_model()
        # when random_state is set - confirm against model_score.
        score = model_score(self.train_array, self.test_array, self._build_tuned_model())

        report = {
            'Model': 'Random_Forest_Classifier',
            'Accucracy_score': score[0],
            'ROC_AUC_Score': score[1],
            'MSE': score[-1],
        }

        model_score_file_path = os.path.join(self.config.root_dir, self.config.model_scores)
        with open(model_score_file_path, "w") as report_file:
            json.dump(report, report_file, indent=4)
        logger.info("Tuned Model score in .json format added.")

    def saving_model(self):
        """Fit the tuned model on the training split and pickle it.

        The pickle is written to ``root_dir / model_name``.
        """
        x_train, y_train = self.train_array[:, :-1], self.train_array[:, -1]
        model = self._build_tuned_model()
        model.fit(x_train, y_train)

        model_filepath = os.path.join(self.config.root_dir, self.config.model_name)
        # The context manager closes the handle even if pickling fails.
        with open(model_filepath, 'wb') as model_file:
            pickle.dump(model, model_file)
        logger.info("Tuned Model pickle file saved.")

In [12]:
# Run the tuning stage end to end: report the best baseline model, grid-search
# the random forest, then persist its scores and the fitted model.
try:
    config = ConfigurationManager()
    model_tuning_config = config.get_model_tuning_config()
    model_tuning = ModelTuning(config=model_tuning_config)
    # The steps run in this fixed order; any failure stops the remaining ones.
    for pipeline_step in (
        model_tuning.best_model_csv,
        model_tuning.model_tuning_and_saving_parameters,
        model_tuning.saving_model_scores,
        model_tuning.saving_model,
    ):
        pipeline_step()
except Exception:
    raise

Best Model among model csv : Random_Forest_Classifier
