In [1]:
# Step one directory up so the relative paths used by later cells
# (e.g. config/config.yaml, params.yaml, schema.yaml, artifacts/ as seen in the
# log output) resolve against the parent directory.
# NOTE(review): this is not idempotent — every re-run of this cell climbs one
# more level. Restart the kernel before running the notebook again.
import os
os.chdir("../")
os.getcwd()  # last expression of the cell: displays the new working directory

'/home/izam/coding/Customer-Churn'

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    Annotations are not enforced at runtime; values are stored exactly as passed
    in (presumably strings / config mappings loaded from YAML — verify against
    ``read_yaml``).
    """
    # Directory under which the per-model files and the selected best model are saved.
    root_dir: Path
    # CSV file read as the training set.
    train_data_path: Path
    # CSV file read as the hold-out test set.
    test_data_path: Path
    # File name (joined onto ``root_dir``) under which the best model is saved.
    model_name: str
    # Hyper-parameter search spaces, looked up per model by attribute access
    # (``params.Decision_Tree``, ``params.SVC``, ...).
    params: dict
    # Name of the label column that is split off from the feature columns.
    target_col: str

In [3]:
from CustomerChurn.constants import *
from CustomerChurn.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the config, params and schema files and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the data-schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes beneath this directory, so make sure it exists up front.
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training stage.

        Creates the stage's root directory as a side effect.

        Returns:
            ModelTrainerConfig: populated from the ``model_trainer`` section of
            the config, the complete params mapping, and the ``name`` of the
            schema's ``TARGET_COLUMN`` entry.
        """
        trainer_section = self.config.model_trainer
        target_entry = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_name=trainer_section.model_name,
            params=self.params,
            target_col=target_entry.name,
        )

In [5]:
import pandas as pd
from box import ConfigBox
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score
from CustomerChurn import logger
from CustomerChurn.utils.common import save_bin

# models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier



In [6]:
class ModelTrainer:
    """Tunes a fixed set of classifiers and saves each one plus the best performer."""

    def __init__(self, config: ModelTrainerConfig):
        """Load the train/test CSVs and split each into features and target.

        Args:
            config: Paths, target column name and hyper-parameter search spaces.
        """
        self.config = config
        train = pd.read_csv(self.config.train_data_path)
        test = pd.read_csv(self.config.test_data_path)

        target = self.config.target_col
        self.X_train = train.drop(columns=[target])
        self.y_train = train[target]
        self.X_test = test.drop(columns=[target])
        self.y_test = test[target]

    def _randomized_search(self, name, clf, params, runs=50):
        """Tune ``clf`` with a randomized search and score it on the test split.

        Args:
            name: Label used in the log messages.
            clf: Unfitted estimator to tune.
            params: Parameter distributions handed to ``RandomizedSearchCV``.
            runs: Number of parameter settings to sample (``n_iter``).

        Returns:
            tuple: ``(best_estimator, test_accuracy)``.
        """
        search = RandomizedSearchCV(
            clf,
            params,
            n_iter=runs,
            cv=5,
            n_jobs=-1,
            random_state=2,
            verbose=0,
        )
        search.fit(self.X_train, self.y_train)
        best_model = search.best_estimator_

        # Mean cross-validated score of the winning parameter setting.
        logger.info("Trained with {} with score: {:.3f}".format(name, search.best_score_))

        # Accuracy of the refitted best estimator on the hold-out split.
        accuracy = accuracy_score(self.y_test, best_model.predict(self.X_test))
        logger.info('Predicted with {} ; Test score : {:.3f}'.format(name, accuracy))

        return best_model, accuracy

    def _candidate_models(self):
        """Map each model label to ``(unfitted estimator, search space)``."""
        spaces = self.config.params
        return {
            "Decision_Tree": (DecisionTreeClassifier(), spaces.Decision_Tree),
            "Random_Forest": (RandomForestClassifier(), spaces.Random_Forest),
            "SVC": (SVC(), spaces.SVC),
            "LogisticRegression": (LogisticRegression(), spaces.LogisticRegression),
            "MultinomialNB": (MultinomialNB(), spaces.MultinomialNB),
            "GradientBoost": (GradientBoostingClassifier(), spaces.GradientBoost),
            "AdaBoost": (AdaBoostClassifier(), spaces.AdaBoost),
            "XGBoost": (XGBClassifier(), spaces.XGBoost),
            "LGBM": (LGBMClassifier(), spaces.LGBM),
        }

    def train(self):
        """Tune every candidate, save each tuned model, then save the best one.

        Each tuned model is written to ``<root_dir>/models/<label>.joblib``; the
        model with the highest test accuracy is additionally written to
        ``<root_dir>/<model_name>``.
        """
        models_dir = os.path.join(self.config.root_dir, "models")
        create_directories([models_dir])

        trained_models = []  # [(model, score), (model, score), ..]
        for label, (clf, params) in self._candidate_models().items():
            clf_model, score = self._randomized_search(name=label, clf=clf, params=params)
            trained_models.append((clf_model, score))

            save_bin(data=clf_model, path=Path(models_dir) / f"{label}.joblib")

        # max() returns the first of several equal maxima, i.e. the same element a
        # stable descending sort would put at index 0.
        # NOTE(review): the winner is picked by test-set accuracy, so the score
        # logged for it is an optimistic estimate; consider ranking by the
        # cross-validation score instead.
        best_model, best_score = max(trained_models, key=lambda pair: pair[1])

        save_bin(data=best_model, path=Path(os.path.join(self.config.root_dir, self.config.model_name)))

        # Use the class name directly rather than slicing the estimator's repr at
        # the first "(", which silently truncates when the repr has no parenthesis.
        best_model_name = type(best_model).__name__
        best_model_score = round(best_score, 3)
        logger.info(f"Saved main model as {best_model_name}, with score - {best_model_score}")

In [7]:
# Run the training stage end to end: load settings, build the trainer, then
# tune and persist the models.
# No try/except here: catching an exception only to re-raise it unchanged adds
# nothing but an extra traceback frame, so errors are left to propagate directly.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
# Keep the trainer under its own name so `model_trainer_config` stays the config object.
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2023-12-15 21:33:40,521: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-12-15 21:33:40,525: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-15 21:33:40,530: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-12-15 21:33:40,531: INFO: common: created directory at: artifacts]
[2023-12-15 21:33:40,532: INFO: common: created directory at: artifacts/model_trainer]
[2023-12-15 21:33:40,555: INFO: common: created directory at: artifacts/model_trainer/models]




[2023-12-15 21:33:42,473: INFO: 4081035710: Trained with Decision_Tree with score: 0.732]
[2023-12-15 21:33:42,477: INFO: 4081035710: Predicted with Decision_Tree ; Test score : 0.730]
[2023-12-15 21:33:42,486: INFO: common: binary file saved at: artifacts/model_trainer/models/Decision_Tree.joblib]




[2023-12-15 21:33:43,597: INFO: 4081035710: Trained with Random_Forest with score: 0.794]
[2023-12-15 21:33:43,620: INFO: 4081035710: Predicted with Random_Forest ; Test score : 0.805]
[2023-12-15 21:33:43,679: INFO: common: binary file saved at: artifacts/model_trainer/models/Random_Forest.joblib]




[2023-12-15 21:33:46,365: INFO: 4081035710: Trained with SVC with score: 0.791]
[2023-12-15 21:33:46,508: INFO: 4081035710: Predicted with SVC ; Test score : 0.809]
[2023-12-15 21:33:46,511: INFO: common: binary file saved at: artifacts/model_trainer/models/SVC.joblib]
[2023-12-15 21:33:46,603: INFO: 4081035710: Trained with LogisticRegression with score: 0.798]
[2023-12-15 21:33:46,607: INFO: 4081035710: Predicted with LogisticRegression ; Test score : 0.806]
[2023-12-15 21:33:46,609: INFO: common: binary file saved at: artifacts/model_trainer/models/LogisticRegression.joblib]
[2023-12-15 21:33:46,706: INFO: 4081035710: Trained with MultinomialNB with score: 0.776]
[2023-12-15 21:33:46,708: INFO: 4081035710: Predicted with MultinomialNB ; Test score : 0.793]
[2023-12-15 21:33:46,709: INFO: common: binary file saved at: artifacts/model_trainer/models/MultinomialNB.joblib]




[2023-12-15 21:33:52,983: INFO: 4081035710: Trained with GradientBoost with score: 0.797]
[2023-12-15 21:33:52,987: INFO: 4081035710: Predicted with GradientBoost ; Test score : 0.813]
[2023-12-15 21:33:52,991: INFO: common: binary file saved at: artifacts/model_trainer/models/GradientBoost.joblib]




[2023-12-15 21:33:56,164: INFO: 4081035710: Trained with AdaBoost with score: 0.796]
[2023-12-15 21:33:56,203: INFO: 4081035710: Predicted with AdaBoost ; Test score : 0.815]
[2023-12-15 21:33:56,250: INFO: common: binary file saved at: artifacts/model_trainer/models/AdaBoost.joblib]




[2023-12-15 21:33:56,734: INFO: 4081035710: Trained with XGBoost with score: 0.797]
[2023-12-15 21:33:56,739: INFO: 4081035710: Predicted with XGBoost ; Test score : 0.820]
[2023-12-15 21:33:56,743: INFO: common: binary file saved at: artifacts/model_trainer/models/XGBoost.joblib]




[2023-12-15 21:33:57,724: INFO: 4081035710: Trained with LGBM with score: 0.799]
[2023-12-15 21:33:57,729: INFO: 4081035710: Predicted with LGBM ; Test score : 0.818]
[2023-12-15 21:33:57,733: INFO: common: binary file saved at: artifacts/model_trainer/models/LGBM.joblib]
[2023-12-15 21:33:57,744: INFO: common: binary file saved at: artifacts/model_trainer/model.joblib]
[2023-12-15 21:33:57,748: INFO: 4081035710: Saved main model as XGBClassifier, with score - 0.82]
