In [30]:
import os
# Move the working directory one level up — presumably so that relative paths
# such as config/config.yaml resolve from the project root (TODO confirm).
# NOTE(review): the path is relative to the *current* working directory, so
# re-running this cell climbs one more level each time; run it once per kernel.
os.chdir("../")
%pwd

'/home/jatin/Projects'

In [49]:
from dataclasses import dataclass
from pathlib import Path

In [55]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """
    Immutable settings consumed by the model training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """
    # Directory in which the selected model file is written.
    root_dir: Path
    # CSV file holding the training rows (features plus target column).
    train_data_path: Path
    # CSV file holding the evaluation rows (features plus target column).
    test_data_path: Path
    # File name of the persisted model, joined onto ``root_dir``.
    model_name: str
    # Parsed parameter file; the trainer reads its ``models`` mapping.
    params: dict
    # Name of the target column. It is a single label (the trainer uses it to
    # drop/select one column), hence ``str`` rather than ``dict``.
    target_column: str

In [None]:
"""
Configuration manager module for the Customer Churn Prediction project.
Handles loading and managing project configuration, parameters, and schema.
"""

from customer_churn_prediction.constants import (
    CONFIG_FILE_PATH, 
    PARAMS_FILE_PATH,
    SCHEMA_FILE_PATH 
)
from customer_churn_prediction.entity.config_entity import (
    DataIngestionConfig,
    DataValidationConfig,
    DataTransformationConfig,
    ModelTrainerConfig
)
from customer_churn_prediction.utils.common import create_directory, read_yaml


class ConfigurationManager:
    """
    Central access point for the project's YAML settings.

    The configuration, schema and parameter files are read once at
    construction; each ``get_*_config`` method then builds the configuration
    entity for one pipeline stage and makes sure that stage's root directory
    exists.
    """

    def __init__(
        self,
        config_path=CONFIG_FILE_PATH,
        schema_path=SCHEMA_FILE_PATH,
        params_path=PARAMS_FILE_PATH,
    ):
        """
        Read the three YAML files and create the artifacts root directory.
        """
        self.config = read_yaml(config_path)
        self.schema = read_yaml(schema_path)
        self.params = read_yaml(params_path)

        create_directory([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """
        Build the data ingestion configuration.
        """
        section = self.config.data_ingestion
        create_directory([section.root_dir])

        # Every entity field is copied verbatim from the YAML section.
        field_names = (
            "root_dir",
            "kaggle_dataset",
            "file",
            "local_data_file",
            "data_dir",
        )
        return DataIngestionConfig(
            **{name: getattr(section, name) for name in field_names}
        )

    def get_data_validation_config(self) -> DataValidationConfig:
        """
        Build the data validation configuration.
        """
        section = self.config.data_validation
        columns = self.schema.COLUMNS
        create_directory([section.root_dir])

        field_names = (
            "root_dir",
            "local_data_file",
            "status_file",
            "status_message_file",
        )
        return DataValidationConfig(
            **{name: getattr(section, name) for name in field_names},
            all_schema=columns,
        )

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """
        Build the data transformation configuration.
        """
        section = self.config.data_transformation
        columns = self.schema.COLUMNS
        target = self.schema.TARGET_COLUMN
        create_directory([section.root_dir])

        field_names = (
            "root_dir",
            "local_data_file",
            "filtered_data_file",
            "encoded_data_file",
            "encoder_file",
        )
        return DataTransformationConfig(
            **{name: getattr(section, name) for name in field_names},
            schema=columns,
            target_column=target,
            params=self.params,
        )

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """
        Build the model trainer configuration.

        The whole parameter file is passed through as ``params`` and only the
        ``name`` of the schema's target column is forwarded.
        """
        section = self.config.model_trainer
        target = self.schema.TARGET_COLUMN
        create_directory([section.root_dir])

        field_names = (
            "root_dir",
            "train_data_path",
            "test_data_path",
            "model_name",
        )
        return ModelTrainerConfig(
            **{name: getattr(section, name) for name in field_names},
            params=self.params,
            target_column=target.name,
        )

In [61]:
# Load the YAML files through ConfigurationManager and build the
# model-trainer configuration used by the cells below.
configuration_manager = ConfigurationManager()
model_selection_config = configuration_manager.get_model_trainer_config()

[2026-01-17 22:02:02,460]:INFO:common.py:Yaml file: config/config.yaml is loaded successfully
[2026-01-17 22:02:02,466]:INFO:common.py:Yaml file: schema.yaml is loaded successfully
[2026-01-17 22:02:02,475]:INFO:common.py:Yaml file: params.yaml is loaded successfully
[2026-01-17 22:02:02,478]:INFO:common.py:Directory created at: artifacts
[2026-01-17 22:02:02,480]:INFO:common.py:Directory created at: artifacts/model_trainer


In [None]:
"""
Model Trainer component that trains multiple models with different hyperparameters
and selects the best one based on evaluation metrics.
"""


import importlib
import itertools
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import recall_score

from customer_churn_prediction import logger
from customer_churn_prediction.entity.config_entity import ModelTrainerConfig

class ModelTrainer:
    """
    Train every configured model over its hyperparameter grid and keep the
    candidate with the highest recall.
    """

    def __init__(self, config: "ModelTrainerConfig"):
        """
        Store the model-trainer configuration.
        """
        self.config = config

    def _get_class_from_string(self, full_class_path):
        """
        Dynamically import a class from its dotted path.

        Args:
            full_class_path: e.g. ``"sklearn.linear_model.LogisticRegression"``;
                everything before the last dot is the module to import.

        Returns:
            The attribute named after the last dot, looked up on that module.
        """
        module_name, class_name = full_class_path.rsplit('.', 1)
        module = importlib.import_module(module_name)
        return getattr(module, class_name)

    def load_train_test_split(self):
        """
        Read the train and test CSV files and split each into features/target.

        Returns:
            ``(train_x, train_y, test_x, test_y)`` where the ``*_x`` frames are
            the CSV contents without the target column and the ``*_y`` values
            are the target column itself.
        """
        target = self.config.target_column

        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        train_x = train_data.drop(columns=[target])
        train_y = train_data[target]

        test_x = test_data.drop(columns=[target])
        test_y = test_data[target]

        return train_x, train_y, test_x, test_y

    def train_and_select_best_model(self):
        """
        Fit every hyperparameter combination of every configured model, keep
        the one with the highest recall, and persist it.

        A model whose parameter grid is empty or missing is fitted once with
        its default constructor arguments. On ties the earliest candidate wins.

        Returns:
            ``(best_model, best_model_name, best_score)``.

        Raises:
            ValueError: if the configuration yields no candidate at all, so
                there is nothing meaningful to save.
        """
        best_model = None
        best_score = -np.inf
        best_model_name = None
        train_x, train_y, test_x, test_y = self.load_train_test_split()

        for model_name, model_config in self.config.params.models.items():
            model_class = self._get_class_from_string(model_config.model_class)

            # An empty/None grid must not crash: itertools.product() with no
            # iterables yields exactly one empty combination (the defaults).
            param_grid = getattr(model_config, "params", None) or {}
            keys = list(param_grid.keys())
            value_lists = [param_grid[key] for key in keys]

            for combination in itertools.product(*value_lists):
                params_dict = dict(zip(keys, combination))
                model = model_class(**params_dict)

                model.fit(train_x, train_y)
                y_pred = model.predict(test_x)

                # Recall is the selection metric because false negatives are
                # the costly error in this use case.
                # NOTE(review): candidates are scored on the test split, so the
                # winning score is an optimistic estimate — consider selecting
                # on a separate validation split.
                score = recall_score(test_y, y_pred)

                logger.info(f"{model_name} | Params: {params_dict} | Recall: {score:.4f}")

                if score > best_score:
                    best_score = score
                    best_model = model
                    best_model_name = model_name

        if best_model is None:
            # Previously ``None`` was silently written to disk as the "model".
            raise ValueError(
                "No model was trained: the 'models' section of the parameters "
                "produced no candidate."
            )

        model_path = os.path.join(self.config.root_dir, self.config.model_name)
        logger.info(f"Best model: {best_model_name} with recall={best_score:.4f}")
        joblib.dump(best_model, model_path)
        logger.info(f"Best model saved at: {model_path}")
        return best_model, best_model_name, best_score


    

In [47]:
# Create the pipeline

from customer_churn_prediction import logger

# End-to-end run of the model training stage: load the configuration, build the
# trainer, then train every candidate and persist the best one.
try:
    config = ConfigurationManager()
    model_selection_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(model_selection_config)
    model_trainer.train_and_select_best_model()
except Exception:
    # Log the failure (with its traceback), then re-raise so the cell still
    # fails visibly instead of swallowing the error.
    logger.exception(
        f"Exception occured while executing the model training and selection pipeline")
    raise

[2026-01-17 21:54:06,511]:INFO:common.py:Yaml file: config/config.yaml is loaded successfully
[2026-01-17 21:54:06,516]:INFO:common.py:Yaml file: schema.yaml is loaded successfully
[2026-01-17 21:54:06,525]:INFO:common.py:Yaml file: params.yaml is loaded successfully
[2026-01-17 21:54:06,527]:INFO:common.py:Directory created at: artifacts
[2026-01-17 21:54:06,529]:INFO:common.py:Directory created at: artifacts/model_trainer
[2026-01-17 21:54:06,530]:ERROR:305617296.py:Exception occured while executing the model training and selection pipeline
Traceback (most recent call last):
  File "/tmp/ipykernel_47400/305617296.py", line 7, in <module>
    model_selection_config = config.get_model_trainer_config()
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_47400/741302221.py", line 99, in get_model_trainer_config
    model_trainer = ModelTrainerConfig(
                    ^^^^^^^^^^^^^^^^^^^
TypeError: ModelTrainerConfig.__init__() got an unexpected keywor

TypeError: ModelTrainerConfig.__init__() got an unexpected keyword argument 'params'

In [6]:
!pwd

/home/jatin/Projects/customer_churn_prediction


In [12]:
# Exploratory: load params.yaml directly (path is relative to the current
# working directory) to inspect the hyperparameter grids.
data = read_yaml(Path('params.yaml'))

[2026-01-17 21:24:24,048]:INFO:common.py:Yaml file: params.yaml is loaded successfully


In [16]:
# Show the per-model entries (model class path and parameter grid) stored
# under the `models` key of the loaded parameters.
print(data.models)

{'logistic_regression': {'model_class': 'sklearn.linear_model.LogisticRegression', 'params': {'C': [0.01, 0.1, 1.0, 10.0], 'solver': ['liblinear', 'lbfgs']}}, 'random_forest': {'model_class': 'sklearn.ensemble.RandomForestClassifier', 'params': {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, 20]}}, 'xgboost': {'model_class': 'xgboost.XGBClassifier', 'params': {'n_estimators': [100, 200], 'learning_rate': [0.01, 0.1, 0.2]}}}


In [26]:
# Exploratory: print each model's parameter grid as (name, candidate values)
# pairs — the same shape the trainer unpacks to build the combinations.
for model,model_config in data.models.items():
    print(model_config.params.items())
    # Scratch experiments kept for reference (not executed):
    # keys, values = zip(*model_config.params.items())
    # print(f"{keys = }, {values = }")
    # for param, values in model_config.params.items():
    #     print(param)

dict_items([('C', BoxList([0.01, 0.1, 1.0, 10.0])), ('solver', BoxList(['liblinear', 'lbfgs']))])
dict_items([('n_estimators', BoxList([50, 100, 200])), ('max_depth', BoxList([5, 10, 20]))])
dict_items([('n_estimators', BoxList([100, 200])), ('learning_rate', BoxList([0.01, 0.1, 0.2]))])
