In [1]:
from src.utils.common import read_pickle, read_yaml, read_array
from src.data.make_dataset import DataIngestion
from src.constants import *
from src import logger
from dataclasses import dataclass
from box import ConfigBox
from typing import Optional
# model building
import sklearn as sk
from importlib import import_module
import mlflow

from notebooks import CONFIG_FILE_PATH, PARAMS_FILE_PATH

STAGE_NAME = "STAGE 2: MODEL BUILDING"
logger.name = STAGE_NAME

In [5]:
@dataclass(frozen=True)
class ModelBuildingConfig:
    model_dir: str
    sub_module: str
    model_name: str
    train_dir: str
    target_col: str
    experiment_name: str
    metrics: list

class ConfigurationManager:
    def __init__(self,
        config_file_path=CONFIG_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH):
        self.config=read_yaml(config_file_path)
        self.params=read_yaml(params_file_path)

    def get_model_building_config(self)-> ModelBuildingConfig:
        model_building_conf = ModelBuildingConfig(
            model_dir=self.config.model.root_dir,
            sub_module=self.params.model.sub_model,
            model_name=self.params.model.model_name,
            train_dir=self.config.data_directory.processed_train,
            target_col=self.params.data.target_col,
            experiment_name=self.params.model.experiment_name,
            metrics=self.params.model.metrics
        )
        return model_building_conf
    
class BuildModel:
    def __init__(self, config: ModelBuildingConfig):
        self.config = config

    def get_model_class(self):
        try:
            module = import_module(f"sklearn.{self.config.sub_module}")
            model_class = getattr(module, self.config.model_name)
            logger.info(f"{self.config.model_name} sucessfully returned")
            return model_class
        except ModuleNotFoundError as e:
            logger.error(e)
        except Exception as e:
            logger.error(e)

    def log_metrics(self, model, metrics: list, y_true, y_pred, data_name:str="Train"):
        try:
            for metric in metrics:
                module = import_module("sklearn.metrics")
                metric_func = getattr(module, metric)
                mlflow.log_metric(data_name +" "+ metric_func.__name__, metric_func(y_true, y_pred))
            logger.info("Metrics logged sucessfully")
        except Exception as e:
            logger.error(e)
            raise e

    def fit(self, x_train, y_train):
        try:
            model = self.get_model_class()
            # initiating model
            mlflow.set_experiment(self.config.experiment_name)
            mlflow.start_run()
            model = model()
            # fit model
            model.fit(x_train, y_train)
            logger.info("Model Fitted sucessfully")
            y_pred = model.predict(x_train)
            self.log_metrics(model, self.config.metrics, y_train, y_pred)
        except Exception as e:
            logger.error(e)
            raise e
            


In [8]:
if __name__ == "__main__":
    config_manager = ConfigurationManager()
    config_params = config_manager.get_model_building_config()
    build_model = BuildModel(config_params)
    data_ingestion = DataIngestion()
    train = data_ingestion.get_data(str(Path(config_params.train_dir).resolve()))
    x_train, y_train = data_ingestion.split_x_y(train, config_params.target_col)
    build_model.fit(x_train, y_train)

2025-01-08 23:36:11,097 - STAGE 2: MODEL BUILDING - INFO - Yaml read successfully from /Users/goldyrana/mess/deep_learning/projects/telco_customer_churn/config/config.yaml
2025-01-08 23:36:11,100 - STAGE 2: MODEL BUILDING - INFO - Yaml read successfully from /Users/goldyrana/mess/deep_learning/projects/telco_customer_churn/params.yaml
2025-01-08 23:36:11,114 - STAGE 2: MODEL BUILDING - INFO - Data read sucessfully /Users/goldyrana/mess/deep_learning/projects/telco_customer_churn/data/processed/train/train.csv
2025-01-08 23:36:11,115 - STAGE 2: MODEL BUILDING - INFO - Splitted data into x and y sucessfully
2025-01-08 23:36:11,115 - STAGE 2: MODEL BUILDING - ERROR - 'ModelBuildingConfig' object has no attribute 'sub_model'
2025-01-08 23:36:11,152 - STAGE 2: MODEL BUILDING - ERROR - 'NoneType' object is not callable


TypeError: 'NoneType' object is not callable

In [7]:
mlflow.end_run()

In [16]:
data_ingestion.get_data(str(Path("../data/processed/train/train.csv").resolve()))

2025-01-08 23:26:22,504 - STAGE 2: MODEL BUILDING - INFO - Data read sucessfully /Users/goldyrana/mess/deep_learning/projects/telco_customer_churn/data/processed/train/train.csv


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,Churn
0,0.997164,2.290441,1.031376,1.522505,-0.419089,-3.077681,0.069594,-1.184663,1.411207,-1.026295,-1.029522,-0.921956,-1.111759,1.137282,-0.826263,-1.206637,1.331518,-0.826236,Yes
1,0.997164,-0.436597,-0.969579,-0.656812,-0.174887,0.324920,1.123599,0.174130,-0.914591,1.244330,1.240221,-0.921956,1.145819,1.137282,-0.826263,0.828750,0.399968,1.357307,Yes
2,0.997164,-0.436597,1.031376,1.522505,-0.052786,0.324920,-0.984412,-1.184663,-0.914591,-1.026295,1.240221,1.398482,-1.111759,-1.120053,-0.826263,-1.206637,1.331518,-0.350612,No
3,0.997164,2.290441,1.031376,-0.656812,-0.622590,0.324920,1.123599,0.174130,-0.914591,-1.026295,-1.029522,-0.921956,1.145819,1.137282,-0.826263,0.828750,0.399968,0.994769,No
4,-1.002844,-0.436597,1.031376,-0.656812,0.924020,0.324920,1.123599,-1.184663,1.411207,1.244330,-1.029522,1.398482,-1.111759,-1.120053,0.375070,0.828750,-0.531582,0.038534,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1404,0.997164,-0.436597,-0.969579,-0.656812,-0.459789,0.324920,-0.984412,1.532924,0.248308,0.109017,0.105349,0.238263,0.017030,0.008614,1.576404,-1.206637,1.331518,-1.506410,No
1405,0.997164,-0.436597,-0.969579,-0.656812,-0.052786,0.324920,-0.984412,0.174130,-0.914591,1.244330,1.240221,-0.921956,1.145819,1.137282,-0.826263,0.828750,0.399968,1.214287,No
1406,-1.002844,-0.436597,-0.969579,-0.656812,-1.273794,0.324920,-0.984412,0.174130,-0.914591,-1.026295,-1.029522,-0.921956,-1.111759,-1.120053,-0.826263,-1.206637,0.399968,0.173238,No
1407,-1.002844,-0.436597,1.031376,-0.656812,1.331023,0.324920,-0.984412,1.532924,0.248308,0.109017,0.105349,0.238263,0.017030,0.008614,1.576404,-1.206637,-1.463133,-1.514725,No


In [9]:
data_ingestion.get_data(config_params.train_dir)

2025-01-08 23:05:59,958 - STAGE 2: MODEL BUILDING - ERROR - FileNotFoundError ../data/processed/train/train.csv


In [5]:
read_array(str(Path(config_params.train_dir).resolve()))

2025-01-08 23:00:04,174 - STAGE 2: MODEL BUILDING - ERROR - Failed to interpret file '/Users/goldyrana/mess/deep_learning/projects/telco_customer_churn/data/processed/train/train.npy' as a pickle


UnpicklingError: Failed to interpret file '/Users/goldyrana/mess/deep_learning/projects/telco_customer_churn/data/processed/train/train.npy' as a pickle

PosixPath('/Users/goldyrana/mess/deep_learning/projects/telco_customer_churn/data/processed/train/train.npy')