In [1]:
import os

In [2]:
os.chdir('../')

In [3]:
import sys
from src.logger import logging  
from src.exception import CustomException
from src import *
import pandas as pd
import numpy as np
from pathlib import Path
from dataclasses import dataclass
from src.utils.common import read_yaml_file, load_object, save_object, eval_model, save_model_metrics, create_directory

In [4]:
@dataclass(frozen=True)
class ModelTrainingConfig:
    model_path: Path
    model_metrics_path: Path

In [5]:
class ConfigurationManager:
    def __init__(self,
                  config_file_path = CONFIG_FILE_PATH):
        try:
            self.config = read_yaml_file(config_file_path)

            logging.info("Configuration and Parameters files have been read successfully")

            logging.info("Creating directories to store artifacts")
            create_directory([self.config.artifacts_directory])
            logging.info("Directories have been created successfully")
        except Exception as e:
            raise CustomException(e, sys)

    def get_model_config(self) -> ModelTrainingConfig:
        try:
            config = self.config.model_training
            logging.info("Creating directories to store model artifacts")
            create_directory([config.root_dir])

            logging.info("Directories have been created successfully")

            model_config = ModelTrainingConfig(
                model_path = config.model_path,
                model_metrics_path = config.model_metrics_path
            )

            return model_config
        
        except Exception as e:
            raise CustomException(e, sys)
        
        

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier


In [7]:
class ModelBuilding:
    def __init__(self, config: ModelTrainingConfig):
        self.config = config

    def initiate_model_building(self, train_arr, test_arr):
        try:
            logging.info("Initiating model building process")
            logging.info("Splitting data into train and test sets")
            X_train = train_arr[:, :-1]
            y_train = train_arr[:, -1]
            X_test = test_arr[:, :-1]
            y_test = test_arr[:, -1]

            logging.info("Splitting has been done successfully")

            logging.info("Specifying models to be trained")

            models = {
                "LogisticRegression": LogisticRegression(),
                "RandomForestClassifier": RandomForestClassifier(),
                "DecisionTreeClassifier": DecisionTreeClassifier(),
                "GradientBoostingClassifier": GradientBoostingClassifier(),
                "AdaBoostClassifier": AdaBoostClassifier(),
                "SVC": SVC(),
                "KNeighborsClassifier": KNeighborsClassifier(),
                "GaussianNB": GaussianNB(),
                "XGBClassifier": XGBClassifier()
            }

            logging.info("Models have been specified successfully")

            logging.info("Hyperparameter tuning for models")

            params = {
                "LogisticRegression": {
                    "C": [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                    "penalty": ['l1', 'l2', 'elasticnet', 'none']
                },
                "RandomForestClassifier": {
                    "n_estimators": [100, 200, 300, 400, 500],
                    "max_depth": [5, 10, 15, 20, 25, 30],
                    "min_samples_split": [2, 5, 10, 15, 100],
                    "min_samples_leaf": [1, 2, 5, 10]
                },
                "DecisionTreeClassifier": {
                    "max_depth": [5, 10, 15, 20, 25, 30],
                    "min_samples_split": [2, 5, 10, 15, 100],
                    "min_samples_leaf": [1, 2, 5, 10]
                },
                "GradientBoostingClassifier": {
                    "n_estimators": [100, 200, 300, 400, 500],
                    "learning_rate": [0.001, 0.01, 0.1, 1],
                    "subsample": [0.5, 0.7, 1.0],
                    "max_depth": [3, 7, 9]
                },
                "AdaBoostClassifier": {
                    "n_estimators": [50, 100, 200, 300, 400, 500],
                    "learning_rate": [0.001, 0.01, 0.1, 1]
                },
                "SVC": {
                    "C": [0.001, 0.01, 0.1, 1, 10],
                    "kernel": ['linear', 'poly', 'rbf', 'sigmoid'],
                    "degree": [3, 4, 5, 6],
                    "gamma": ['scale', 'auto']
                },
                "KNeighborsClassifier": {
                    "n_neighbors": [3, 5, 7, 9],
                    "weights": ['uniform', 'distance'],
                    "algorithm": ['auto', 'ball_tree', 'kd_tree', 'brute']
                },
                "GradientBoostingClassifier": {
                    "n_estimators": [100, 200, 300, 400, 500],
                    "learning_rate": [0.001, 0.01, 0.1, 1],
                    "subsample": [0.5, 0.7, 1.0],
                    "max_depth": [1, 3, 7, 9, 11, 13, 15]
                },
            }

            logging.info("Hyperparameter tuning has been done successfully")

            logging.info("Model training and evaluation")

            model_report:dict = eval_model(X_train, y_train, X_test, y_test, models, params)

            logging.info("Model training and evaluation has been done successfully")

            logging.info("Saving model and metrics")

            save_model_metrics(model_report, self.config.model_metrics_path)
            
            logging.info("Model and metrics have been saved successfully")

            best_model_score = max(sorted(model_report.values()))

            best_model_name = [k for k, v in model_report.items() if v == best_model_score][0]

            best_model = models[best_model_name]

            if best_model_score < 0.75:
                logging.warning("Model performance is below 75%. Please consider retraining the model")

            logging.info(f'The best model is {best_model_name} with an r2 score of {best_model_score}')

            logging.info("Saving the best model")
            save_object(
                object = best_model,
                object_path = self.config.model_path
            )

            logging.info("Model has been saved successfully")

            return best_model

        except Exception as e:
            raise CustomException(e, sys)

In [8]:
from src.components.data_ingestion import DataIngestion, DataIngestionConfig
from src.components.data_transformation import DataTransformation, DataTransformationConfig


In [13]:
if __name__ == "__main__":
    try:
        config = ConfigurationManager()
        model_config = config.get_model_config()

        training = ModelBuilding(model_config)
        training.initiate_model_building(
            train_arr= 'artifacts\data_transformation\train_arr.csv',
            test_arr= 'artifacts\data_transformation\test_arr.csv'
    )
        
    except Exception as e:
        logging.error(e)
        raise CustomException(e, sys)

CustomException: An error occurred in C:\Users\ayush\AppData\Local\Temp\ipykernel_7832\2149642980.py at line 9 with message An error occurred in C:\Users\ayush\AppData\Local\Temp\ipykernel_7832\45513573.py at line 9 with message (slice(None, None, None), slice(None, -1, None)) and details <module 'sys' (built-in)> and details <module 'sys' (built-in)>