In [1]:
import os

In [2]:
%pwd

'c:\\Users\\SACHIN\\PycharmProjects\\Obesity-Risk-\\notebook'

In [3]:
os.chdir('../')

In [4]:
%pwd

'c:\\Users\\SACHIN\\PycharmProjects\\Obesity-Risk-'

In [24]:
from pathlib import Path
import os
from dataclasses import dataclass


@dataclass(frozen=True)
class ModelTrainerConfig:
    root : Path
    model : Path
    train_set :Path
    test_set: Path
    scaled_train_set: Path
    scaled_test_set: Path
    preprocessing_obj: Path
    grow_policy:  str
    n_estimators: int
    learning_rate: float
    gamma :   float
    subsample: float
    colsample_bytree: float
    max_depth: int
    min_child_weight: int
    reg_lambda: float
    reg_alpha: float
    data: Path
    


@dataclass(frozen= True)
class DataTransformationConfig:
    root : Path
    data: Path
    train_set : Path
    test_set: Path
    preprocessing_obj: Path

In [25]:
from ObesityRisk.constants import *
from ObesityRisk.utils.common import read_yaml, create_directories

In [26]:
class ConfigurationsManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_file])

    def get_transformation_config(self)-> DataTransformationConfig:
        config = self.config.data_transformation

        create_directories([config.root])
        create_directories([config.train_set])
        create_directories([config.test_set])
        #create_directories([config.preprocessing_obj])

        data_transformation_config = DataTransformationConfig(
            root = config.root,
            data = config.data,
            train_set = config.train_set,
            test_set = config.test_set,
            preprocessing_obj = config.preprocessing_obj
        )

        return data_transformation_config
        

    
    def get_model_trainer(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.XgbClassifier

        create_directories([config.root])

        model_trainer_config = ModelTrainerConfig(
            root = Path(config.root),
            model = Path(config.model),
            train_set = config.train_set,
            test_set = config.test_set,
            scaled_train_set= config.scaled_train_set,
            scaled_test_set = config.scaled_test_set,
            preprocessing_obj= config.preprocessing_obj,
            grow_policy=  params.grow_policy,
            n_estimators= params.n_estimators,
            learning_rate= params.learning_rate,
            gamma =   params.gamma,
            subsample= params.subsample,
            colsample_bytree= params.colsample_bytree,
            max_depth= params.max_depth,
            min_child_weight= params.min_child_weight,
            reg_lambda= params.reg_lambda,
            reg_alpha= params.reg_alpha,
            data = config.data
            
        )

        return model_trainer_config


In [27]:
import os

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegressionCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split,RandomizedSearchCV
from sklearn.metrics import accuracy_score

from ObesityRisk.utils.common import *
from ObesityRisk import logger
from ObesityRisk.constants import *
from ObesityRisk.components.data_transformation import PrepareTransformation
import warnings
warnings.filterwarnings('ignore')

In [28]:
class PrepareModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config

    def initiate_model_trainer(self):
        train_data = pd.read_csv(self.config.train_set)
        test_data = pd.read_csv(self.config.test_set)

        logger.info("Initializing model trainer")

        logger.info("Slpitting Train data into train and test")
        train_X = train_data.iloc[:, :-1] 
        train_y = train_data.iloc[:, -1]
        test_X = test_data.iloc[:, :-1]
        test_y = test_data.iloc[:, -1]

        remap={'Insufficient_Weight':0 ,'Normal_Weight':1 ,'Obesity_Type_I':2 ,'Obesity_Type_II':3,
                    'Obesity_Type_III':4, 'Overweight_Level_I':5 ,'Overweight_Level_II':6}

        train_y = train_data['NObeyesdad'].map(remap)
        test_y = test_data['NObeyesdad'].map(remap)

        logger.info("loading preprocessing object")
        preprocessor = PrepareTransformation.get_data_transformation_object(self)

        logger.info('applying preprocessing object on train features')
        scaled_train_X = preprocessor.fit_transform(train_X)
        scaled_test_x = preprocessor.transform(test_X)

        
        print("Input Features: ", train_data.iloc[:, :-1])
        print("Target Feature: ", train_data.iloc[:, -1])
        print(scaled_train_X)
        print(train_y)

        scaled_train_X_data = np.array(scaled_train_X)
        scaled_train_X_df = pd.DataFrame(scaled_train_X_data)
        scaled_train_X_df.loc[:, 'NObeyesdad'] = train_y
        print(scaled_train_X_df)

        scaled_test_x_data = np.array(scaled_test_x)
        scaled_test_x_df = pd.DataFrame(scaled_test_x_data)
        scaled_test_x_df.loc[:, 'NObeyesdad'] = test_y
        print(scaled_test_x_df)

        #scaled_train_X_df.to_csv(os.path.join(self.config.scaled_train_set, 'scaled_train_data.csv'), index= False)
        #scaled_test_x_df.to_csv(os.path.join(self.config.scaled_test_set, 'scaled_test_data.csv'), index= False)

        

        logger.info('fitting train data into model') 

        xcls = XGBClassifier(grow_policy= self.config.grow_policy, n_estimators=self.config.n_estimators, 
                             learning_rate=self.config.learning_rate, gamma=self.config.gamma, subsample=self.config.subsample,
                             colsample_bytree=self.config.colsample_bytree, max_depth=self.config.max_depth, 
                             min_child_weight=self.config.min_child_weight, reg_lambda=self.config.reg_lambda, 
                             reg_alpha=self.config.reg_alpha)
        xcls.fit(scaled_train_X, train_y)
        pred = xcls.predict(scaled_test_x)
        ac = accuracy_score(test_y, pred)
        print(ac)
        #rcv = RandomizedSearchCV( estimator= xcls, param_distributions=config.params, n_iter=10,cv=5, n_jobs=1,scoring='accuracy')
        #rcv.fit(X_train, y_train)
        #pred = rcv.predict(X_test)
        #ac = accuracy_score(y_test, pred)
        #print(ac)

        save_pickle(os.path.join(self.config.model, "model.pkl"),xcls)
        save_pickle(os.path.join(self.config.preprocessing_obj, 'preprocessor.pkl'), preprocessor)

        return xcls

In [29]:
try:
    config = ConfigurationsManager()
    #base_model_config = config.get_transformation_config()
    #data_transformation = PrepareTransformation(config=base_model_config)
    #train_data, test_data = data_transformation.data_split()
    #data_transformation.get_data_transformation_object()

    get_model_trainer = config.get_model_trainer()
    model_trainer = PrepareModelTrainer(config=get_model_trainer)
    model_trainer.initiate_model_trainer()
except Exception as e:
    raise e

[2024-03-15 10:33:40,929: INFO: common: yaml file Config\Config.yaml loaded successfully]
[2024-03-15 10:33:40,941: INFO: common: yaml file params.yaml loaded successfully]
[2024-03-15 10:33:40,943: INFO: common: creating directory at artifacts]
[2024-03-15 10:33:40,948: INFO: common: creating directory at artifacts/model_trainer]


[2024-03-15 10:33:41,134: INFO: 817510985: Initializing model trainer]
[2024-03-15 10:33:41,135: INFO: 817510985: Slpitting Train data into train and test]
[2024-03-15 10:33:41,150: INFO: 817510985: loading preprocessing object]
[2024-03-15 10:33:41,361: INFO: data_transformation: numeric columns: Index(['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE'], dtype='object') ]
[2024-03-15 10:33:41,365: INFO: data_transformation: categorical columns: Index(['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE',
       'SCC', 'CALC', 'MTRANS'],
      dtype='object') ]
[2024-03-15 10:33:41,367: INFO: data_transformation: Saving the prerocessing objest]
[2024-03-15 10:33:41,375: INFO: 817510985: applying preprocessing object on train features]
Input Features:            id  Gender        Age    Height      Weight  \
0       9958    Male  17.000000  1.770000   97.000000   
1       7841    Male  22.667596  1.753389   54.877111   
2       9293    Male  21.838323  1.8198