In [None]:
import os
# Step one directory up so that the relative paths used by later cells
# (e.g. "config/config.yaml") are resolved from the parent directory.
# NOTE: this is not idempotent -- every re-run of the cell moves one level higher.
os.chdir(os.pardir)
os.getcwd()

In [None]:
from package.utils import read_yaml
from dataclasses import dataclass


# Project settings parsed from the YAML file; the path is relative to the current
# working directory. The object is read below with dotted attribute access
# (e.g. CONFIG.MODEL.ROOT_DIR_NAME), so read_yaml must return something that supports it.
CONFIG = read_yaml("config/config.yaml")

@dataclass
class ModelTrainerConstants:
    """Directory and file *names* used by the model-training stage.

    All values except ``PARAMS_FILE_NAME`` are copied from ``CONFIG``. The
    attributes carry no type annotations, so ``@dataclass`` registers no fields
    for them; they are plain class attributes read directly off the class.
    """

    # Temporary aliases for the nested config sections; removed at the end of
    # the class body so they do not become class attributes.
    _model_section = CONFIG.MODEL
    _estimator_section = _model_section.ESTIMATOR

    # Top-level artifacts directory and the model directory beneath it.
    ARITFACTS_ROOT_DIR_NAME = CONFIG.ARITFACTS_ROOT_DIR_NAME
    MODEL_ROOT_DIR_NAME = _model_section.ROOT_DIR_NAME
    EVALUATION_FILE_NAME = _model_section.EVALUATION_FILE_NAME

    # Names related to the persisted estimator.
    ESTIMATOR_ROOT_DIR_NAME = _estimator_section.ROOT_DIR_NAME
    ESTIMATOR_FILE_NAME = _estimator_section.ESTIMATOR_FILE_NAME
    CONFIG_FILE_NAME = _estimator_section.CONFIG_FILE_NAME

    # Hard-coded here rather than read from CONFIG.
    PARAMS_FILE_NAME = "params.json"

    del _model_section, _estimator_section



In [None]:
# Echo every constant as "<NAME>: <value>" so the loaded configuration can be
# checked by eye.
for _constant_name in (
    "ARITFACTS_ROOT_DIR_NAME",
    "MODEL_ROOT_DIR_NAME",
    "EVALUATION_FILE_NAME",
    "ESTIMATOR_ROOT_DIR_NAME",
    "ESTIMATOR_FILE_NAME",
    "CONFIG_FILE_NAME",
    "PARAMS_FILE_NAME",
):
    print(f"{_constant_name}:", getattr(ModelTrainerConstants, _constant_name))

In [None]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ModelTrainerConfigEntity:
    """Typed description of the filesystem locations used by the model-training stage.

    Every field is declared as a ``pathlib.Path`` and none has a default, so the
    generated ``__init__`` requires all seven values, in declaration order.
    ``ModelTrainerComponents`` uses this class as the annotated type of its
    ``model_trainer_config`` field.
    """
    # NOTE: "ARITFACTS" is the spelling used consistently in this file
    # (including the CONFIG key it is read from); keep it in sync if renamed.
    ARITFACTS_ROOT_DIR_PATH: Path  # root output directory (passed to create_dirs)
    MODEL_ROOT_DIR_PATH: Path  # model directory (passed to create_dirs)
    EVALUATION_FILE_PATH: Path  # where the evaluation report is written via save_json

    ESTIMATOR_ROOT_DIR_PATH: Path  # estimator directory (passed to create_dirs)
    ESTIMATOR_FILE_PATH: Path  # where the fitted model is written via save_obj
    CONFIG_FILE_PATH: Path  # where the final model summary is written via save_json

    PARAMS_FILE_PATH: Path  # parameter file read via load_json before evaluation




In [None]:
from dataclasses import dataclass
from pathlib import Path
import os


@dataclass
class ModelTrainerConfig:
    """Concrete filesystem locations for the model-training stage.

    Each attribute combines names from ``ModelTrainerConstants`` into a path.
    All seven values are ``pathlib.Path`` objects, matching the types declared
    by ``ModelTrainerConfigEntity``. Previously only the artifacts root was a
    ``Path`` and the rest were plain strings built with ``os.path.join``.

    The attributes are unannotated class attributes, so ``@dataclass`` registers
    no fields; the class itself (not an instance) is what gets passed around.
    """

    # <artifacts>/
    ARITFACTS_ROOT_DIR_PATH = Path(ModelTrainerConstants.ARITFACTS_ROOT_DIR_NAME)
    # <artifacts>/<model>/
    MODEL_ROOT_DIR_PATH = ARITFACTS_ROOT_DIR_PATH / ModelTrainerConstants.MODEL_ROOT_DIR_NAME
    # <artifacts>/<model>/<evaluation file>
    EVALUATION_FILE_PATH = MODEL_ROOT_DIR_PATH / ModelTrainerConstants.EVALUATION_FILE_NAME

    # <artifacts>/<model>/<estimator>/
    ESTIMATOR_ROOT_DIR_PATH = MODEL_ROOT_DIR_PATH / ModelTrainerConstants.ESTIMATOR_ROOT_DIR_NAME
    # <artifacts>/<model>/<estimator>/<estimator file>
    ESTIMATOR_FILE_PATH = ESTIMATOR_ROOT_DIR_PATH / ModelTrainerConstants.ESTIMATOR_FILE_NAME
    # <artifacts>/<model>/<estimator>/<config file>
    CONFIG_FILE_PATH = ESTIMATOR_ROOT_DIR_PATH / ModelTrainerConstants.CONFIG_FILE_NAME

    # The params file is resolved relative to the current working directory.
    PARAMS_FILE_PATH = Path(ModelTrainerConstants.PARAMS_FILE_NAME)




In [None]:
# Echo every resolved location as "<NAME>: <value>" for a quick visual check.
for _path_name in (
    "ARITFACTS_ROOT_DIR_PATH",
    "MODEL_ROOT_DIR_PATH",
    "EVALUATION_FILE_PATH",
    "ESTIMATOR_ROOT_DIR_PATH",
    "ESTIMATOR_FILE_PATH",
    "CONFIG_FILE_PATH",
    "PARAMS_FILE_PATH",
):
    print(f"{_path_name}:", getattr(ModelTrainerConfig, _path_name))

In [None]:
from package.entity import DataTransformationConfigEntity
from package.exception import CustomException
from package.utils import load_json, save_json, create_dirs, save_obj, evaluate_models, get_performance_report
from dataclasses import dataclass
import sys
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (
    AdaBoostClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)


@dataclass
class ModelTrainerComponents:
    """Evaluate candidate classifiers, keep the best one, and persist it with its reports.

    Attributes:
        data_transformation_config: Supplies ``TRAIN_FILE_PATH`` and
            ``TEST_FILE_PATH``, both loaded with ``np.load``.
        model_trainer_config: Supplies the output directories and the
            params / evaluation / estimator / config file locations.
    """
    data_transformation_config: DataTransformationConfigEntity
    model_trainer_config: ModelTrainerConfigEntity

    @staticmethod
    def _select_best_model_name(performance_report):
        """Return the key of ``performance_report`` whose entry has the highest ``"score"``.

        The earlier ``max(sorted([(name, score), ...]))`` compared tuples, which
        orders by *name* first, so it always returned the alphabetically last
        model regardless of score. Ties go to the first model in iteration order.
        """
        return max(
            performance_report,
            key=lambda model_name: performance_report[model_name]["score"],
        )

    def initiate_training(self):
        """Run the model-training stage end to end.

        Steps:
            1. Create the artifacts, model and estimator directories.
            2. Load the parameter file and the transformed train/test arrays.
            3. Evaluate every candidate model and save the evaluation report.
            4. Refit the highest-scoring model with its best parameters and save it.
            5. Score the refitted model and save a summary (estimator repr,
               scores, parameters).

        Raises:
            CustomException: wraps any exception raised by the steps above.
        """
        try:
            trainer_cfg = self.model_trainer_config

            # create required dir's
            create_dirs(trainer_cfg.ARITFACTS_ROOT_DIR_PATH)
            create_dirs(trainer_cfg.MODEL_ROOT_DIR_PATH)
            create_dirs(trainer_cfg.ESTIMATOR_ROOT_DIR_PATH)

            # candidate models, keyed by the name used in the evaluation report
            models = {
                "Random Forest": RandomForestClassifier(verbose=1),
                "Decision Tree": DecisionTreeClassifier(),
                "Gradient Boosting": GradientBoostingClassifier(verbose=1),
                "Logistic Regression": LogisticRegression(verbose=1),
                "AdaBoost": AdaBoostClassifier(),
            }

            # collect params
            params = load_json(trainer_cfg.PARAMS_FILE_PATH)

            # transformed data collection
            train_data = np.load(self.data_transformation_config.TRAIN_FILE_PATH)
            test_data = np.load(self.data_transformation_config.TEST_FILE_PATH)

            # The last column is the target; every other column is an input feature.
            X_train, y_train = train_data[:, :-1], train_data[:, -1]
            X_test, y_test = test_data[:, :-1], test_data[:, -1]

            # Evaluate once. The report is indexed below as
            # report[model_name]["score"] and report[model_name]["best_params"].
            model_performance_report = evaluate_models(X_train, y_train, X_test, y_test, models, params)

            # save evaluation report
            save_json(model_performance_report, trainer_cfg.EVALUATION_FILE_PATH)

            # pick the model with the highest score and its tuned parameters
            best_model_name = self._select_best_model_name(model_performance_report)
            best_params = model_performance_report[best_model_name]["best_params"]

            # Reuse the estimator object from the candidates dict, apply the
            # best parameters and refit it on the full training split.
            model = models[best_model_name]
            model.set_params(**best_params)

            # train and save model
            model.fit(X_train, y_train)
            save_obj(model, trainer_cfg.ESTIMATOR_FILE_PATH)

            # model prediction
            train_y_pred = model.predict(X_train)
            test_y_pred = model.predict(X_test)

            # get evaluation score
            model_scores = get_performance_report(y_train, y_test, train_y_pred, test_y_pred)

            # create and save model config report
            model_config = {"Estimator": str(model), "scores": model_scores, "params": best_params}
            save_json(model_config, trainer_cfg.CONFIG_FILE_PATH)

        except Exception as e:
            raise CustomException(e, sys)
        



In [None]:
from dataclasses import  dataclass
from package.configuration import DataTransformationConfig


@dataclass
class ModelTrainerPipeline:
    """Entry point that wires the configuration classes into the model-training stage."""

    def main(self) -> None:
        """Build the trainer from the two configuration classes and run training."""
        trainer = ModelTrainerComponents(
            data_transformation_config=DataTransformationConfig,
            model_trainer_config=ModelTrainerConfig,
        )
        trainer.initiate_training()




# Label interpolated into the start/end banners printed below.
STAGE_NAME = "Model Training"

# Run the stage only when this code is executed as the main module
# (which includes running the cell in a notebook kernel).
if __name__=="__main__":
    print(f"\n>>>>>>>>>>>>>>>>>>>>> {STAGE_NAME} initiated <<<<<<<<<<<<<<<<<<<<<")
    obj = ModelTrainerPipeline()
    obj.main()
    # Reached only if main() returned without raising.
    print(f"\n>>>>>>>>>>>>>>>>>>>>> {STAGE_NAME} completed <<<<<<<<<<<<<<<<<<<<<")


In [None]:
from package.utils import load_obj
import numpy as np

# Smoke check: load the persisted estimator and predict the first test row.
# NOTE(review): both paths are hard-coded and assumed to match what the training
# stage wrote -- confirm against config/config.yaml.
test_data = np.load("artifacts/data/transformation/test.npy")
model = load_obj("artifacts/model/estimator/model.h5")

# Same layout as in training: the last column is the target, the rest are features.
first_row_features = test_data[0, :-1]
print("test data is ", first_row_features)

# predict() is given a one-element list, so index 0 of the result is this row's prediction.
pred = model.predict([first_row_features])
print("actual output:", test_data[0, -1])
print("prediction:", pred[0])