In [1]:
# Show the kernel's current working directory before any chdir happens.
%pwd

'e:\\FullStack_Data\\MACHINE_LEARNING\\PROJECTS\\MLPROJECT_part_prediction\\research'

In [2]:
import os
os.chdir('../')
%pwd

'e:\\FullStack_Data\\MACHINE_LEARNING\\PROJECTS\\MLPROJECT_part_prediction'

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Read-only settings bundle handed to the model-training step.

    Instances are frozen, so attributes cannot be reassigned after
    construction. The field order below is the positional constructor order.
    """

    # -- file-system locations --
    root_dir: Path            # directory the trained model file is written under
    train_data_path: Path     # CSV read as the training set
    test_data_path: Path      # CSV read as the evaluation set

    # -- artifact / data identifiers --
    model_name: str           # file name of the dumped model inside root_dir
    target_column: str        # label column separated from the features

    # -- random-forest settings fed into the hyper-parameter search --
    # NOTE(review): these are forwarded as search candidates, so the values
    # loaded from the params file may be lists rather than scalars - confirm.
    n_estimators: float
    max_depth: float
    min_samples_split: float
    min_samples_leaf: float
    bootstrap: bool


In [None]:
from src.ML_Part_predict.constants import *
from src.ML_Part_predict.utils.common import read_yaml,create_directories

class ConfigurationManager:
    """Load the project's YAML files and build per-component config objects.

    Args:
        config_file_p: Path of the main configuration YAML.
        params_file_p: Path of the hyper-parameter YAML.
            NOTE(review): the default assumes the constants package exports
            ``PARAMS_FILE_PATH`` next to ``CONFIG_FILE_PATH`` and
            ``SCHEMA_FILE_PATH`` - confirm. The previous default re-read the
            main config file, which is not where ``random_forest`` is looked up.
        schema_file_p: Path of the data-schema YAML.

    Side effects:
        Creates the ``artifacts_root`` directory named in the main config.
    """

    def __init__(self,config_file_p=CONFIG_FILE_PATH,params_file_p=PARAMS_FILE_PATH,schema_file_p=SCHEMA_FILE_PATH):
        self.config=read_yaml(config_file_p)
        self.params=read_yaml(params_file_p)
        self.schema=read_yaml(schema_file_p)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self)->ModelTrainerConfig:
        """Assemble the settings for the model-training step.

        Combines the ``model_trainer`` section of the main config, the
        ``random_forest`` section of the params file and the ``TARGET_COLUMN``
        entry of the schema. Also creates the trainer's ``root_dir``.

        Returns:
            ModelTrainerConfig: frozen settings object for the trainer.
        """
        config=self.config.model_trainer
        params=self.params.random_forest
        target=self.schema.TARGET_COLUMN

        # The directory to create is the trainer's own root_dir, i.e. the one
        # inside the model_trainer section, not an attribute of the top-level
        # config object.
        create_directories([config.root_dir])

        # Each keyword is passed exactly once; a repeated keyword argument is
        # a SyntaxError and would stop the whole cell from running.
        model_trainer_config=ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            target_column=target.name,
            n_estimators=params.n_estimators,
            max_depth=params.max_depth,
            min_samples_split=params.min_samples_split,
            min_samples_leaf=params.min_samples_leaf,
            bootstrap=params.bootstrap,
        )

        return model_trainer_config
    
    

In [None]:
import pandas as pd
import os
from src.ML_Part_predict import logger
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import (accuracy_score,precision_score,recall_score)
import joblib



In [None]:
class ModelTrainer:
    """Tune, evaluate and persist a random-forest classifier.

    Args:
        config: Settings object providing the train/test CSV paths, the target
            column name, the hyper-parameter candidates and the output
            location (``root_dir`` / ``model_name``).
    """

    def __init__(self,config:ModelTrainerConfig):
        self.config=config

    @staticmethod
    def _as_candidates(value):
        """Wrap a plain scalar in a one-element list; pass anything else through.

        ``RandomizedSearchCV`` expects a list (or a distribution) per
        parameter, so a bare scalar loaded from the params file is turned
        into a single candidate instead of failing inside the search.
        """
        if value is None or isinstance(value, (bool, int, float, str)):
            return [value]
        return value

    def train(self):
        """Run the randomized search, score the best model and dump it.

        Steps:
            1. Read the train and test CSV files and split off the target column.
            2. Run a 3-fold ``RandomizedSearchCV`` over the configured
               random-forest settings.
            3. Log recall, precision and accuracy of the best estimator on
               the test set.
            4. Dump that same estimator with joblib to
               ``root_dir / model_name``.

        The evaluated model and the saved model are the same object: the best
        estimator found by the search (refit on the full training set by the
        search itself), rather than a forest with hard-coded hyper-parameters.
        """
        seed=42

        train_data=pd.read_csv(self.config.train_data_path)
        test_data=pd.read_csv(self.config.test_data_path)
        logger.info("reading of train data and test data successfull")

        logger.info("X_train,X_test,y_train,y_test split initiated")
        target=self.config.target_column
        X_train=train_data.drop([target],axis=1)
        X_test=test_data.drop([target],axis=1)
        # 1-D Series targets: a one-column DataFrame makes scikit-learn warn
        # that a column vector was passed where a 1-D array was expected.
        y_train=train_data[target]
        y_test=test_data[target]
        logger.info("X_train,X_test,y_train,y_test split successfull")

        random_grid = {
            'n_estimators': self._as_candidates(self.config.n_estimators),
            'max_depth': self._as_candidates(self.config.max_depth),
            'min_samples_split': self._as_candidates(self.config.min_samples_split),
            'min_samples_leaf': self._as_candidates(self.config.min_samples_leaf),
            'bootstrap': self._as_candidates(self.config.bootstrap),
        }

        # 'accuracy' is a classification scorer; the regression metric
        # 'neg_mean_squared_error' only makes sense for numeric targets.
        # The forest is seeded as well so repeated runs pick the same model.
        rf_random_CV = RandomizedSearchCV(
            estimator=RandomForestClassifier(random_state=seed),
            param_distributions=random_grid,
            n_iter=100,
            cv=3,
            verbose=2,
            random_state=seed,
            n_jobs=-1,
            scoring='accuracy',
        )

        rf_random_CV.fit(X_train,y_train)
        logger.info("Randomised search CV fir Completed...")

        logger.info(f"Randomised search CV best estimators: {rf_random_CV.best_estimator_}")
        logger.info(f"Randomised search CV best params: {rf_random_CV.best_params_}")

        # With the default refit=True the search has already retrained the
        # winning configuration on the whole training set.
        rf_best_hyp=rf_random_CV.best_estimator_
        logger.info("Random forest fitting with best hyperparameters completed...")

        y_pred_hyp=rf_best_hyp.predict(X_test)

        # NOTE(review): recall/precision are called with their default
        # averaging, which presumes a two-class target - confirm.
        logger.info("Random Forest WITH Best Parameters")
        logger.info(f"recall:{recall_score(y_test,y_pred_hyp)}")
        logger.info(f"precision:{precision_score(y_test,y_pred_hyp)}")
        logger.info(f"accuracy:{accuracy_score(y_test,y_pred_hyp)}")

        # Persist exactly the model whose metrics were logged above.
        joblib.dump(rf_best_hyp, os.path.join(self.config.root_dir,self.config.model_name))
        logger.info("Model dump successfully")

In [None]:
# Driver cell: build the configuration, then train, evaluate and save the model.
try:
    con=ConfigurationManager()
    # Keep the config and the trainer under separate names so neither
    # shadows the other in the notebook namespace.
    model_trainer_config=con.get_model_trainer_config()
    model_trn=ModelTrainer(model_trainer_config)
    model_trn.train()
except Exception:
    # Record the failure (with traceback) in the project log, then re-raise
    # the same exception unchanged so the notebook cell still fails visibly.
    logger.exception("Model training pipeline failed")
    raise