In [1]:
import os

In [2]:
%pwd

'd:\\iNeuron_Projects\\End_to_End_ML_Dental_Implant_Sandblasting\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'd:\\iNeuron_Projects\\End_to_End_ML_Dental_Implant_Sandblasting'

In [5]:
###
import pandas as pd
import numpy as np
from dataclasses import dataclass
from pathlib import Path
from Dental_Implant_Sandblasting import logger
from Dental_Implant_Sandblasting.utils.common import read_yaml, create_directories
from Dental_Implant_Sandblasting.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH, SCHEMA_FILE_PATH
from sklearn.linear_model import Ridge, ElasticNet, BayesianRidge, HuberRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score
import joblib

# Define ModelTrainerConfig dataclass
@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    test_size: float
    random_state: int
    models: dict
    param_grids: dict
    alpha: float
    l1_ratio: float
    target_column: str

# Define ConfigurationManager class
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH, schema_filepath=SCHEMA_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)
        create_directories([self.config['artifacts_root']])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config['model_trainer']
        params = self.params['model_training']
        param_grids = self.params['hyperparameter_tuning']

        create_directories([config['root_dir']])

        # Debug print statements
        print("Debug: Entire params dictionary:")
        print(self.params)
        print("Debug: Keys in params:")
        print(self.params.keys())
        print("Debug: Contents of params['model_training']:")
        print(params)
        print("Debug: Keys in params['model_training']:")
        print(params.keys())

        try:
            alpha = params['models']['elasticnet']['alpha']
            l1_ratio = params['models']['elasticnet']['l1_ratio']
        except KeyError as e:
            logger.error(f"KeyError: {e} - Check the params.yaml file for the correct structure.")
            raise

        target_column = params['target_column']

        model_trainer_config = ModelTrainerConfig(
            root_dir=Path(config['root_dir']),
            train_data_path=Path(config['train_data_path']),
            test_data_path=Path(config['test_data_path']),
            test_size=params['test_size'],
            random_state=params['random_state'],
            models=params['models'],
            param_grids=param_grids,
            alpha=alpha,
            l1_ratio=l1_ratio,
            target_column=target_column
        )
        return model_trainer_config

# Define ModelTrainer class
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config
        self.models = {
            "Ridge": Ridge(alpha=self.config.models['ridge']['alpha']),
            "ElasticNet": ElasticNet(alpha=self.config.alpha, l1_ratio=self.config.l1_ratio),
            "BayesianRidge": BayesianRidge(),
            "HuberRegressor": HuberRegressor(epsilon=self.config.models['huber_regressor']['epsilon']),
            "RandomForest": RandomForestRegressor(random_state=self.config.random_state),
            "GradientBoosting": GradientBoostingRegressor(random_state=self.config.random_state),
            "SVR": SVR(),
            "XGBRegressor": XGBRegressor(random_state=self.config.random_state)
        }
        self.param_grids = {
            'RandomForest': self.config.param_grids['random_forest'],
            'GradientBoosting': self.config.param_grids['gradient_boosting'],
            'Ridge': self.config.param_grids['ridge'],
            'ElasticNet': self.config.param_grids['elasticnet'],
            'HuberRegressor': self.config.param_grids['huber'],
            'SVR': self.config.param_grids['svr'],
            'XGBRegressor': self.config.param_grids['xgboost']
        }

    def load_data(self):
        try:
            train_data = pd.read_csv(self.config.train_data_path)
            test_data = pd.read_csv(self.config.test_data_path)

            X_train = train_data.drop(columns=[self.config.target_column])
            y_train = train_data[self.config.target_column]

            X_test = test_data.drop(columns=[self.config.target_column])
            y_test = test_data[self.config.target_column]

            return X_train, y_train, X_test, y_test
        except Exception as e:
            logger.error(f"Error loading data: {e}")
            raise

    def evaluate_models(self, X_train, y_train):
        model_performance = {}

        for model_name, model in self.models.items():
            logger.info(f"Training {model_name}...")
            try:
                cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_absolute_error')
                mae = -cv_scores.mean()

                model_performance[model_name] = {
                    "MAE": mae
                }

                logger.info(f"{model_name} - MAE: {mae:.4f}")
            except Exception as e:
                logger.error(f"Error training {model_name}: {e}")

        return model_performance

    def hyperparameter_tuning(self, X_train, y_train):
        best_models = {}

        for model_name in self.param_grids.keys():
            grid_search = GridSearchCV(
                estimator=self.models[model_name],
                param_grid=self.param_grids[model_name]['param_grid'],
                cv=self.config.param_grids['cv'],
                scoring=self.config.param_grids['scoring'],
                n_jobs=-1,
                verbose=2
            )
            logger.info(f"Tuning {model_name}...")
            try:
                grid_search.fit(X_train, y_train)
                best_models[model_name] = grid_search.best_estimator_
                logger.info(f"Best parameters for {model_name}: {grid_search.best_params_}")
            except Exception as e:
                logger.error(f"Error tuning {model_name}: {e}")

        return best_models

    def save_best_models(self, best_models):
        for model_name, model in best_models.items():
            try:
                joblib.dump(model, self.config.root_dir / f"{model_name}.joblib")
                logger.info(f"Saved best model {model_name} to {self.config.root_dir / f'{model_name}.joblib'}")
            except Exception as e:
                logger.error(f"Error saving model {model_name}: {e}")

    def execute(self):
        try:
            X_train, y_train, X_test, y_test = self.load_data()
            model_performance = self.evaluate_models(X_train, y_train)
            performance_df = pd.DataFrame(model_performance).T
            print("\nModel Performance:\n", performance_df)

            best_models = self.hyperparameter_tuning(X_train, y_train)

            self.save_best_models(best_models)
        except Exception as e:
            logger.exception(e)
            raise e

# Pipeline execution
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.execute()
except Exception as e:
    logger.exception(e)
    raise e




[2024-07-10 18:46:20,147: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-10 18:46:20,190: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-10 18:46:20,203: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-07-10 18:46:20,206: INFO: common: created directory at: artifacts]
[2024-07-10 18:46:20,209: INFO: common: created directory at: artifacts/model_trainer]
Debug: Entire params dictionary:
{'data_preprocessing': {'imputation_strategy': 'mean'}, 'feature_engineering': {'polynomial_degree': 2}, 'data_transformation': {'test_size': 0.2, 'random_state': 42, 'polynomial_features_degree': 2, 'scaling_method': 'StandardScaler'}, 'model_training': {'test_size': 0.2, 'random_state': 42, 'models': {'ridge': {'alpha': 0.1}, 'elasticnet': {'alpha': 0.1, 'l1_ratio': 0.1}, 'bayesian_ridge': {}, 'huber_regressor': {'epsilon': 1.1, 'max_iter': 1000}}, 'target_column': 'Result (1=Passed, 0=Failed)'}, 'hyperparameter_tuning': {'cv': 5, 'scoring'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[2024-07-10 18:46:20,978: INFO: 2799381885: HuberRegressor - MAE: 0.0661]
[2024-07-10 18:46:20,981: INFO: 2799381885: Training RandomForest...]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[2024-07-10 18:46:23,195: INFO: 2799381885: RandomForest - MAE: 0.0076]
[2024-07-10 18:46:23,197: INFO: 2799381885: Training GradientBoosting...]
[2024-07-10 18:46:24,394: INFO: 2799381885: GradientBoosting - MAE: 0.0005]
[2024-07-10 18:46:24,396: INFO: 2799381885: Training SVR...]
[2024-07-10 18:46:24,473: INFO: 2799381885: SVR - MAE: 0.1343]
[2024-07-10 18:46:24,475: INFO: 2799381885: Training XGBRegressor...]
[2024-07-10 18:46:25,286: INFO: 2799381885: XGBRegressor - MAE: 0.0006]

Model Performance:
                        MAE
Ridge             0.151990
ElasticNet        0.131432
BayesianRidge     0.109247
HuberRegressor    0.066115
RandomForest      0.007593
GradientBoosting  0.000538
SVR               0.134326
XGBRegressor      0.000623
[2024-07-10 18:46:25,342: INFO: 2799381885: Tuning RandomForest...]
Fitting 5 folds for each of 108 candidates, totalling 540 fits
[2024-07-10 18:47:59,728: INFO: 2799381885: Best parameters for RandomForest: {'max_depth': None, 'min_samples_leaf':

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[2024-07-10 18:48:58,704: INFO: 2799381885: Best parameters for SVR: {'C': 0.1, 'epsilon': 0.01, 'kernel': 'linear'}]
[2024-07-10 18:48:58,705: INFO: 2799381885: Tuning XGBRegressor...]
Fitting 5 folds for each of 81 candidates, totalling 405 fits
[2024-07-10 18:49:15,700: INFO: 2799381885: Best parameters for XGBRegressor: {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 50, 'subsample': 1.0}]
[2024-07-10 18:49:15,817: INFO: 2799381885: Saved best model RandomForest to artifacts\model_trainer\RandomForest.joblib]
[2024-07-10 18:49:15,866: INFO: 2799381885: Saved best model GradientBoosting to artifacts\model_trainer\GradientBoosting.joblib]
[2024-07-10 18:49:15,873: INFO: 2799381885: Saved best model Ridge to artifacts\model_trainer\Ridge.joblib]
[2024-07-10 18:49:15,879: INFO: 2799381885: Saved best model ElasticNet to artifacts\model_trainer\ElasticNet.joblib]
[2024-07-10 18:49:15,885: INFO: 2799381885: Saved best model HuberRegressor to artifacts\model_trainer\HuberRegressor.