In [40]:
import pandas as pd
import logging
import dill
import pickle
from typing import Tuple, Any
from pathlib import Path
from typing_extensions import Annotated
from model_factory import ModelFactory


# Rebinds the name `logging` from the stdlib module to a module-level Logger.
# Every later `logging.info(...)` call in this notebook goes through this logger,
# and module-only attributes (e.g. `logging.basicConfig`) are no longer reachable
# through this name.
logging = logging.getLogger(__name__)

In [26]:
def load_object(file_path: str) -> object:
    """Deserialize and return the object stored at ``file_path`` using dill.

    Args:
        file_path: Location of the serialized file; it is opened in binary mode.

    Returns:
        The object reconstructed by ``dill.load``.

    Raises:
        Exception: Any error raised while opening or deserializing the file is
            re-raised to the caller unchanged.
    """
    logging.info(f"Entered the load_object method of MainUtils class from: {file_path}")
    try:
        # dill, like pickle, can execute arbitrary code while loading:
        # only use this on files from a trusted source.
        with open(file_path, "rb") as handle:
            loaded = dill.load(handle)
    except Exception as err:
        raise err
    else:
        logging.info(f"Successfully loaded the object from {file_path}")
        logging.info("Exited the load_object method of MainUtils class")
        return loaded

In [24]:
def separate_data(data: pd.DataFrame, target_col: str, yes_no_map=None) -> Tuple[
    Annotated[pd.DataFrame, "Features"],
    Annotated[pd.Series, "Target"]
    ]:
    """Split a DataFrame into a feature frame and a target series.

    Args:
        data: Frame holding both the feature columns and the target column.
        target_col: Name of the column to separate out as the target.
        yes_no_map: Optional mapping handed to ``Series.map`` to recode the
            target values; when ``None`` the target is returned as-is.

    Returns:
        A ``(features, target)`` pair: ``data`` without ``target_col``, and the
        (optionally recoded) ``target_col`` series.

    Raises:
        Exception: Any error raised by the pandas calls (e.g. ``KeyError`` when
            ``target_col`` is not a column) is re-raised unchanged.
    """
    try:
        features = data.drop(columns=[target_col])
        target = data[target_col]
        target = target if yes_no_map is None else target.map(yes_no_map)
    except Exception as err:
        raise err
    return features, target

In [27]:
def load_bin(path: Path) -> Any:
    """Load a binary object from disk with joblib.

    Args:
        path (Path): path to binary file

    Returns:
        Any: object stored in the file
    """
    # joblib is not imported by any top-level cell of this notebook, so the
    # original body raised NameError on a fresh kernel. Importing it here keeps
    # the function usable after "Restart Kernel -> Run All".
    import joblib

    # joblib.load is pickle-based: only load files from a trusted source.
    data = joblib.load(path)
    logging.info(f"binary file loaded from: {path}")
    return data


In [None]:

# Load the transformed train and test data objects from disk via load_object.
# NOTE(review): the files carry an .npz extension but are read with dill through
# load_object and are later treated as DataFrames — confirm how they were written.
data_train = load_object("transformed/train.npz")
data_test = load_object("transformed/test.npz")

# Name of the column that is separated out as the prediction target.
target_col = "cluster"

In [43]:
# Try to unpickle the object stored in the preprocessor file (presumably a
# preprocessor, judging by the file name — TODO confirm).
# Unpickling imports the modules the stored object was defined in; the recorded
# run of this cell failed because `marketing.utils` could not be imported.
# Only unpickle files from a trusted source: pickle.load can execute arbitrary code.
file_path = "transformed/preprocessor_standard.pkl"
with open(file_path, "rb") as file_obj:
    data = pickle.load(file_obj)

ModuleNotFoundError: No module named 'marketing.utils'; 'marketing' is not a package

In [36]:
import sys
import types
# Register an empty stand-in module under the name "marketing" before loading.
# NOTE(review): a bare types.ModuleType has no __path__, so Python does not treat
# it as a package and the submodule `marketing.utils` still cannot be imported —
# this matches the recorded "'marketing' is not a package" error. The stub also
# stays in sys.modules for the rest of the kernel session, where it would shadow
# a real `marketing` package; presumably the real package needs to be importable
# instead — confirm against the project layout.
marketing = types.ModuleType("marketing")
sys.modules["marketing"] = marketing
# NOTE(review): joblib is not imported at module level in the visible cells —
# confirm it is in scope before this line runs.
preprocessor = joblib.load("preprocessing.pkl")

ModuleNotFoundError: No module named 'marketing.utils'; 'marketing' is not a package

In [5]:
# Split each dataset into its feature frame and its target column.
X_train, y_train = separate_data(data_train, target_col)
X_test, y_test = separate_data(data_test, target_col)

In [6]:
# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,education,complain,response,family_size,offers_responded_to,parental_status,recency,mnt_wines,mnt_fruits,mnt_fish_products,...,num_store_purchases,age,days_as_customer,income,mnt_meat_products,num_deals_purchases,num_web_purchases,num_catalog_purchases,num_web_visits_month,total_spending
0,-0.457391,-0.091876,-0.423938,1.547793,-0.504664,0.63332,0.374521,-0.826301,-0.609869,-0.501902,...,-0.856799,1.407725,-0.039655,-0.771518,-0.741436,0.734052,-0.70544,-0.369482,1.123306,-0.967458
1,0.530132,-0.091876,-0.423938,0.446703,-0.504664,0.63332,0.615853,-0.47263,-0.660217,-0.557142,...,-0.238192,-0.334833,0.264929,0.1136,0.234534,1.436874,0.189413,0.160492,0.716378,-0.003217
2,-0.457391,-0.091876,-0.423938,0.446703,0.602262,0.63332,1.270895,-0.38716,-0.408476,0.308277,...,-0.547495,-1.081644,0.657941,-0.331321,0.342067,0.151919,0.824199,0.523276,0.306101,0.097936
3,-1.444913,-0.091876,-0.423938,-0.654386,-0.504664,0.63332,1.270895,-0.891141,-0.232257,-0.133639,...,-0.856799,-1.330581,1.222896,-0.749342,-0.741436,0.151919,-0.70544,-0.369482,0.306101,-0.603103
4,0.530132,-0.091876,-0.423938,-0.654386,0.602262,0.63332,-1.38375,0.594277,-0.534346,-0.557142,...,0.380416,1.573683,-0.982883,0.620485,-0.682618,1.1366,1.086691,0.523276,-0.108041,0.381541


In [7]:
# Path of the model configuration file handed to ModelFactory.
config_path = "model_config.yaml"

In [8]:
# Initialise the ModelFactory from the configuration file path.
model_factory = ModelFactory(config_path)

In [9]:
# Run the factory on the training data and keep its per-model results.
# Per the recorded output, this ran 20 tuning trials for each configured model,
# scored with 5-fold cross-validated accuracy — the details live in ModelFactory
# and the config file, so confirm there.
best_models = model_factory.run(X_train, y_train)

[I 2024-11-26 16:50:35,128] A new study created in memory with name: no-name-6263cdcb-9fb1-46c7-8e1e-965f5cdc03ad


Optimising AdaBoostClassifier...
model_config: {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}...
Cross-validation settings: {'CV': 5, 'scoring': 'accuracy', 'random_state': 42}
study_config["direction"]: maximize
params; {'n_estimators': 919, 'learning_rate': 0.037866605970535656, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:50:42,794] Trial 0 finished with value: 0.9425250151724998 and parameters: {'n_estimators': 919, 'learning_rate': 0.037866605970535656}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 937, 'learning_rate': 0.34525663748825214, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:50:50,042] Trial 1 finished with value: 0.9096310359315914 and parameters: {'n_estimators': 937, 'learning_rate': 0.34525663748825214}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 692, 'learning_rate': 0.5515340668528131, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:50:55,396] Trial 2 finished with value: 0.9101756897651765 and parameters: {'n_estimators': 692, 'learning_rate': 0.5515340668528131}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 552, 'learning_rate': 0.5286105563746923, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:50:59,625] Trial 3 finished with value: 0.9218966402639236 and parameters: {'n_estimators': 552, 'learning_rate': 0.5286105563746923}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 492, 'learning_rate': 0.4446706045542017, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:03,636] Trial 4 finished with value: 0.9297007516222904 and parameters: {'n_estimators': 492, 'learning_rate': 0.4446706045542017}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 472, 'learning_rate': 0.07783355892357503, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:07,291] Trial 5 finished with value: 0.9425234590186895 and parameters: {'n_estimators': 472, 'learning_rate': 0.07783355892357503}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 607, 'learning_rate': 0.012759928375564207, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:12,052] Trial 6 finished with value: 0.930808733135183 and parameters: {'n_estimators': 607, 'learning_rate': 0.012759928375564207}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 932, 'learning_rate': 0.01192417934220611, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:19,458] Trial 7 finished with value: 0.9352748945705793 and parameters: {'n_estimators': 932, 'learning_rate': 0.01192417934220611}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 119, 'learning_rate': 0.20149612908109946, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:20,468] Trial 8 finished with value: 0.9419647998008124 and parameters: {'n_estimators': 119, 'learning_rate': 0.20149612908109946}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 857, 'learning_rate': 0.5072682441308755, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:27,541] Trial 9 finished with value: 0.9046062152783183 and parameters: {'n_estimators': 857, 'learning_rate': 0.5072682441308755}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 258, 'learning_rate': 0.04269003168203068, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:29,859] Trial 10 finished with value: 0.9363844322372824 and parameters: {'n_estimators': 258, 'learning_rate': 0.04269003168203068}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 367, 'learning_rate': 0.058721263969535836, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:32,785] Trial 11 finished with value: 0.9375017506730364 and parameters: {'n_estimators': 367, 'learning_rate': 0.058721263969535836}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 754, 'learning_rate': 0.035267009457613074, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:38,886] Trial 12 finished with value: 0.9397348313907348 and parameters: {'n_estimators': 754, 'learning_rate': 0.035267009457613074}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 397, 'learning_rate': 0.09611245757140606, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:41,989] Trial 13 finished with value: 0.9408490375188684 and parameters: {'n_estimators': 397, 'learning_rate': 0.09611245757140606}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 753, 'learning_rate': 0.024872936201087905, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:50,095] Trial 14 finished with value: 0.9397379436983553 and parameters: {'n_estimators': 753, 'learning_rate': 0.024872936201087905}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 212, 'learning_rate': 0.12593500953942036, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:52,083] Trial 15 finished with value: 0.942520346711069 and parameters: {'n_estimators': 212, 'learning_rate': 0.12593500953942036}. Best is trial 0 with value: 0.9425250151724998.


params; {'n_estimators': 420, 'learning_rate': 0.07957983364839448, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:55,805] Trial 16 finished with value: 0.9430821182365665 and parameters: {'n_estimators': 420, 'learning_rate': 0.07957983364839448}. Best is trial 16 with value: 0.9430821182365665.


params; {'n_estimators': 316, 'learning_rate': 0.021538839072600055, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:51:58,476] Trial 17 finished with value: 0.9263487963150278 and parameters: {'n_estimators': 316, 'learning_rate': 0.021538839072600055}. Best is trial 16 with value: 0.9430821182365665.


params; {'n_estimators': 674, 'learning_rate': 0.17361132657319867, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:52:04,030] Trial 18 finished with value: 0.9380604098909137 and parameters: {'n_estimators': 674, 'learning_rate': 0.17361132657319867}. Best is trial 16 with value: 0.9430821182365665.


params; {'n_estimators': 90, 'learning_rate': 0.05310706483172514, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}


[I 2024-11-26 16:52:04,779] Trial 19 finished with value: 0.9263472401612176 and parameters: {'n_estimators': 90, 'learning_rate': 0.05310706483172514}. Best is trial 16 with value: 0.9430821182365665.
[I 2024-11-26 16:52:04,780] A new study created in memory with name: no-name-3c23f0fe-8850-4935-b381-3cc41d4b6cc0


params; {'n_estimators': 420, 'learning_rate': 0.07957983364839448, 'algorithm': 'SAMME'}
model_config; {'module': 'sklearn.ensemble', 'class': 'AdaBoostClassifier', 'short_name': 'AdaBoost', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 1000}, 'learning_rate': {'type': 'float', 'min': 0.01, 'max': 1.0, 'log': True}, 'algorithm': 'SAMME'}}
Best AdaBoostClassifier Params: {'n_estimators': 420, 'learning_rate': 0.07957983364839448}
Best AdaBoostClassifier Score: 0.9431
Optimising RandomForestClassifier...
model_config: {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}...
Cross-validation settings: {'CV': 5, 'scoring': 'accuracy', 'random_state': 42}
study_config["direction"]: maximize
par

[I 2024-11-26 16:52:05,394] Trial 0 finished with value: 0.9542475218250572 and parameters: {'n_estimators': 104, 'max_depth': 14, 'min_samples_split': 7, 'min_samples_leaf': 10}. Best is trial 0 with value: 0.9542475218250572.


params; {'n_estimators': 81, 'max_depth': 26, 'min_samples_split': 12, 'min_samples_leaf': 9}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:05,884] Trial 1 finished with value: 0.950903347286846 and parameters: {'n_estimators': 81, 'max_depth': 26, 'min_samples_split': 12, 'min_samples_leaf': 9}. Best is trial 0 with value: 0.9542475218250572.


params; {'n_estimators': 94, 'max_depth': 26, 'min_samples_split': 10, 'min_samples_leaf': 10}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:06,453] Trial 2 finished with value: 0.9548061810429342 and parameters: {'n_estimators': 94, 'max_depth': 26, 'min_samples_split': 10, 'min_samples_leaf': 10}. Best is trial 2 with value: 0.9548061810429342.


params; {'n_estimators': 156, 'max_depth': 27, 'min_samples_split': 13, 'min_samples_leaf': 8}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:07,382] Trial 3 finished with value: 0.9575963648246993 and parameters: {'n_estimators': 156, 'max_depth': 27, 'min_samples_split': 13, 'min_samples_leaf': 8}. Best is trial 3 with value: 0.9575963648246993.


params; {'n_estimators': 50, 'max_depth': 29, 'min_samples_split': 11, 'min_samples_leaf': 13}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:07,682] Trial 4 finished with value: 0.9509017911330357 and parameters: {'n_estimators': 50, 'max_depth': 29, 'min_samples_split': 11, 'min_samples_leaf': 13}. Best is trial 3 with value: 0.9575963648246993.


params; {'n_estimators': 111, 'max_depth': 7, 'min_samples_split': 6, 'min_samples_leaf': 16}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:08,368] Trial 5 finished with value: 0.9514557818894819 and parameters: {'n_estimators': 111, 'max_depth': 7, 'min_samples_split': 6, 'min_samples_leaf': 16}. Best is trial 3 with value: 0.9575963648246993.


params; {'n_estimators': 173, 'max_depth': 8, 'min_samples_split': 16, 'min_samples_leaf': 2}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:09,432] Trial 6 finished with value: 0.9581503555811457 and parameters: {'n_estimators': 173, 'max_depth': 8, 'min_samples_split': 16, 'min_samples_leaf': 2}. Best is trial 6 with value: 0.9581503555811457.


params; {'n_estimators': 284, 'max_depth': 27, 'min_samples_split': 8, 'min_samples_leaf': 14}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:11,110] Trial 7 finished with value: 0.9514588941971024 and parameters: {'n_estimators': 284, 'max_depth': 27, 'min_samples_split': 8, 'min_samples_leaf': 14}. Best is trial 6 with value: 0.9581503555811457.


params; {'n_estimators': 173, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 3}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:12,191] Trial 8 finished with value: 0.9581519117349557 and parameters: {'n_estimators': 173, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 3}. Best is trial 8 with value: 0.9581519117349557.


params; {'n_estimators': 268, 'max_depth': 26, 'min_samples_split': 16, 'min_samples_leaf': 3}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:13,852] Trial 9 finished with value: 0.9587090147990226 and parameters: {'n_estimators': 268, 'max_depth': 26, 'min_samples_split': 16, 'min_samples_leaf': 3}. Best is trial 9 with value: 0.9587090147990226.


params; {'n_estimators': 279, 'max_depth': 17, 'min_samples_split': 20, 'min_samples_leaf': 19}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:15,379] Trial 10 finished with value: 0.9486687104153375 and parameters: {'n_estimators': 279, 'max_depth': 17, 'min_samples_split': 20, 'min_samples_leaf': 19}. Best is trial 9 with value: 0.9587090147990226.


params; {'n_estimators': 226, 'max_depth': 21, 'min_samples_split': 16, 'min_samples_leaf': 1}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:16,834] Trial 11 finished with value: 0.9570314809915812 and parameters: {'n_estimators': 226, 'max_depth': 21, 'min_samples_split': 16, 'min_samples_leaf': 1}. Best is trial 9 with value: 0.9587090147990226.


params; {'n_estimators': 224, 'max_depth': 21, 'min_samples_split': 2, 'min_samples_leaf': 5}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:18,350] Trial 12 finished with value: 0.9598278893885871 and parameters: {'n_estimators': 224, 'max_depth': 21, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 12 with value: 0.9598278893885871.


params; {'n_estimators': 232, 'max_depth': 22, 'min_samples_split': 3, 'min_samples_leaf': 5}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:19,855] Trial 13 finished with value: 0.9609389832091002 and parameters: {'n_estimators': 232, 'max_depth': 22, 'min_samples_split': 3, 'min_samples_leaf': 5}. Best is trial 13 with value: 0.9609389832091002.


params; {'n_estimators': 226, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 6}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:21,421] Trial 14 finished with value: 0.9592661178630895 and parameters: {'n_estimators': 226, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 6}. Best is trial 13 with value: 0.9609389832091002.


params; {'n_estimators': 214, 'max_depth': 13, 'min_samples_split': 2, 'min_samples_leaf': 5}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:22,889] Trial 15 finished with value: 0.9603818801450335 and parameters: {'n_estimators': 214, 'max_depth': 13, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 13 with value: 0.9609389832091002.


params; {'n_estimators': 245, 'max_depth': 2, 'min_samples_split': 4, 'min_samples_leaf': 6}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:23,852] Trial 16 finished with value: 0.9375017506730365 and parameters: {'n_estimators': 245, 'max_depth': 2, 'min_samples_split': 4, 'min_samples_leaf': 6}. Best is trial 13 with value: 0.9609389832091002.


params; {'n_estimators': 201, 'max_depth': 13, 'min_samples_split': 5, 'min_samples_leaf': 7}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:25,125] Trial 17 finished with value: 0.9587105709528329 and parameters: {'n_estimators': 201, 'max_depth': 13, 'min_samples_split': 5, 'min_samples_leaf': 7}. Best is trial 13 with value: 0.9609389832091002.


params; {'n_estimators': 147, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 4}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:26,247] Trial 18 finished with value: 0.9592676740168997 and parameters: {'n_estimators': 147, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 13 with value: 0.9609389832091002.


params; {'n_estimators': 200, 'max_depth': 17, 'min_samples_split': 2, 'min_samples_leaf': 12}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}


[I 2024-11-26 16:52:27,419] Trial 19 finished with value: 0.9531286472354928 and parameters: {'n_estimators': 200, 'max_depth': 17, 'min_samples_split': 2, 'min_samples_leaf': 12}. Best is trial 13 with value: 0.9609389832091002.


params; {'n_estimators': 232, 'max_depth': 22, 'min_samples_split': 3, 'min_samples_leaf': 5}
model_config; {'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'short_name': 'RandomForest', 'parameters': {'n_estimators': {'type': 'int', 'min': 50, 'max': 300}, 'max_depth': {'type': 'int', 'min': 2, 'max': 30}, 'min_samples_split': {'type': 'int', 'min': 2, 'max': 20}, 'min_samples_leaf': {'type': 'int', 'min': 1, 'max': 20}}}
Best RandomForestClassifier Params: {'n_estimators': 232, 'max_depth': 22, 'min_samples_split': 3, 'min_samples_leaf': 5}
Best RandomForestClassifier Score: 0.9609


In [13]:
# Summarise each tuned model: its name, the estimator object, the best
# parameters found and the best score (formatted to four decimals).
for best_model in best_models:
    print(f"Model: {best_model.model_name}")
    print(f"best_model: {best_model.best_model}")
    print(f"Best Parameters: {best_model.best_params}")
    print(f"Best Score: {best_model.best_score:.4f}")

Model: AdaBoostClassifier
best_model: AdaBoostClassifier(algorithm='SAMME', learning_rate=0.07957983364839448,
                   n_estimators=420)
Best Parameters: {'n_estimators': 420, 'learning_rate': 0.07957983364839448}
Best Score: 0.9431
Model: RandomForestClassifier
best_model: RandomForestClassifier(max_depth=22, min_samples_leaf=5, min_samples_split=3,
                       n_estimators=232)
Best Parameters: {'n_estimators': 232, 'max_depth': 22, 'min_samples_split': 3, 'min_samples_leaf': 5}
Best Score: 0.9609


In [14]:
from sklearn.metrics import accuracy_score, f1_score, classification_report, precision_score, recall_score

# Refit each tuned model on the full training set and evaluate it on the test set.
# NOTE(review): the recorded outputs show cross-validated accuracy of roughly
# 0.94-0.96 on the training data but test accuracy of roughly 0.25-0.28, with only
# class 2 predicted well. TODO confirm that the `cluster` labels in the train and
# test data come from the same labelling before trusting these numbers.
for best_model in best_models:
    print(f"\nEvaluating Model: {best_model.model_name}")

    # Train the best model on the training set; the result of fit() is what is
    # used for prediction below.
    trained_model = best_model.best_model.fit(X_train, y_train)
    y_pred = trained_model.predict(X_test)

    # Calculate evaluation metrics; precision/recall/F1 use the "weighted"
    # average across classes.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")
    f1 = f1_score(y_test, y_pred, average="weighted")

    print(f"Test Accuracy: {accuracy:.4f}")
    print(f"Test Precision: {precision:.4f}")
    print(f"Test Recall: {recall:.4f}")
    print(f"Test F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Show the first five predictions of this model.
    print(f"Predictions for {best_model.model_name}: {y_pred[:5]}")
# Outside the loop: the first five true labels are printed once, after all models.
print(f"Truth value for y_test: {y_test[:5].to_list()}")


Evaluating Model: AdaBoostClassifier
Test Accuracy: 0.2790
Test Precision: 0.2928
Test Recall: 0.2790
Test F1 Score: 0.2835

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       192
           1       0.09      0.13      0.10       134
           2       0.98      0.88      0.93       122

    accuracy                           0.28       448
   macro avg       0.36      0.34      0.34       448
weighted avg       0.29      0.28      0.28       448

Predictions for AdaBoostClassifier: [1 1 2 2 1]

Evaluating Model: RandomForestClassifier
Test Accuracy: 0.2545
Test Precision: 0.2863
Test Recall: 0.2545
Test F1 Score: 0.2686

Classification Report:
              precision    recall  f1-score   support

           0       0.01      0.01      0.01       192
           1       0.04      0.05      0.04       134
           2       0.99      0.86      0.92       122

    accuracy                           0.25       4