In [1]:
!pip install inflection feature_engine category_encoders optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting inflection
  Downloading inflection-0.5.1-py2.py3-none-any.whl (9.5 kB)
Collecting feature_engine
  Downloading feature_engine-1.4.0-py2.py3-none-any.whl (276 kB)
[K     |████████████████████████████████| 276 kB 4.2 MB/s 
[?25hCollecting category_encoders
  Downloading category_encoders-2.5.0-py2.py3-none-any.whl (69 kB)
[K     |████████████████████████████████| 69 kB 6.9 MB/s 
[?25hCollecting optuna
  Downloading optuna-2.10.1-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 53.0 MB/s 
Collecting statsmodels>=0.11.1
  Downloading statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[K     |████████████████████████████████| 9.8 MB 22.9 MB/s 
Collecting alembic
  Downloading alembic-1.8.0-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 38.2 MB/s 
[?25hCollecting colorlog
  Downloading c

In [None]:
import inflection
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from matplotlib import rcParams

from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.pipeline import make_pipeline
from imblearn.pipeline import make_pipeline as imblearn_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, FunctionTransformer
from feature_engine.creation import CyclicalFeatures
from feature_engine.discretisation import ArbitraryDiscretiser
from feature_engine.encoding import CountFrequencyEncoder, OneHotEncoder, RareLabelEncoder
from category_encoders.target_encoder import TargetEncoder

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTETomek
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFE
from sklearn.model_selection import cross_validate
from tqdm import tqdm

import optuna
import joblib
from optuna.samplers import TPESampler

def load_dataset(path):
    """Read a CSV file and rename its columns with ``inflection.underscore``.

    Parameters
    ----------
    path : str or file-like
        Passed unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame; every column label has been passed through
        ``inflection.underscore``.
    """
    frame = pd.read_csv(path)
    frame.columns = list(map(inflection.underscore, frame.columns))
    return frame

def data_preparation(df):
    """Clean the loaded frame and cast its columns to the types used downstream.

    Steps, in order:
      1. drop exact duplicate rows;
      2. cast ``operating_systems``, ``browser``, ``region``, ``traffic_type``
         and ``weekend`` to ``str``;
      3. replace the ``month`` label with its calendar number;
      4. cast ``revenue`` to ``int``.

    The caller's frame is not modified; the work is done on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns named above.

    Returns
    -------
    pandas.DataFrame
        The prepared copy. A ``month`` label that is not a key of the mapping
        below becomes NaN (``Series.map`` behaviour).
    """
    df = df.copy()

    # drop duplicated data
    df = df.drop_duplicates()

    # parsing to string
    variables = ['operating_systems', 'browser', 'region', 'traffic_type', 'weekend']
    df[variables] = df[variables].astype(str)

    # parsing month to integer.
    # The full calendar is listed (previously 'Jan' and 'Apr' were left out), so a
    # row from those months is no longer silently turned into NaN. 'June' keeps the
    # spelling used by the original mapping.
    month_numbers = {
        'Jan': 1, 'Feb': 2, 'Mar': 3,
        'Apr': 4, 'May': 5, 'June': 6,
        'Jul': 7, 'Aug': 8, 'Sep': 9,
        'Oct': 10, 'Nov': 11, 'Dec': 12,
    }
    df['month'] = df['month'].map(month_numbers)

    # parsing revenue to integer
    df['revenue'] = df['revenue'].astype(int)

    return df

def split_dataset(df, target='revenue', test_size=0.2, seed=0):
    """Split ``df`` with ``train_test_split``, stratifying on the ``target`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; it is passed whole (target column included).
    target : str
        Column whose values are handed to ``stratify``.
    test_size : float
        Forwarded as ``test_size``.
    seed : int
        Forwarded as ``random_state``.

    Returns
    -------
    Whatever ``train_test_split`` returns for a single input array.
    """
    split_options = {
        'test_size': test_size,
        'random_state': seed,
        'stratify': df[target],
    }
    return train_test_split(df, **split_options)

def random_forest_hyperspace(trial):
    """Draw one set of keyword arguments from ``trial``.

    ``main`` pairs this search space with ``RandomForestClassifier``. The
    suggestions are requested in a fixed order and ``random_state`` is always 0.

    Parameters
    ----------
    trial : object exposing ``suggest_int`` and ``suggest_categorical``
        (an Optuna trial in this notebook).

    Returns
    -------
    dict
        Parameter name -> sampled value.
    """
    space = {}
    space["n_estimators"] = trial.suggest_int("n_estimators", 10, 300)
    space["max_depth"] = trial.suggest_int("max_depth", 3, 20)
    space["criterion"] = trial.suggest_categorical("criterion", ['gini', 'entropy'])
    space["min_samples_split"] = trial.suggest_int("min_samples_split", 2, 50)
    space["min_samples_leaf"] = trial.suggest_int("min_samples_leaf", 1, 50)
    space["class_weight"] = trial.suggest_categorical(
        "class_weight", ['balanced', 'balanced_subsample']
    )
    space["random_state"] = 0
    return space

def extra_trees_hyperspace(trial):
    """Draw one set of keyword arguments from ``trial``.

    The names, ranges and order of the suggestions are the same as in
    ``random_forest_hyperspace``; ``random_state`` is always 0.

    Parameters
    ----------
    trial : object exposing ``suggest_int`` and ``suggest_categorical``
        (an Optuna trial in this notebook).

    Returns
    -------
    dict
        Parameter name -> sampled value.
    """
    # Each draw is made on its own line, in the order the keys appear in the result.
    n_estimators = trial.suggest_int("n_estimators", 10, 300)
    max_depth = trial.suggest_int("max_depth", 3, 20)
    criterion = trial.suggest_categorical("criterion", ['gini', 'entropy'])
    min_samples_split = trial.suggest_int("min_samples_split", 2, 50)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 50)
    class_weight = trial.suggest_categorical("class_weight", ['balanced', 'balanced_subsample'])

    return dict(
        n_estimators=n_estimators,
        max_depth=max_depth,
        criterion=criterion,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        class_weight=class_weight,
        random_state=0,
    )

def ada_boost_hyperspace(trial):
    """Draw one set of keyword arguments from ``trial`` for an AdaBoost model.

    Parameters
    ----------
    trial : object exposing ``suggest_int``, ``suggest_float`` and
        ``suggest_categorical`` (an Optuna trial in this notebook).

    Returns
    -------
    dict
        ``n_estimators``, ``learning_rate``, ``algorithm`` and a fixed
        ``random_state`` of 0.
    """
    return {
      "n_estimators": trial.suggest_int("n_estimators", 10, 300),
      # The bounds used to be (0.01, 0.01), which pinned the learning rate to a
      # single value so it was never tuned. It is now searched on a log scale
      # from 0.01 up to 1.0.
      "learning_rate": trial.suggest_float("learning_rate", 0.01, 1.0, log=True),
      # NOTE(review): recent scikit-learn releases deprecate 'SAMME.R' - confirm
      # against the installed version before enabling this search space.
      "algorithm": trial.suggest_categorical("algorithm", ['SAMME', 'SAMME.R']),
      "random_state": 0,
    }

def decision_tree_hyperspace(trial):
    """Draw one set of keyword arguments from ``trial`` for a decision tree.

    ``class_weight`` is fixed to ``"balanced"`` and ``random_state`` to 0; the
    other four entries are sampled from ``trial``.

    Parameters
    ----------
    trial : object exposing ``suggest_int`` and ``suggest_categorical``
        (an Optuna trial in this notebook).

    Returns
    -------
    dict
        Parameter name -> value.
    """
    sampled = {
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "criterion": trial.suggest_categorical("criterion", ['gini', 'entropy']),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 50),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 50),
    }
    fixed = {"class_weight": "balanced", "random_state": 0}
    return {**sampled, **fixed}

def xgboost_hyperspace(trial):
    """Draw one set of keyword arguments from ``trial`` for an XGBoost classifier.

    Parameters
    ----------
    trial : object exposing ``suggest_int`` and ``suggest_float``
        (an Optuna trial in this notebook).

    Returns
    -------
    dict
        Nine sampled entries plus the fixed ``random_state`` (0),
        ``objective`` ('binary:logistic') and ``eval_metric`` ('auc').
    """
    return {
      "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.01, log=True),
      "n_estimators": trial.suggest_int("n_estimators", 10, 300),
      "max_depth": trial.suggest_int("max_depth", 3, 20),
      "min_child_weight": trial.suggest_int("min_child_weight", 2, 50),
      "subsample": trial.suggest_float("subsample", 0.5, 0.8),
      "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 0.8),
      "reg_alpha": trial.suggest_float("reg_alpha", 0, 0.2),
      # Same log-scaled range as before, expressed with suggest_float(log=True)
      # like "learning_rate" above instead of the deprecated suggest_loguniform.
      "reg_lambda": trial.suggest_float("reg_lambda", 1e-10, 100, log=True),
      "scale_pos_weight": trial.suggest_float("scale_pos_weight", 1, 3),
      "random_state": 0,
      "objective": 'binary:logistic',
      "eval_metric": 'auc',
    }

def cross_validation(X, y, model):
    """Cross-validate ``model`` on 5 stratified folds, show a report, return mean test F1.

    The report is passed to ``display`` (not imported in this file, so presumably
    the IPython builtin) and holds, in this order: the feature names reported by
    the ``'rfe'`` step of the first fold's fitted estimator, then
    ``"mean  ± std"`` strings for train and test precision, recall, F1 and ROC-AUC.

    Parameters
    ----------
    X, y :
        Features and target, forwarded to ``cross_validate``.
    model :
        Estimator to evaluate. It must expose ``named_steps['rfe']`` once fitted,
        i.e. be a pipeline containing a step with that name.

    Returns
    -------
    float
        Mean test F1 over the folds, rounded to 3 decimals.
    """
    def _summarise(scores):
        # "mean  ± std" with three decimals (two spaces before the ± sign)
        return f"{scores.mean():.3f}  ± {scores.std():.3f}"

    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    results = cross_validate(
        model,
        X,
        y,
        cv=folds,
        scoring=["precision", "recall", "f1", "roc_auc"],
        return_estimator=True,
        return_train_score=True,
    )

    # Only the first fold's fitted pipeline is inspected for the selected features.
    first_fold_estimator = results['estimator'][0]
    report = {
        "features": first_fold_estimator.named_steps['rfe'].get_feature_names_out(),
    }
    # (key prefix used in the report, scorer name used by cross_validate)
    metric_names = (
        ("precision", "precision"),
        ("recall", "recall"),
        ("f1", "f1"),
        ("auc", "roc_auc"),
    )
    for label, scorer in metric_names:
        for part in ("train", "test"):
            report[f"{label}_{part}"] = _summarise(results[f"{part}_{scorer}"])

    display(report)
    return round(results['test_f1'].mean(), 3)

def objective(trial, X, y, hyperspace, model_obj):
    """Build the candidate pipeline for one trial and score it with ``cross_validation``.

    Pipeline steps, in order: cyclical encoding of ``month`` (original column
    dropped), one-hot encoding of ``visitor_type`` and ``weekend``, RFE driven
    by ``model_obj(random_state=0)``, SMOTE, and finally ``model_obj`` built
    from the sampled parameters.

    Parameters
    ----------
    trial :
        Trial object handed to ``hyperspace`` and used to sample the number of
        features RFE keeps.
    X, y :
        Training features and target.
    hyperspace : callable
        ``hyperspace(trial)`` must return the keyword arguments for ``model_obj``.
    model_obj : callable
        Estimator class/factory; called once with ``random_state=0`` for RFE and
        once with the sampled parameters for the final step.

    Returns
    -------
    The value returned by ``cross_validation(X, y, pipeline)``.
    """
    # Model parameters are sampled first, the feature count second.
    final_params = hyperspace(trial)
    # Upper bound is the number of columns of X as passed in (before encoding).
    n_kept = trial.suggest_int("n_features_to_select", 1, X.shape[1])

    encoders = [
        CyclicalFeatures(variables=['month'], drop_original=True),
        OneHotEncoder(variables=['visitor_type', 'weekend']),
    ]
    selector = RFE(model_obj(random_state=0), n_features_to_select=n_kept, step=1)
    resampler = SMOTE(random_state=0)
    classifier = model_obj(**final_params)

    pipeline = imblearn_pipeline(*encoders, selector, resampler, classifier)
    return cross_validation(X, y, pipeline)

#if __name__ == "__main__":
def main(data_path='./online_shoppers_intention.csv', n_trials=300):
    """Run one Optuna study per candidate model and pickle each finished study.

    Workflow: load and prepare the CSV, drop four categorical columns, split
    off a stratified 20% test part (not used here), then for every
    (search space, estimator) pair maximise the value returned by ``objective``
    on the training part, show the optimisation history and dump the study to
    ``<EstimatorClassName>_study.pkl`` in the working directory.

    Parameters
    ----------
    data_path : str
        CSV file to load. Defaults to the path that was previously hard-coded.
    n_trials : int
        Number of trials per study. Defaults to the previously hard-coded 300.

    Returns
    -------
    None
    """
    # data preparation
    df = load_dataset(data_path)
    df = data_preparation(df)
    df = df.drop(['operating_systems', 'browser', 'region', 'traffic_type'], axis=1)

    # split dataset into train and test; only the training part feeds the study
    target = 'revenue'
    full_train, _ = split_dataset(df, target=target, test_size=0.2)
    X, y = full_train.drop(target, axis=1), full_train[target]

    # One entry per model: (search-space function, estimator class).
    # Keeping both in a single tuple means enabling/disabling a model is a
    # one-line change and the two can no longer drift out of step; the output
    # file name is taken from the estimator class itself.
    candidates = [
        # (decision_tree_hyperspace, DecisionTreeClassifier),
        # (ada_boost_hyperspace, AdaBoostClassifier),
        (random_forest_hyperspace, RandomForestClassifier),
        # (xgboost_hyperspace, XGBClassifier),
        # (extra_trees_hyperspace, ExtraTreesClassifier),
    ]

    for hyperspace, model_obj in candidates:
        # Defaults bind this iteration's pair, so the callable stays correct
        # even if it outlives the loop iteration.
        def run_trial(trial, hyperspace=hyperspace, model_obj=model_obj):
            return objective(trial, X, y, hyperspace, model_obj)

        study = optuna.create_study(
            direction='maximize',
            sampler=TPESampler(seed=0),
        )
        study.optimize(run_trial, n_trials=n_trials)

        fig = optuna.visualization.plot_optimization_history(study)
        fig.show()

        saved_file = model_obj.__name__
        with open(f"{saved_file}_study.pkl", 'wb') as f_out:
            joblib.dump(study, f_out)
# Launch the tuning run. Every trial performs a 5-fold cross-validation,
# so this cell is long-running.
main()

[32m[I 2022-06-17 07:39:42,506][0m A new study created in memory with name: no-name-5042b11b-e5f5-4adc-bfb1-d1d49153cfb4[0m


{'auc_test': '0.930  ± 0.004',
 'auc_train': '0.948  ± 0.001',
 'f1_test': '0.684  ± 0.011',
 'f1_train': '0.705  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.619  ± 0.020',
 'precision_train': '0.640  ± 0.006',
 'recall_test': '0.766  ± 0.012',
 'recall_train': '0.784  ± 0.003'}

[32m[I 2022-06-17 07:40:37,602][0m Trial 0 finished with value: 0.684 and parameters: {'n_estimators': 169, 'max_depth': 15, 'criterion': 'gini', 'min_samples_split': 22, 'min_samples_leaf': 33, 'class_weight': 'balanced_subsample', 'n_features_to_select': 13}. Best is trial 0 with value: 0.684.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.964  ± 0.001',
 'f1_test': '0.690  ± 0.007',
 'f1_train': '0.742  ± 0.003',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.639  ± 0.018',
 'precision_train': '0.693  ± 0.002',
 'recall_test': '0.751  ± 0.017',
 'recall_train': '0.799  ± 0.006'}

[32m[I 2022-06-17 07:41:29,204][0m Trial 1 finished with value: 0.69 and parameters: {'n_estimators': 121, 'max_depth': 17, 'criterion': 'entropy', 'min_samples_split': 47, 'min_samples_leaf': 4, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.870  ± 0.009',
 'auc_train': '0.917  ± 0.003',
 'f1_test': '0.660  ± 0.017',
 'f1_train': '0.660  ± 0.004',
 'features': array(['product_related_duration', 'page_values'], dtype=object),
 'precision_test': '0.560  ± 0.021',
 'precision_train': '0.559  ± 0.005',
 'recall_test': '0.805  ± 0.012',
 'recall_train': '0.806  ± 0.004'}

[32m[I 2022-06-17 07:43:07,449][0m Trial 2 finished with value: 0.66 and parameters: {'n_estimators': 236, 'max_depth': 18, 'criterion': 'gini', 'min_samples_split': 24, 'min_samples_leaf': 40, 'class_weight': 'balanced_subsample', 'n_features_to_select': 2}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.928  ± 0.005',
 'auc_train': '0.952  ± 0.001',
 'f1_test': '0.677  ± 0.008',
 'f1_train': '0.704  ± 0.003',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.593  ± 0.016',
 'precision_train': '0.620  ± 0.004',
 'recall_test': '0.789  ± 0.012',
 'recall_train': '0.813  ± 0.006'}

[32m[I 2022-06-17 07:44:22,460][0m Trial 3 finished with value: 0.677 and parameters: {'n_estimators': 284, 'max_depth': 12, 'criterion': 'gini', 'min_samples_split': 39, 'min_samples_leaf': 23, 'class_weight': 'balanced', 'n_features_to_select': 9}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.928  ± 0.005',
 'auc_train': '0.953  ± 0.001',
 'f1_test': '0.677  ± 0.011',
 'f1_train': '0.706  ± 0.002',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.594  ± 0.021',
 'precision_train': '0.623  ± 0.003',
 'recall_test': '0.788  ± 0.014',
 'recall_train': '0.814  ± 0.005'}

[32m[I 2022-06-17 07:45:26,067][0m Trial 4 finished with value: 0.677 and parameters: {'n_estimators': 188, 'max_depth': 14, 'criterion': 'gini', 'min_samples_split': 19, 'min_samples_leaf': 22, 'class_weight': 'balanced', 'n_features_to_select': 9}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.874  ± 0.008',
 'auc_train': '0.906  ± 0.004',
 'f1_test': '0.660  ± 0.018',
 'f1_train': '0.660  ± 0.005',
 'features': array(['product_related_duration', 'page_values'], dtype=object),
 'precision_test': '0.559  ± 0.022',
 'precision_train': '0.559  ± 0.005',
 'recall_test': '0.805  ± 0.012',
 'recall_train': '0.807  ± 0.004'}

[32m[I 2022-06-17 07:47:01,002][0m Trial 5 finished with value: 0.66 and parameters: {'n_estimators': 205, 'max_depth': 6, 'criterion': 'entropy', 'min_samples_split': 19, 'min_samples_leaf': 29, 'class_weight': 'balanced_subsample', 'n_features_to_select': 2}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.922  ± 0.006',
 'auc_train': '0.930  ± 0.002',
 'f1_test': '0.665  ± 0.015',
 'f1_train': '0.670  ± 0.004',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.567  ± 0.020',
 'precision_train': '0.571  ± 0.005',
 'recall_test': '0.805  ± 0.011',
 'recall_train': '0.810  ± 0.005'}

[32m[I 2022-06-17 07:47:48,713][0m Trial 6 finished with value: 0.665 and parameters: {'n_estimators': 70, 'max_depth': 5, 'criterion': 'gini', 'min_samples_split': 24, 'min_samples_leaf': 13, 'class_weight': 'balanced', 'n_features_to_select': 9}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.901  ± 0.009',
 'auc_train': '0.917  ± 0.002',
 'f1_test': '0.660  ± 0.015',
 'f1_train': '0.663  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values'], dtype=object),
 'precision_test': '0.558  ± 0.019',
 'precision_train': '0.560  ± 0.005',
 'recall_test': '0.807  ± 0.011',
 'recall_train': '0.813  ± 0.004'}

[32m[I 2022-06-17 07:48:45,172][0m Trial 7 finished with value: 0.66 and parameters: {'n_estimators': 50, 'max_depth': 6, 'criterion': 'entropy', 'min_samples_split': 6, 'min_samples_leaf': 42, 'class_weight': 'balanced_subsample', 'n_features_to_select': 7}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.893  ± 0.009',
 'auc_train': '0.964  ± 0.001',
 'f1_test': '0.656  ± 0.016',
 'f1_train': '0.697  ± 0.003',
 'features': array(['administrative_duration', 'product_related',
        'product_related_duration', 'exit_rates', 'page_values'],
       dtype=object),
 'precision_test': '0.558  ± 0.019',
 'precision_train': '0.599  ± 0.005',
 'recall_test': '0.797  ± 0.014',
 'recall_train': '0.835  ± 0.004'}

[32m[I 2022-06-17 07:50:18,243][0m Trial 8 finished with value: 0.656 and parameters: {'n_estimators': 294, 'max_depth': 13, 'criterion': 'gini', 'min_samples_split': 15, 'min_samples_leaf': 7, 'class_weight': 'balanced', 'n_features_to_select': 5}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.916  ± 0.008',
 'auc_train': '0.921  ± 0.001',
 'f1_test': '0.671  ± 0.015',
 'f1_train': '0.674  ± 0.006',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.579  ± 0.019',
 'precision_train': '0.582  ± 0.009',
 'recall_test': '0.800  ± 0.012',
 'recall_train': '0.801  ± 0.005'}

[32m[I 2022-06-17 07:50:50,617][0m Trial 9 finished with value: 0.671 and parameters: {'n_estimators': 130, 'max_depth': 4, 'criterion': 'gini', 'min_samples_split': 15, 'min_samples_leaf': 27, 'class_weight': 'balanced_subsample', 'n_features_to_select': 13}. Best is trial 1 with value: 0.69.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.966  ± 0.001',
 'f1_test': '0.693  ± 0.008',
 'f1_train': '0.748  ± 0.003',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.641  ± 0.016',
 'precision_train': '0.701  ± 0.004',
 'recall_test': '0.756  ± 0.020',
 'recall_train': '0.801  ± 0.006'}

[32m[I 2022-06-17 07:51:39,161][0m Trial 10 finished with value: 0.693 and parameters: {'n_estimators': 103, 'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 47, 'min_samples_leaf': 2, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.966  ± 0.001',
 'f1_test': '0.692  ± 0.010',
 'f1_train': '0.747  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.639  ± 0.015',
 'precision_train': '0.699  ± 0.004',
 'recall_test': '0.754  ± 0.017',
 'recall_train': '0.801  ± 0.007'}

[32m[I 2022-06-17 07:52:29,010][0m Trial 11 finished with value: 0.692 and parameters: {'n_estimators': 109, 'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 48, 'min_samples_leaf': 2, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.929  ± 0.004',
 'auc_train': '0.959  ± 0.001',
 'f1_test': '0.689  ± 0.014',
 'f1_train': '0.732  ± 0.005',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.630  ± 0.023',
 'precision_train': '0.680  ± 0.006',
 'recall_test': '0.761  ± 0.016',
 'recall_train': '0.793  ± 0.010'}

[32m[I 2022-06-17 07:53:04,337][0m Trial 12 finished with value: 0.689 and parameters: {'n_estimators': 16, 'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 37, 'min_samples_leaf': 13, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.930  ± 0.005',
 'auc_train': '0.967  ± 0.001',
 'f1_test': '0.689  ± 0.011',
 'f1_train': '0.748  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.637  ± 0.016',
 'precision_train': '0.702  ± 0.006',
 'recall_test': '0.751  ± 0.022',
 'recall_train': '0.802  ± 0.006'}

[32m[I 2022-06-17 07:53:50,972][0m Trial 13 finished with value: 0.689 and parameters: {'n_estimators': 87, 'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 49, 'min_samples_leaf': 1, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.899  ± 0.008',
 'auc_train': '0.944  ± 0.001',
 'f1_test': '0.660  ± 0.011',
 'f1_train': '0.675  ± 0.004',
 'features': array(['administrative_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values'], dtype=object),
 'precision_test': '0.560  ± 0.017',
 'precision_train': '0.573  ± 0.006',
 'recall_test': '0.805  ± 0.011',
 'recall_train': '0.821  ± 0.002'}

[32m[I 2022-06-17 07:55:04,318][0m Trial 14 finished with value: 0.66 and parameters: {'n_estimators': 121, 'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 35, 'min_samples_leaf': 12, 'class_weight': 'balanced', 'n_features_to_select': 6}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.893  ± 0.012',
 'auc_train': '0.933  ± 0.002',
 'f1_test': '0.659  ± 0.014',
 'f1_train': '0.668  ± 0.004',
 'features': array(['administrative_duration', 'product_related_duration',
        'exit_rates', 'page_values'], dtype=object),
 'precision_test': '0.557  ± 0.018',
 'precision_train': '0.566  ± 0.006',
 'recall_test': '0.806  ± 0.012',
 'recall_train': '0.817  ± 0.005'}

[32m[I 2022-06-17 07:56:23,417][0m Trial 15 finished with value: 0.659 and parameters: {'n_estimators': 95, 'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 43, 'min_samples_leaf': 18, 'class_weight': 'balanced', 'n_features_to_select': 4}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.928  ± 0.004',
 'auc_train': '0.943  ± 0.001',
 'f1_test': '0.681  ± 0.011',
 'f1_train': '0.698  ± 0.004',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.608  ± 0.015',
 'precision_train': '0.624  ± 0.009',
 'recall_test': '0.774  ± 0.011',
 'recall_train': '0.791  ± 0.007'}

[32m[I 2022-06-17 07:57:04,645][0m Trial 16 finished with value: 0.681 and parameters: {'n_estimators': 29, 'max_depth': 17, 'criterion': 'entropy', 'min_samples_split': 31, 'min_samples_leaf': 49, 'class_weight': 'balanced', 'n_features_to_select': 10}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.932  ± 0.004',
 'auc_train': '0.961  ± 0.001',
 'f1_test': '0.693  ± 0.011',
 'f1_train': '0.733  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.645  ± 0.020',
 'precision_train': '0.688  ± 0.004',
 'recall_test': '0.749  ± 0.016',
 'recall_train': '0.785  ± 0.007'}

[32m[I 2022-06-17 07:57:47,891][0m Trial 17 finished with value: 0.693 and parameters: {'n_estimators': 145, 'max_depth': 18, 'criterion': 'entropy', 'min_samples_split': 50, 'min_samples_leaf': 8, 'class_weight': 'balanced', 'n_features_to_select': 13}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.932  ± 0.004',
 'auc_train': '0.963  ± 0.001',
 'f1_test': '0.691  ± 0.009',
 'f1_train': '0.739  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.643  ± 0.019',
 'precision_train': '0.695  ± 0.005',
 'recall_test': '0.747  ± 0.019',
 'recall_train': '0.789  ± 0.004'}

[32m[I 2022-06-17 07:58:32,245][0m Trial 18 finished with value: 0.691 and parameters: {'n_estimators': 146, 'max_depth': 16, 'criterion': 'entropy', 'min_samples_split': 42, 'min_samples_leaf': 8, 'class_weight': 'balanced', 'n_features_to_select': 13}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.932  ± 0.005',
 'auc_train': '0.958  ± 0.001',
 'f1_test': '0.688  ± 0.010',
 'f1_train': '0.730  ± 0.005',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor'], dtype=object),
 'precision_test': '0.637  ± 0.017',
 'precision_train': '0.681  ± 0.006',
 'recall_test': '0.748  ± 0.015',
 'recall_train': '0.787  ± 0.003'}

[32m[I 2022-06-17 07:59:32,783][0m Trial 19 finished with value: 0.688 and parameters: {'n_estimators': 221, 'max_depth': 18, 'criterion': 'entropy', 'min_samples_split': 30, 'min_samples_leaf': 18, 'class_weight': 'balanced', 'n_features_to_select': 12}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.928  ± 0.005',
 'auc_train': '0.949  ± 0.001',
 'f1_test': '0.676  ± 0.010',
 'f1_train': '0.694  ± 0.003',
 'features': array(['administrative', 'administrative_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.587  ± 0.016',
 'precision_train': '0.607  ± 0.004',
 'recall_test': '0.797  ± 0.010',
 'recall_train': '0.811  ± 0.003'}

[32m[I 2022-06-17 08:00:39,344][0m Trial 20 finished with value: 0.676 and parameters: {'n_estimators': 165, 'max_depth': 9, 'criterion': 'entropy', 'min_samples_split': 45, 'min_samples_leaf': 8, 'class_weight': 'balanced', 'n_features_to_select': 8}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.968  ± 0.001',
 'f1_test': '0.691  ± 0.012',
 'f1_train': '0.750  ± 0.005',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor'], dtype=object),
 'precision_test': '0.645  ± 0.016',
 'precision_train': '0.709  ± 0.006',
 'recall_test': '0.746  ± 0.021',
 'recall_train': '0.796  ± 0.004'}

[32m[I 2022-06-17 08:01:22,849][0m Trial 21 finished with value: 0.691 and parameters: {'n_estimators': 101, 'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 48, 'min_samples_leaf': 1, 'class_weight': 'balanced', 'n_features_to_select': 12}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.962  ± 0.001',
 'f1_test': '0.690  ± 0.010',
 'f1_train': '0.740  ± 0.003',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.632  ± 0.014',
 'precision_train': '0.687  ± 0.004',
 'recall_test': '0.759  ± 0.016',
 'recall_train': '0.802  ± 0.007'}

[32m[I 2022-06-17 08:02:12,137][0m Trial 22 finished with value: 0.69 and parameters: {'n_estimators': 74, 'max_depth': 19, 'criterion': 'entropy', 'min_samples_split': 50, 'min_samples_leaf': 5, 'class_weight': 'balanced', 'n_features_to_select': 10}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.969  ± 0.001',
 'f1_test': '0.690  ± 0.015',
 'f1_train': '0.756  ± 0.005',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor'], dtype=object),
 'precision_test': '0.644  ± 0.019',
 'precision_train': '0.718  ± 0.009',
 'recall_test': '0.743  ± 0.022',
 'recall_train': '0.797  ± 0.005'}

[32m[I 2022-06-17 08:03:01,575][0m Trial 23 finished with value: 0.69 and parameters: {'n_estimators': 140, 'max_depth': 18, 'criterion': 'entropy', 'min_samples_split': 42, 'min_samples_leaf': 1, 'class_weight': 'balanced', 'n_features_to_select': 12}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.961  ± 0.001',
 'f1_test': '0.689  ± 0.011',
 'f1_train': '0.735  ± 0.002',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.634  ± 0.014',
 'precision_train': '0.682  ± 0.005',
 'recall_test': '0.756  ± 0.018',
 'recall_train': '0.797  ± 0.006'}

[32m[I 2022-06-17 08:03:56,975][0m Trial 24 finished with value: 0.689 and parameters: {'n_estimators': 110, 'max_depth': 16, 'criterion': 'entropy', 'min_samples_split': 39, 'min_samples_leaf': 11, 'class_weight': 'balanced', 'n_features_to_select': 10}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.926  ± 0.006',
 'auc_train': '0.957  ± 0.001',
 'f1_test': '0.679  ± 0.007',
 'f1_train': '0.713  ± 0.001',
 'features': array(['administrative', 'administrative_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.598  ± 0.016',
 'precision_train': '0.634  ± 0.004',
 'recall_test': '0.787  ± 0.009',
 'recall_train': '0.815  ± 0.005'}

[32m[I 2022-06-17 08:04:50,288][0m Trial 25 finished with value: 0.679 and parameters: {'n_estimators': 52, 'max_depth': 19, 'criterion': 'entropy', 'min_samples_split': 33, 'min_samples_leaf': 17, 'class_weight': 'balanced', 'n_features_to_select': 8}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.932  ± 0.005',
 'auc_train': '0.964  ± 0.001',
 'f1_test': '0.690  ± 0.009',
 'f1_train': '0.743  ± 0.005',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor'], dtype=object),
 'precision_test': '0.643  ± 0.018',
 'precision_train': '0.701  ± 0.008',
 'recall_test': '0.746  ± 0.016',
 'recall_train': '0.791  ± 0.004'}

[32m[I 2022-06-17 08:05:49,396][0m Trial 26 finished with value: 0.69 and parameters: {'n_estimators': 183, 'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 45, 'min_samples_leaf': 5, 'class_weight': 'balanced_subsample', 'n_features_to_select': 12}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.933  ± 0.004',
 'auc_train': '0.970  ± 0.001',
 'f1_test': '0.691  ± 0.012',
 'f1_train': '0.760  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.648  ± 0.023',
 'precision_train': '0.726  ± 0.007',
 'recall_test': '0.741  ± 0.022',
 'recall_train': '0.798  ± 0.003'}

[32m[I 2022-06-17 08:06:34,510][0m Trial 27 finished with value: 0.691 and parameters: {'n_estimators': 155, 'max_depth': 15, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 9, 'class_weight': 'balanced', 'n_features_to_select': 13}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.956  ± 0.001',
 'f1_test': '0.687  ± 0.010',
 'f1_train': '0.724  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.629  ± 0.017',
 'precision_train': '0.666  ± 0.006',
 'recall_test': '0.759  ± 0.023',
 'recall_train': '0.793  ± 0.007'}

[32m[I 2022-06-17 08:07:14,519][0m Trial 28 finished with value: 0.687 and parameters: {'n_estimators': 55, 'max_depth': 17, 'criterion': 'entropy', 'min_samples_split': 50, 'min_samples_leaf': 16, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.004',
 'auc_train': '0.949  ± 0.001',
 'f1_test': '0.688  ± 0.009',
 'f1_train': '0.706  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.622  ± 0.014',
 'precision_train': '0.643  ± 0.007',
 'recall_test': '0.769  ± 0.012',
 'recall_train': '0.783  ± 0.003'}

[32m[I 2022-06-17 08:08:15,509][0m Trial 29 finished with value: 0.688 and parameters: {'n_estimators': 266, 'max_depth': 15, 'criterion': 'entropy', 'min_samples_split': 40, 'min_samples_leaf': 33, 'class_weight': 'balanced_subsample', 'n_features_to_select': 13}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.926  ± 0.006',
 'auc_train': '0.971  ± 0.001',
 'f1_test': '0.683  ± 0.009',
 'f1_train': '0.756  ± 0.003',
 'features': array(['administrative', 'administrative_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.614  ± 0.018',
 'precision_train': '0.695  ± 0.003',
 'recall_test': '0.769  ± 0.013',
 'recall_train': '0.829  ± 0.004'}

[32m[I 2022-06-17 08:09:26,889][0m Trial 30 finished with value: 0.683 and parameters: {'n_estimators': 175, 'max_depth': 19, 'criterion': 'entropy', 'min_samples_split': 28, 'min_samples_leaf': 4, 'class_weight': 'balanced', 'n_features_to_select': 8}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.933  ± 0.004',
 'auc_train': '0.961  ± 0.001',
 'f1_test': '0.691  ± 0.013',
 'f1_train': '0.734  ± 0.006',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.642  ± 0.018',
 'precision_train': '0.690  ± 0.008',
 'recall_test': '0.748  ± 0.020',
 'recall_train': '0.785  ± 0.005'}

[32m[I 2022-06-17 08:10:11,452][0m Trial 31 finished with value: 0.691 and parameters: {'n_estimators': 150, 'max_depth': 16, 'criterion': 'entropy', 'min_samples_split': 45, 'min_samples_leaf': 9, 'class_weight': 'balanced', 'n_features_to_select': 13}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.968  ± 0.001',
 'f1_test': '0.689  ± 0.009',
 'f1_train': '0.751  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor'], dtype=object),
 'precision_test': '0.645  ± 0.017',
 'precision_train': '0.712  ± 0.009',
 'recall_test': '0.741  ± 0.016',
 'recall_train': '0.795  ± 0.005'}

[32m[I 2022-06-17 08:10:55,375][0m Trial 32 finished with value: 0.689 and parameters: {'n_estimators': 100, 'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 47, 'min_samples_leaf': 1, 'class_weight': 'balanced', 'n_features_to_select': 12}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.933  ± 0.004',
 'auc_train': '0.964  ± 0.001',
 'f1_test': '0.693  ± 0.010',
 'f1_train': '0.740  ± 0.005',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.646  ± 0.016',
 'precision_train': '0.699  ± 0.007',
 'recall_test': '0.749  ± 0.025',
 'recall_train': '0.787  ± 0.004'}

[32m[I 2022-06-17 08:11:37,882][0m Trial 33 finished with value: 0.693 and parameters: {'n_estimators': 135, 'max_depth': 17, 'criterion': 'entropy', 'min_samples_split': 44, 'min_samples_leaf': 6, 'class_weight': 'balanced', 'n_features_to_select': 13}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.930  ± 0.005',
 'auc_train': '0.963  ± 0.001',
 'f1_test': '0.693  ± 0.008',
 'f1_train': '0.741  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.642  ± 0.016',
 'precision_train': '0.692  ± 0.004',
 'recall_test': '0.755  ± 0.020',
 'recall_train': '0.797  ± 0.004'}

[32m[I 2022-06-17 08:12:30,006][0m Trial 34 finished with value: 0.693 and parameters: {'n_estimators': 124, 'max_depth': 18, 'criterion': 'entropy', 'min_samples_split': 46, 'min_samples_leaf': 5, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.930  ± 0.006',
 'auc_train': '0.965  ± 0.001',
 'f1_test': '0.688  ± 0.009',
 'f1_train': '0.748  ± 0.003',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.638  ± 0.014',
 'precision_train': '0.701  ± 0.004',
 'recall_test': '0.748  ± 0.017',
 'recall_train': '0.802  ± 0.005'}

[32m[I 2022-06-17 08:13:22,057][0m Trial 35 finished with value: 0.688 and parameters: {'n_estimators': 126, 'max_depth': 18, 'criterion': 'gini', 'min_samples_split': 36, 'min_samples_leaf': 5, 'class_weight': 'balanced', 'n_features_to_select': 10}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.932  ± 0.005',
 'auc_train': '0.955  ± 0.001',
 'f1_test': '0.690  ± 0.012',
 'f1_train': '0.725  ± 0.006',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor'], dtype=object),
 'precision_test': '0.635  ± 0.017',
 'precision_train': '0.672  ± 0.008',
 'recall_test': '0.756  ± 0.017',
 'recall_train': '0.788  ± 0.006'}

[32m[I 2022-06-17 08:14:21,004][0m Trial 36 finished with value: 0.69 and parameters: {'n_estimators': 198, 'max_depth': 14, 'criterion': 'entropy', 'min_samples_split': 44, 'min_samples_leaf': 21, 'class_weight': 'balanced_subsample', 'n_features_to_select': 12}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.856  ± 0.011',
 'auc_train': '0.875  ± 0.002',
 'f1_test': '0.656  ± 0.019',
 'f1_train': '0.673  ± 0.006',
 'features': array(['page_values'], dtype=object),
 'precision_test': '0.569  ± 0.023',
 'precision_train': '0.583  ± 0.009',
 'recall_test': '0.777  ± 0.015',
 'recall_train': '0.798  ± 0.004'}

[32m[I 2022-06-17 08:15:42,949][0m Trial 37 finished with value: 0.656 and parameters: {'n_estimators': 79, 'max_depth': 17, 'criterion': 'gini', 'min_samples_split': 39, 'min_samples_leaf': 14, 'class_weight': 'balanced', 'n_features_to_select': 1}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.957  ± 0.001',
 'f1_test': '0.687  ± 0.011',
 'f1_train': '0.730  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.628  ± 0.018',
 'precision_train': '0.674  ± 0.004',
 'recall_test': '0.759  ± 0.020',
 'recall_train': '0.796  ± 0.009'}

[32m[I 2022-06-17 08:16:35,895][0m Trial 38 finished with value: 0.687 and parameters: {'n_estimators': 133, 'max_depth': 12, 'criterion': 'entropy', 'min_samples_split': 46, 'min_samples_leaf': 10, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.928  ± 0.006',
 'auc_train': '0.963  ± 0.001',
 'f1_test': '0.683  ± 0.009',
 'f1_train': '0.728  ± 0.006',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.608  ± 0.017',
 'precision_train': '0.656  ± 0.008',
 'recall_test': '0.780  ± 0.018',
 'recall_train': '0.817  ± 0.004'}

[32m[I 2022-06-17 08:17:40,302][0m Trial 39 finished with value: 0.683 and parameters: {'n_estimators': 163, 'max_depth': 18, 'criterion': 'gini', 'min_samples_split': 41, 'min_samples_leaf': 6, 'class_weight': 'balanced_subsample', 'n_features_to_select': 9}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.932  ± 0.005',
 'auc_train': '0.961  ± 0.001',
 'f1_test': '0.689  ± 0.010',
 'f1_train': '0.736  ± 0.004',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos',
        'visitor_type_New_Visitor', 'weekend_False'], dtype=object),
 'precision_test': '0.640  ± 0.018',
 'precision_train': '0.690  ± 0.006',
 'recall_test': '0.747  ± 0.020',
 'recall_train': '0.789  ± 0.006'}

[32m[I 2022-06-17 08:18:36,340][0m Trial 40 finished with value: 0.689 and parameters: {'n_estimators': 238, 'max_depth': 13, 'criterion': 'entropy', 'min_samples_split': 50, 'min_samples_leaf': 4, 'class_weight': 'balanced', 'n_features_to_select': 13}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.930  ± 0.005',
 'auc_train': '0.965  ± 0.001',
 'f1_test': '0.691  ± 0.005',
 'f1_train': '0.747  ± 0.005',
 'features': array(['administrative', 'administrative_duration', 'informational',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.639  ± 0.014',
 'precision_train': '0.698  ± 0.004',
 'recall_test': '0.752  ± 0.018',
 'recall_train': '0.802  ± 0.008'}

[32m[I 2022-06-17 08:19:26,654][0m Trial 41 finished with value: 0.691 and parameters: {'n_estimators': 113, 'max_depth': 19, 'criterion': 'entropy', 'min_samples_split': 47, 'min_samples_leaf': 3, 'class_weight': 'balanced', 'n_features_to_select': 11}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.962  ± 0.001',
 'f1_test': '0.692  ± 0.010',
 'f1_train': '0.737  ± 0.002',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.637  ± 0.016',
 'precision_train': '0.683  ± 0.004',
 'recall_test': '0.759  ± 0.018',
 'recall_train': '0.799  ± 0.007'}

[32m[I 2022-06-17 08:20:21,961][0m Trial 42 finished with value: 0.692 and parameters: {'n_estimators': 111, 'max_depth': 19, 'criterion': 'entropy', 'min_samples_split': 47, 'min_samples_leaf': 7, 'class_weight': 'balanced', 'n_features_to_select': 10}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.929  ± 0.006',
 'auc_train': '0.964  ± 0.001',
 'f1_test': '0.682  ± 0.009',
 'f1_train': '0.733  ± 0.002',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin'], dtype=object),
 'precision_test': '0.608  ± 0.018',
 'precision_train': '0.663  ± 0.006',
 'recall_test': '0.778  ± 0.016',
 'recall_train': '0.819  ± 0.005'}

[32m[I 2022-06-17 08:21:28,938][0m Trial 43 finished with value: 0.682 and parameters: {'n_estimators': 138, 'max_depth': 17, 'criterion': 'entropy', 'min_samples_split': 43, 'min_samples_leaf': 7, 'class_weight': 'balanced', 'n_features_to_select': 9}. Best is trial 10 with value: 0.693.[0m


{'auc_test': '0.931  ± 0.005',
 'auc_train': '0.958  ± 0.001',
 'f1_test': '0.691  ± 0.007',
 'f1_train': '0.728  ± 0.002',
 'features': array(['administrative', 'administrative_duration',
        'informational_duration', 'product_related',
        'product_related_duration', 'bounce_rates', 'exit_rates',
        'page_values', 'month_sin', 'month_cos'], dtype=object),
 'precision_test': '0.633  ± 0.012',
 'precision_train': '0.674  ± 0.005',
 'recall_test': '0.761  ± 0.017',
 'recall_train': '0.793  ± 0.007'}

[32m[I 2022-06-17 08:22:25,458][0m Trial 44 finished with value: 0.691 and parameters: {'n_estimators': 119, 'max_depth': 19, 'criterion': 'entropy', 'min_samples_split': 47, 'min_samples_leaf': 14, 'class_weight': 'balanced', 'n_features_to_select': 10}. Best is trial 10 with value: 0.693.[0m


In [None]:
def best_trial_result(clf_name='DecisionTreeClassifier'):
    """Load a classifier's saved study and print a summary of its best trial.

    The study is read from ``{clf_name}_study.pkl`` (relative to the current
    working directory) with ``joblib.load``. The loaded object is presumably
    an Optuna study; only its ``best_trial.value`` and ``best_trial.params``
    attributes are used here.

    Parameters
    ----------
    clf_name : str
        Classifier name used as the prefix of the study file name.

    Returns
    -------
    object
        The study object exactly as loaded from disk.

    Notes
    -----
    ``joblib.load`` unpickles the file, so only load study files you trust.
    """
    study_path = f"{clf_name}_study.pkl"
    with open(study_path, 'rb') as study_file:
        study = joblib.load(study_file)

    # Look the best trial up once and reuse it for every line of the report.
    best = study.best_trial

    print("Best trial until now:")
    print(f"Classifier: {clf_name}")
    print(" Value (f1_test): ", best.value)
    print(" Params: ")
    for param_name, param_value in best.params.items():
        print(f"    {param_name}: {param_value}")
    print()
    return study

def main():
    """Report the best saved trial for every enabled classifier.

    For each enabled name, ``best_trial_result`` prints the summary and
    returns the loaded study, which is then rendered with ``display``.
    """
    # Uncomment an entry to include that classifier's saved study.
    enabled_classifiers = (
        # 'DecisionTreeClassifier',
        "RandomForestClassifier",
        # "ExtraTreesClassifier",
        # "AdaBoostClassifier",
        # "XGBClassifier",
    )

    for enabled_name in enabled_classifiers:
        loaded_study = best_trial_result(clf_name=enabled_name)
        display(loaded_study)

main()