In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pld
import seaborn as sns
import optuna
from functools import partial

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.metrics import roc_auc_score

In [46]:
X_train, y_train = pd.read_csv('../../Data/titanic_xtrain.csv'), pd.read_csv('../../Data/titanic_ytrain.csv')
X_test, y_test = pd.read_csv('../../Data/titanic_xtest.csv'), pd.read_csv('../../Data/titanic_ytest.csv')

In [47]:
X_train = X_train.iloc[:, 1:]
X_test = X_test.iloc[:, 1:]
y_train = y_train.Survived
y_test = y_test.Survived

In [86]:
def objective(trial, X, y):
    grid_param = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000, 10),
        'max_depth': trial.suggest_int('max_depth', 2, 100),
        'max_features': trial.suggest_int('max_features', 1, 40),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
        'min_samples_split': trial.suggest_int('min_samples_split', 8, 13),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        'criterion': trial.suggest_categorical('criterion', ['entropy'])
    }
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=True)
    model = RandomForestClassifier(**grid_param)
    score = cross_val_score(model, X, y, scoring='accuracy').mean()
    print(f'score: {score}')
    return score

In [87]:
optimize = partial(objective, X=X_train, y=y_train)
study = optuna.create_study(direction='maximize')

[32m[I 2021-11-21 17:56:17,671][0m A new study created in memory with name: no-name-2ad7b8ae-0861-49e3-a1b6-fa9d91d5c904[0m


In [88]:
# Opimization
# Recevies the function to be optimized and the number of trials
study.optimize(optimize, n_trials=100)

[32m[I 2021-11-21 17:56:21,594][0m Trial 0 finished with value: 0.8245604011934236 and parameters: {'n_estimators': 650, 'max_depth': 20, 'max_features': 6, 'min_samples_leaf': 3, 'min_samples_split': 13, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 0 with value: 0.8245604011934236.[0m


score: 0.8245604011934236


[32m[I 2021-11-21 17:56:25,011][0m Trial 1 finished with value: 0.8335364692439533 and parameters: {'n_estimators': 530, 'max_depth': 63, 'max_features': 21, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 1 with value: 0.8335364692439533.[0m


score: 0.8335364692439533


[32m[I 2021-11-21 17:56:25,738][0m Trial 2 finished with value: 0.8132990541484162 and parameters: {'n_estimators': 70, 'max_depth': 100, 'max_features': 40, 'min_samples_leaf': 2, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 1 with value: 0.8335364692439533.[0m


score: 0.8132990541484162


[32m[I 2021-11-21 17:56:27,020][0m Trial 3 finished with value: 0.8189360756681265 and parameters: {'n_estimators': 280, 'max_depth': 84, 'max_features': 4, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 1 with value: 0.8335364692439533.[0m


score: 0.8189360756681265


[32m[I 2021-11-21 17:56:28,585][0m Trial 4 finished with value: 0.8290484352186885 and parameters: {'n_estimators': 320, 'max_depth': 49, 'max_features': 6, 'min_samples_leaf': 2, 'min_samples_split': 12, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 1 with value: 0.8335364692439533.[0m


score: 0.8290484352186885


[32m[I 2021-11-21 17:56:31,928][0m Trial 5 finished with value: 0.8346664127467784 and parameters: {'n_estimators': 580, 'max_depth': 82, 'max_features': 12, 'min_samples_leaf': 3, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 5 with value: 0.8346664127467784.[0m


score: 0.8346664127467784


[32m[I 2021-11-21 17:56:40,243][0m Trial 6 finished with value: 0.7986796165809688 and parameters: {'n_estimators': 690, 'max_depth': 23, 'max_features': 38, 'min_samples_leaf': 3, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 5 with value: 0.8346664127467784.[0m


score: 0.7986796165809688


[32m[I 2021-11-21 17:56:48,239][0m Trial 7 finished with value: 0.8357773122579826 and parameters: {'n_estimators': 980, 'max_depth': 28, 'max_features': 22, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 7 with value: 0.8357773122579826.[0m


score: 0.8357773122579826


[32m[I 2021-11-21 17:56:53,826][0m Trial 8 finished with value: 0.8335491652383672 and parameters: {'n_estimators': 900, 'max_depth': 45, 'max_features': 13, 'min_samples_leaf': 3, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 7 with value: 0.8357773122579826.[0m


score: 0.8335491652383672


[32m[I 2021-11-21 17:56:59,405][0m Trial 9 finished with value: 0.8234241096933917 and parameters: {'n_estimators': 590, 'max_depth': 94, 'max_features': 36, 'min_samples_leaf': 4, 'min_samples_split': 13, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 7 with value: 0.8357773122579826.[0m


score: 0.8234241096933917


[32m[I 2021-11-21 17:57:04,511][0m Trial 10 finished with value: 0.8279184917158636 and parameters: {'n_estimators': 990, 'max_depth': 3, 'max_features': 28, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 7 with value: 0.8357773122579826.[0m


score: 0.8279184917158636


[32m[I 2021-11-21 17:57:10,327][0m Trial 11 finished with value: 0.830146638735479 and parameters: {'n_estimators': 810, 'max_depth': 71, 'max_features': 17, 'min_samples_leaf': 1, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 7 with value: 0.8357773122579826.[0m


score: 0.830146638735479


[32m[I 2021-11-21 17:57:13,160][0m Trial 12 finished with value: 0.8380308512664254 and parameters: {'n_estimators': 380, 'max_depth': 35, 'max_features': 23, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8380308512664254


[32m[I 2021-11-21 17:57:16,056][0m Trial 13 finished with value: 0.8357836602551896 and parameters: {'n_estimators': 360, 'max_depth': 36, 'max_features': 27, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8357836602551896


[32m[I 2021-11-21 17:57:18,976][0m Trial 14 finished with value: 0.8324065257411286 and parameters: {'n_estimators': 350, 'max_depth': 35, 'max_features': 29, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8324065257411286


[32m[I 2021-11-21 17:57:22,350][0m Trial 15 finished with value: 0.8357773122579827 and parameters: {'n_estimators': 430, 'max_depth': 9, 'max_features': 29, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8357773122579827


[32m[I 2021-11-21 17:57:23,693][0m Trial 16 finished with value: 0.8346537167523647 and parameters: {'n_estimators': 170, 'max_depth': 41, 'max_features': 25, 'min_samples_leaf': 4, 'min_samples_split': 12, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8346537167523647


[32m[I 2021-11-21 17:57:27,735][0m Trial 17 finished with value: 0.8312765822383037 and parameters: {'n_estimators': 450, 'max_depth': 63, 'max_features': 33, 'min_samples_leaf': 5, 'min_samples_split': 12, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8312765822383037


[32m[I 2021-11-21 17:57:29,225][0m Trial 18 finished with value: 0.8324128737383354 and parameters: {'n_estimators': 220, 'max_depth': 58, 'max_features': 18, 'min_samples_leaf': 4, 'min_samples_split': 9, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8324128737383354


[32m[I 2021-11-21 17:57:32,915][0m Trial 19 finished with value: 0.8357836602551896 and parameters: {'n_estimators': 450, 'max_depth': 34, 'max_features': 24, 'min_samples_leaf': 5, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8357836602551896


[32m[I 2021-11-21 17:57:37,287][0m Trial 20 finished with value: 0.8042785501174381 and parameters: {'n_estimators': 440, 'max_depth': 14, 'max_features': 33, 'min_samples_leaf': 1, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8042785501174381


[32m[I 2021-11-21 17:57:40,259][0m Trial 21 finished with value: 0.8346537167523647 and parameters: {'n_estimators': 380, 'max_depth': 36, 'max_features': 25, 'min_samples_leaf': 5, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8346537167523647


[32m[I 2021-11-21 17:57:42,045][0m Trial 22 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 220, 'max_depth': 29, 'max_features': 26, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:57:42,750][0m Trial 23 finished with value: 0.8335428172411603 and parameters: {'n_estimators': 100, 'max_depth': 26, 'max_features': 17, 'min_samples_leaf': 4, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 12 with value: 0.8380308512664254.[0m


score: 0.8335428172411603


[32m[I 2021-11-21 17:57:44,745][0m Trial 24 finished with value: 0.8391544467720433 and parameters: {'n_estimators': 260, 'max_depth': 54, 'max_features': 24, 'min_samples_leaf': 5, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8391544467720433


[32m[I 2021-11-21 17:57:46,415][0m Trial 25 finished with value: 0.8290357392242749 and parameters: {'n_estimators': 190, 'max_depth': 55, 'max_features': 31, 'min_samples_leaf': 4, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8290357392242749


[32m[I 2021-11-21 17:57:48,057][0m Trial 26 finished with value: 0.8279121437186567 and parameters: {'n_estimators': 250, 'max_depth': 48, 'max_features': 20, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8279121437186567


[32m[I 2021-11-21 17:57:48,733][0m Trial 27 finished with value: 0.8279248397130706 and parameters: {'n_estimators': 120, 'max_depth': 71, 'max_features': 11, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8279248397130706


[32m[I 2021-11-21 17:57:50,514][0m Trial 28 finished with value: 0.8357900082523964 and parameters: {'n_estimators': 280, 'max_depth': 15, 'max_features': 15, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8357900082523964


[32m[I 2021-11-21 17:57:51,552][0m Trial 29 finished with value: 0.8312892782327175 and parameters: {'n_estimators': 150, 'max_depth': 42, 'max_features': 23, 'min_samples_leaf': 5, 'min_samples_split': 13, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8312892782327175


[32m[I 2021-11-21 17:57:51,873][0m Trial 30 finished with value: 0.8290547832158953 and parameters: {'n_estimators': 50, 'max_depth': 28, 'max_features': 9, 'min_samples_leaf': 2, 'min_samples_split': 12, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8290547832158953


[32m[I 2021-11-21 17:57:53,936][0m Trial 31 finished with value: 0.8335428172411603 and parameters: {'n_estimators': 290, 'max_depth': 16, 'max_features': 15, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8335428172411603


[32m[I 2021-11-21 17:57:55,760][0m Trial 32 finished with value: 0.8335364692439537 and parameters: {'n_estimators': 240, 'max_depth': 18, 'max_features': 20, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8335364692439537


[32m[I 2021-11-21 17:57:59,285][0m Trial 33 finished with value: 0.8357900082523964 and parameters: {'n_estimators': 510, 'max_depth': 10, 'max_features': 19, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8357900082523964


[32m[I 2021-11-21 17:58:02,929][0m Trial 34 finished with value: 0.82790579572145 and parameters: {'n_estimators': 530, 'max_depth': 8, 'max_features': 26, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.82790579572145


[32m[I 2021-11-21 17:58:05,160][0m Trial 35 finished with value: 0.6805306925664952 and parameters: {'n_estimators': 710, 'max_depth': 2, 'max_features': 1, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.6805306925664952


[32m[I 2021-11-21 17:58:07,078][0m Trial 36 finished with value: 0.8357836602551896 and parameters: {'n_estimators': 290, 'max_depth': 31, 'max_features': 15, 'min_samples_leaf': 5, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8357836602551896


[32m[I 2021-11-21 17:58:10,602][0m Trial 37 finished with value: 0.8335428172411603 and parameters: {'n_estimators': 500, 'max_depth': 22, 'max_features': 20, 'min_samples_leaf': 3, 'min_samples_split': 11, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8335428172411603


[32m[I 2021-11-21 17:58:13,613][0m Trial 38 finished with value: 0.8369136037580143 and parameters: {'n_estimators': 390, 'max_depth': 56, 'max_features': 21, 'min_samples_leaf': 4, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8369136037580143


[32m[I 2021-11-21 17:58:16,193][0m Trial 39 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 340, 'max_depth': 53, 'max_features': 21, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:58:19,413][0m Trial 40 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 410, 'max_depth': 64, 'max_features': 22, 'min_samples_leaf': 2, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:58:22,893][0m Trial 41 finished with value: 0.8346537167523647 and parameters: {'n_estimators': 420, 'max_depth': 72, 'max_features': 23, 'min_samples_leaf': 2, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8346537167523647


[32m[I 2021-11-21 17:58:25,819][0m Trial 42 finished with value: 0.8279121437186567 and parameters: {'n_estimators': 320, 'max_depth': 53, 'max_features': 31, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8279121437186567


[32m[I 2021-11-21 17:58:29,047][0m Trial 43 finished with value: 0.8335237732495397 and parameters: {'n_estimators': 400, 'max_depth': 63, 'max_features': 22, 'min_samples_leaf': 2, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8335237732495397


[32m[I 2021-11-21 17:58:34,000][0m Trial 44 finished with value: 0.823411413698978 and parameters: {'n_estimators': 570, 'max_depth': 78, 'max_features': 26, 'min_samples_leaf': 3, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.823411413698978


[32m[I 2021-11-21 17:58:35,668][0m Trial 45 finished with value: 0.8312702342410969 and parameters: {'n_estimators': 190, 'max_depth': 60, 'max_features': 23, 'min_samples_leaf': 2, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8312702342410969


[32m[I 2021-11-21 17:58:39,157][0m Trial 46 finished with value: 0.8177616961848537 and parameters: {'n_estimators': 370, 'max_depth': 66, 'max_features': 28, 'min_samples_leaf': 1, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8177616961848537


[32m[I 2021-11-21 17:58:42,978][0m Trial 47 finished with value: 0.8335301212467467 and parameters: {'n_estimators': 470, 'max_depth': 51, 'max_features': 22, 'min_samples_leaf': 3, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8335301212467467


[32m[I 2021-11-21 17:58:47,791][0m Trial 48 finished with value: 0.8324001777439218 and parameters: {'n_estimators': 630, 'max_depth': 46, 'max_features': 30, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8324001777439218


[32m[I 2021-11-21 17:58:50,646][0m Trial 49 finished with value: 0.8324001777439218 and parameters: {'n_estimators': 320, 'max_depth': 90, 'max_features': 27, 'min_samples_leaf': 4, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8324001777439218


[32m[I 2021-11-21 17:58:53,235][0m Trial 50 finished with value: 0.8357773122579826 and parameters: {'n_estimators': 330, 'max_depth': 41, 'max_features': 25, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8357773122579826


[32m[I 2021-11-21 17:58:54,911][0m Trial 51 finished with value: 0.8391480987748364 and parameters: {'n_estimators': 230, 'max_depth': 56, 'max_features': 21, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8391480987748364


[32m[I 2021-11-21 17:58:57,464][0m Trial 52 finished with value: 0.8346537167523647 and parameters: {'n_estimators': 390, 'max_depth': 58, 'max_features': 17, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8346537167523647


[32m[I 2021-11-21 17:58:59,196][0m Trial 53 finished with value: 0.8391480987748364 and parameters: {'n_estimators': 240, 'max_depth': 54, 'max_features': 21, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8391480987748364


[32m[I 2021-11-21 17:59:00,900][0m Trial 54 finished with value: 0.8380245032692185 and parameters: {'n_estimators': 250, 'max_depth': 55, 'max_features': 18, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8380245032692185


[32m[I 2021-11-21 17:59:01,780][0m Trial 55 finished with value: 0.8357900082523964 and parameters: {'n_estimators': 130, 'max_depth': 56, 'max_features': 18, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8357900082523964


[32m[I 2021-11-21 17:59:03,597][0m Trial 56 finished with value: 0.8369072557608075 and parameters: {'n_estimators': 260, 'max_depth': 68, 'max_features': 19, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8369072557608075


[32m[I 2021-11-21 17:59:04,870][0m Trial 57 finished with value: 0.8301593347298926 and parameters: {'n_estimators': 200, 'max_depth': 50, 'max_features': 14, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8301593347298926


[32m[I 2021-11-21 17:59:06,751][0m Trial 58 finished with value: 0.8357773122579826 and parameters: {'n_estimators': 230, 'max_depth': 43, 'max_features': 24, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 24 with value: 0.8391544467720433.[0m


score: 0.8357773122579826


[32m[I 2021-11-21 17:59:07,373][0m Trial 59 finished with value: 0.8391607947692503 and parameters: {'n_estimators': 80, 'max_depth': 46, 'max_features': 18, 'min_samples_leaf': 4, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8391607947692503


[32m[I 2021-11-21 17:59:07,992][0m Trial 60 finished with value: 0.8279248397130704 and parameters: {'n_estimators': 100, 'max_depth': 37, 'max_features': 12, 'min_samples_leaf': 5, 'min_samples_split': 12, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8279248397130704


[32m[I 2021-11-21 17:59:09,152][0m Trial 61 finished with value: 0.8369199517552213 and parameters: {'n_estimators': 160, 'max_depth': 47, 'max_features': 16, 'min_samples_leaf': 4, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8369199517552213


[32m[I 2021-11-21 17:59:10,267][0m Trial 62 finished with value: 0.8369136037580143 and parameters: {'n_estimators': 160, 'max_depth': 39, 'max_features': 16, 'min_samples_leaf': 4, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8369136037580143


[32m[I 2021-11-21 17:59:10,775][0m Trial 63 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 70, 'max_depth': 47, 'max_features': 18, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:59:11,634][0m Trial 64 finished with value: 0.8301720307243065 and parameters: {'n_estimators': 160, 'max_depth': 60, 'max_features': 9, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8301720307243065


[32m[I 2021-11-21 17:59:12,507][0m Trial 65 finished with value: 0.8357836602551896 and parameters: {'n_estimators': 120, 'max_depth': 45, 'max_features': 19, 'min_samples_leaf': 4, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8357836602551896


[32m[I 2021-11-21 17:59:13,718][0m Trial 66 finished with value: 0.8324255697327493 and parameters: {'n_estimators': 190, 'max_depth': 54, 'max_features': 13, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8324255697327493


[32m[I 2021-11-21 17:59:15,398][0m Trial 67 finished with value: 0.8357836602551896 and parameters: {'n_estimators': 260, 'max_depth': 48, 'max_features': 16, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8357836602551896


[32m[I 2021-11-21 17:59:16,041][0m Trial 68 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 80, 'max_depth': 39, 'max_features': 24, 'min_samples_leaf': 4, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:59:17,643][0m Trial 69 finished with value: 0.8380308512664254 and parameters: {'n_estimators': 220, 'max_depth': 32, 'max_features': 21, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8380308512664254


[32m[I 2021-11-21 17:59:19,813][0m Trial 70 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 300, 'max_depth': 31, 'max_features': 21, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:59:21,151][0m Trial 71 finished with value: 0.8346600647495717 and parameters: {'n_estimators': 200, 'max_depth': 44, 'max_features': 18, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8346600647495717


[32m[I 2021-11-21 17:59:22,157][0m Trial 72 finished with value: 0.8391480987748366 and parameters: {'n_estimators': 140, 'max_depth': 25, 'max_features': 20, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8391480987748366


[32m[I 2021-11-21 17:59:23,745][0m Trial 73 finished with value: 0.8346600647495717 and parameters: {'n_estimators': 220, 'max_depth': 27, 'max_features': 19, 'min_samples_leaf': 5, 'min_samples_split': 13, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8346600647495717


[32m[I 2021-11-21 17:59:25,699][0m Trial 74 finished with value: 0.8369072557608075 and parameters: {'n_estimators': 260, 'max_depth': 24, 'max_features': 20, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8369072557608075


[32m[I 2021-11-21 17:59:26,853][0m Trial 75 finished with value: 0.8357773122579827 and parameters: {'n_estimators': 140, 'max_depth': 31, 'max_features': 23, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8357773122579827


[32m[I 2021-11-21 17:59:27,616][0m Trial 76 finished with value: 0.8324128737383354 and parameters: {'n_estimators': 100, 'max_depth': 21, 'max_features': 21, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8324128737383354


[32m[I 2021-11-21 17:59:34,985][0m Trial 77 finished with value: 0.8369072557608075 and parameters: {'n_estimators': 930, 'max_depth': 60, 'max_features': 25, 'min_samples_leaf': 5, 'min_samples_split': 12, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8369072557608075


[32m[I 2021-11-21 17:59:36,711][0m Trial 78 finished with value: 0.8391607947692503 and parameters: {'n_estimators': 220, 'max_depth': 34, 'max_features': 22, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 59 with value: 0.8391607947692503.[0m


score: 0.8391607947692503


[32m[I 2021-11-21 17:59:37,146][0m Trial 79 finished with value: 0.8402716942804546 and parameters: {'n_estimators': 50, 'max_depth': 34, 'max_features': 24, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8402716942804546


[32m[I 2021-11-21 17:59:37,613][0m Trial 80 finished with value: 0.8324065257411286 and parameters: {'n_estimators': 50, 'max_depth': 34, 'max_features': 27, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8324065257411286


[32m[I 2021-11-21 17:59:38,499][0m Trial 81 finished with value: 0.8357836602551896 and parameters: {'n_estimators': 110, 'max_depth': 38, 'max_features': 22, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8357836602551896


[32m[I 2021-11-21 17:59:39,871][0m Trial 82 finished with value: 0.8357773122579826 and parameters: {'n_estimators': 170, 'max_depth': 32, 'max_features': 23, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8357773122579826


[32m[I 2021-11-21 17:59:40,411][0m Trial 83 finished with value: 0.8324065257411286 and parameters: {'n_estimators': 60, 'max_depth': 24, 'max_features': 24, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8324065257411286


[32m[I 2021-11-21 17:59:41,039][0m Trial 84 finished with value: 0.8369072557608075 and parameters: {'n_estimators': 80, 'max_depth': 35, 'max_features': 20, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8369072557608075


[32m[I 2021-11-21 17:59:43,338][0m Trial 85 finished with value: 0.8380245032692185 and parameters: {'n_estimators': 300, 'max_depth': 29, 'max_features': 22, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8380245032692185


[32m[I 2021-11-21 17:59:45,614][0m Trial 86 finished with value: 0.8121691106455913 and parameters: {'n_estimators': 210, 'max_depth': 19, 'max_features': 40, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8121691106455913


[32m[I 2021-11-21 17:59:47,940][0m Trial 87 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 280, 'max_depth': 41, 'max_features': 26, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:59:49,017][0m Trial 88 finished with value: 0.836919951755221 and parameters: {'n_estimators': 140, 'max_depth': 52, 'max_features': 24, 'min_samples_leaf': 4, 'min_samples_split': 8, 'bootstrap': True, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.836919951755221


[32m[I 2021-11-21 17:59:50,455][0m Trial 89 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 170, 'max_depth': 25, 'max_features': 28, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:59:51,800][0m Trial 90 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 180, 'max_depth': 33, 'max_features': 21, 'min_samples_leaf': 5, 'min_samples_split': 11, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 17:59:54,131][0m Trial 91 finished with value: 0.8380308512664254 and parameters: {'n_estimators': 300, 'max_depth': 29, 'max_features': 23, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8380308512664254


[32m[I 2021-11-21 17:59:55,953][0m Trial 92 finished with value: 0.8357836602551896 and parameters: {'n_estimators': 230, 'max_depth': 28, 'max_features': 25, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8357836602551896


[32m[I 2021-11-21 17:59:58,521][0m Trial 93 finished with value: 0.8369072557608075 and parameters: {'n_estimators': 350, 'max_depth': 100, 'max_features': 22, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8369072557608075


[32m[I 2021-11-21 18:00:04,178][0m Trial 94 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 760, 'max_depth': 36, 'max_features': 23, 'min_samples_leaf': 5, 'min_samples_split': 10, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 18:00:06,175][0m Trial 95 finished with value: 0.8391480987748364 and parameters: {'n_estimators': 280, 'max_depth': 50, 'max_features': 20, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8391480987748364


[32m[I 2021-11-21 18:00:08,218][0m Trial 96 finished with value: 0.8357773122579826 and parameters: {'n_estimators': 270, 'max_depth': 49, 'max_features': 20, 'min_samples_leaf': 5, 'min_samples_split': 9, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8357773122579826


[32m[I 2021-11-21 18:00:10,245][0m Trial 97 finished with value: 0.8391544467720433 and parameters: {'n_estimators': 240, 'max_depth': 51, 'max_features': 19, 'min_samples_leaf': 5, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8391544467720433


[32m[I 2021-11-21 18:00:11,937][0m Trial 98 finished with value: 0.8369009077636006 and parameters: {'n_estimators': 240, 'max_depth': 57, 'max_features': 19, 'min_samples_leaf': 5, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8369009077636006


[32m[I 2021-11-21 18:00:12,858][0m Trial 99 finished with value: 0.8346664127467784 and parameters: {'n_estimators': 130, 'max_depth': 51, 'max_features': 17, 'min_samples_leaf': 3, 'min_samples_split': 8, 'bootstrap': False, 'criterion': 'entropy'}. Best is trial 79 with value: 0.8402716942804546.[0m


score: 0.8346664127467784


In [89]:
study.best_params

{'n_estimators': 50,
 'max_depth': 34,
 'max_features': 24,
 'min_samples_leaf': 5,
 'min_samples_split': 9,
 'bootstrap': False,
 'criterion': 'entropy'}

In [71]:
model_rf = RandomForestClassifier(bootstrap=True,
                                  criterion='entropy',
                                  max_depth=50, max_features=6, 
                                  min_samples_leaf=1, 
                                  min_samples_split=10, 
                                  n_estimators=100,
                                  random_state=734)
y_pred_exist = model_rf.fit(X_train, y_train).predict(X_test)
(y_test.values == y_pred_exist).sum() / y_test.shape

array([0.8062201])

In [97]:
#model_rf_new = RandomForestClassifier(**study.best_params)
y_pred_new = model_rf_new.fit(X_train, y_train).predict(X_test)
(y_test.values == y_pred_new).sum() / y_test.shape

array([0.78708134])

In [98]:
cross_val_score(model_rf, X_train, y_train, cv=cv, scoring='accuracy').mean()

0.8290484352186885