In [6]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedKFold
from sklearn import metrics

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.ensemble import RandomForestClassifier
seed = 2017

In [7]:
# read the data in
df = pd.read_csv("data/Diabetes.csv")
X = df.iloc[:,:8].values     # independent variables
y = df['class'].values     # dependent variables

In [8]:
#Normalize
X = StandardScaler().fit_transform(X)

# evaluate the model by splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
num_trees = 100
clf_rf = RandomForestClassifier(random_state=seed).fit(X_train, y_train)

In [10]:
rf_params = {
    'n_estimators': [100, 250, 500, 750, 1000],
    'criterion':  ['gini', 'entropy'],
    'max_features': [None, 'auto', 'sqrt', 'log2'],
    'max_depth': [1, 3, 5, 7, 9]
}

# setting verbose = 10 will print the progress for every 10 task completion
grid = GridSearchCV(clf_rf, rf_params, scoring='roc_auc', cv=kfold, verbose=10, n_jobs=-1)
grid.fit(X_train, y_train)

print ('Best Parameters: ', grid.best_params_)

results = cross_val_score(grid.best_estimator_, X_train,y_train, cv=kfold)
print ("Accuracy - Train CV: ", results.mean())
print ("Accuracy - Train : ", metrics.accuracy_score(grid.best_estimator_.predict(X_train), y_train))
print ("Accuracy - Test : ", metrics.accuracy_score(grid.best_estimator_.predict(X_test), y_test))

Fitting 5 folds for each of 200 candidates, totalling 1000 fits
[CV 2/5; 156/200] END criterion=entropy, max_depth=5, max_features=log2, n_estimators=100;, score=0.815 total time=   0.2s
[CV 3/5; 156/200] START criterion=entropy, max_depth=5, max_features=log2, n_estimators=100
[CV 3/5; 156/200] END criterion=entropy, max_depth=5, max_features=log2, n_estimators=100;, score=0.819 total time=   0.2s
[CV 1/5; 157/200] START criterion=entropy, max_depth=5, max_features=log2, n_estimators=250
[CV 1/5; 157/200] END criterion=entropy, max_depth=5, max_features=log2, n_estimators=250;, score=0.847 total time=   0.5s
[CV 2/5; 159/200] START criterion=entropy, max_depth=5, max_features=log2, n_estimators=750
[CV 2/5; 159/200] END criterion=entropy, max_depth=5, max_features=log2, n_estimators=750;, score=0.817 total time=   1.3s
[CV 3/5; 163/200] START criterion=entropy, max_depth=7, max_features=None, n_estimators=500
[CV 3/5; 163/200] END criterion=entropy, max_depth=7, max_features=None, n_e

[CV 5/5; 109/200] END criterion=entropy, max_depth=1, max_features=auto, n_estimators=750;, score=nan total time=   0.0s
[CV 1/5; 110/200] START criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000
[CV 1/5; 110/200] END criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 2/5; 110/200] START criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000
[CV 2/5; 110/200] END criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 3/5; 110/200] START criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000
[CV 3/5; 110/200] END criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 4/5; 110/200] START criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000
[CV 4/5; 110/200] END criterion=entropy, max_depth=1, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 5/5; 110/200] S

[CV 4/5; 90/200] END criterion=gini, max_depth=9, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 5/5; 90/200] START criterion=gini, max_depth=9, max_features=auto, n_estimators=1000
[CV 5/5; 90/200] END criterion=gini, max_depth=9, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 1/5; 91/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=100
[CV 1/5; 91/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=100;, score=0.810 total time=   0.2s
[CV 2/5; 92/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=250
[CV 2/5; 92/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=250;, score=0.811 total time=   0.4s
[CV 3/5; 93/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=500
[CV 3/5; 93/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=500;, score=0.814 total time=   0.8s
[CV 3/5; 95/200] START criterion=gini, max_depth=9, ma

[CV 3/5; 30/200] START criterion=gini, max_depth=3, max_features=auto, n_estimators=1000
[CV 3/5; 30/200] END criterion=gini, max_depth=3, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 3/5; 31/200] START criterion=gini, max_depth=3, max_features=sqrt, n_estimators=100
[CV 3/5; 31/200] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.799 total time=   0.1s
[CV 4/5; 31/200] START criterion=gini, max_depth=3, max_features=sqrt, n_estimators=100
[CV 4/5; 31/200] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.854 total time=   0.1s
[CV 5/5; 31/200] START criterion=gini, max_depth=3, max_features=sqrt, n_estimators=100
[CV 5/5; 31/200] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.747 total time=   0.2s
[CV 1/5; 32/200] START criterion=gini, max_depth=3, max_features=sqrt, n_estimators=250
[CV 1/5; 32/200] END criterion=gini, max_depth=3, max_features=sqrt, n_estimators=25

[CV 5/5; 139/200] START criterion=entropy, max_depth=3, max_features=log2, n_estimators=750
[CV 5/5; 139/200] END criterion=entropy, max_depth=3, max_features=log2, n_estimators=750;, score=0.765 total time=   1.2s
[CV 1/5; 140/200] START criterion=entropy, max_depth=3, max_features=log2, n_estimators=1000
[CV 1/5; 140/200] END criterion=entropy, max_depth=3, max_features=log2, n_estimators=1000;, score=0.862 total time=   1.7s
[CV 4/5; 152/200] START criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=250
[CV 4/5; 152/200] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=250;, score=0.860 total time=   0.4s
[CV 5/5; 154/200] START criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=750
[CV 5/5; 154/200] END criterion=entropy, max_depth=5, max_features=sqrt, n_estimators=750;, score=0.769 total time=   1.3s
[CV 2/5; 158/200] START criterion=entropy, max_depth=5, max_features=log2, n_estimators=500
[CV 2/5; 158/200] END criterion=entropy, max_d

[CV 2/5; 11/200] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.792 total time=   0.1s
[CV 3/5; 11/200] START criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100
[CV 3/5; 11/200] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.788 total time=   0.1s
[CV 4/5; 11/200] START criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100
[CV 4/5; 11/200] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=100;, score=0.832 total time=   0.1s
[CV 5/5; 13/200] START criterion=gini, max_depth=1, max_features=sqrt, n_estimators=500
[CV 5/5; 13/200] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=500;, score=0.728 total time=   0.6s
[CV 1/5; 14/200] START criterion=gini, max_depth=1, max_features=sqrt, n_estimators=750
[CV 1/5; 14/200] END criterion=gini, max_depth=1, max_features=sqrt, n_estimators=750;, score=0.856 total time=   0.9s
[CV 2/5; 24/200] START criterion=gini, max_depth=3, m

[CV 3/5; 167/200] END criterion=entropy, max_depth=7, max_features=auto, n_estimators=250;, score=nan total time=   0.0s
[CV 4/5; 167/200] START criterion=entropy, max_depth=7, max_features=auto, n_estimators=250
[CV 4/5; 167/200] END criterion=entropy, max_depth=7, max_features=auto, n_estimators=250;, score=nan total time=   0.0s
[CV 5/5; 167/200] START criterion=entropy, max_depth=7, max_features=auto, n_estimators=250
[CV 5/5; 167/200] END criterion=entropy, max_depth=7, max_features=auto, n_estimators=250;, score=nan total time=   0.0s
[CV 1/5; 168/200] START criterion=entropy, max_depth=7, max_features=auto, n_estimators=500
[CV 1/5; 168/200] END criterion=entropy, max_depth=7, max_features=auto, n_estimators=500;, score=nan total time=   0.0s
[CV 3/5; 168/200] START criterion=entropy, max_depth=7, max_features=auto, n_estimators=500
[CV 3/5; 168/200] END criterion=entropy, max_depth=7, max_features=auto, n_estimators=500;, score=nan total time=   0.0s
[CV 5/5; 168/200] START cri

[CV 4/5; 22/200] END criterion=gini, max_depth=3, max_features=None, n_estimators=250;, score=0.834 total time=   0.4s
[CV 5/5; 22/200] START criterion=gini, max_depth=3, max_features=None, n_estimators=250
[CV 5/5; 22/200] END criterion=gini, max_depth=3, max_features=None, n_estimators=250;, score=0.767 total time=   0.4s
[CV 1/5; 23/200] START criterion=gini, max_depth=3, max_features=None, n_estimators=500
[CV 1/5; 23/200] END criterion=gini, max_depth=3, max_features=None, n_estimators=500;, score=0.855 total time=   1.1s
[CV 2/5; 23/200] START criterion=gini, max_depth=3, max_features=None, n_estimators=500
[CV 2/5; 23/200] END criterion=gini, max_depth=3, max_features=None, n_estimators=500;, score=0.830 total time=   1.0s
[CV 4/5; 42/200] START criterion=gini, max_depth=5, max_features=None, n_estimators=250
[CV 4/5; 42/200] END criterion=gini, max_depth=5, max_features=None, n_estimators=250;, score=0.849 total time=   0.6s
[CV 5/5; 42/200] START criterion=gini, max_depth=5, m

[CV 5/5; 179/200] END criterion=entropy, max_depth=7, max_features=log2, n_estimators=750;, score=0.773 total time=   1.5s
[CV 1/5; 180/200] START criterion=entropy, max_depth=7, max_features=log2, n_estimators=1000
[CV 1/5; 180/200] END criterion=entropy, max_depth=7, max_features=log2, n_estimators=1000;, score=0.848 total time=   1.9s
[CV 4/5; 193/200] START criterion=entropy, max_depth=9, max_features=sqrt, n_estimators=500
[CV 4/5; 193/200] END criterion=entropy, max_depth=9, max_features=sqrt, n_estimators=500;, score=0.850 total time=   0.9s
[CV 4/5; 195/200] START criterion=entropy, max_depth=9, max_features=sqrt, n_estimators=1000
[CV 4/5; 195/200] END criterion=entropy, max_depth=9, max_features=sqrt, n_estimators=1000;, score=0.843 total time=   1.8s
[CV 3/5; 200/200] START criterion=entropy, max_depth=9, max_features=log2, n_estimators=1000
[CV 3/5; 200/200] END criterion=entropy, max_depth=9, max_features=log2, n_estimators=1000;, score=0.809 total time=   1.4s
[CV 5/5; 3/

[CV 2/5; 61/200] END criterion=gini, max_depth=7, max_features=None, n_estimators=100;, score=0.808 total time=   0.2s
[CV 2/5; 62/200] START criterion=gini, max_depth=7, max_features=None, n_estimators=250
[CV 2/5; 62/200] END criterion=gini, max_depth=7, max_features=None, n_estimators=250;, score=0.814 total time=   0.6s
[CV 3/5; 63/200] START criterion=gini, max_depth=7, max_features=None, n_estimators=500
[CV 3/5; 63/200] END criterion=gini, max_depth=7, max_features=None, n_estimators=500;, score=0.796 total time=   1.2s
[CV 1/5; 66/200] START criterion=gini, max_depth=7, max_features=auto, n_estimators=100
[CV 1/5; 66/200] END criterion=gini, max_depth=7, max_features=auto, n_estimators=100;, score=nan total time=   0.0s
[CV 2/5; 66/200] START criterion=gini, max_depth=7, max_features=auto, n_estimators=100
[CV 2/5; 66/200] END criterion=gini, max_depth=7, max_features=auto, n_estimators=100;, score=nan total time=   0.0s
[CV 3/5; 66/200] START criterion=gini, max_depth=7, max_f

[CV 1/5; 176/200] END criterion=entropy, max_depth=7, max_features=log2, n_estimators=100;, score=0.843 total time=   0.2s
[CV 1/5; 183/200] START criterion=entropy, max_depth=9, max_features=None, n_estimators=500
[CV 1/5; 183/200] END criterion=entropy, max_depth=9, max_features=None, n_estimators=500;, score=0.845 total time=   1.4s
[CV 2/5; 183/200] START criterion=entropy, max_depth=9, max_features=None, n_estimators=500
[CV 2/5; 183/200] END criterion=entropy, max_depth=9, max_features=None, n_estimators=500;, score=0.800 total time=   1.4s
[CV 3/5; 194/200] START criterion=entropy, max_depth=9, max_features=sqrt, n_estimators=750
[CV 3/5; 194/200] END criterion=entropy, max_depth=9, max_features=sqrt, n_estimators=750;, score=0.805 total time=   1.3s
[CV 1/5; 197/200] START criterion=entropy, max_depth=9, max_features=log2, n_estimators=250
[CV 1/5; 197/200] END criterion=entropy, max_depth=9, max_features=log2, n_estimators=250;, score=0.836 total time=   0.5s
[CV 4/5; 198/200]

[CV 1/5; 98/200] START criterion=gini, max_depth=9, max_features=log2, n_estimators=500
[CV 1/5; 98/200] END criterion=gini, max_depth=9, max_features=log2, n_estimators=500;, score=0.835 total time=   0.9s
[CV 3/5; 100/200] START criterion=gini, max_depth=9, max_features=log2, n_estimators=1000
[CV 3/5; 100/200] END criterion=gini, max_depth=9, max_features=log2, n_estimators=1000;, score=0.801 total time=   1.9s
[CV 2/5; 112/200] START criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=250
[CV 2/5; 112/200] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=250;, score=0.790 total time=   0.4s
[CV 2/5; 114/200] START criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=750
[CV 2/5; 114/200] END criterion=entropy, max_depth=1, max_features=sqrt, n_estimators=750;, score=0.790 total time=   1.1s
[CV 1/5; 119/200] START criterion=entropy, max_depth=1, max_features=log2, n_estimators=750
[CV 1/5; 119/200] END criterion=entropy, max_depth=1, max_fe

[CV 1/5; 74/200] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=750;, score=0.836 total time=   1.3s
[CV 3/5; 79/200] START criterion=gini, max_depth=7, max_features=log2, n_estimators=750
[CV 3/5; 79/200] END criterion=gini, max_depth=7, max_features=log2, n_estimators=750;, score=0.807 total time=   1.4s
[CV 4/5; 79/200] START criterion=gini, max_depth=7, max_features=log2, n_estimators=750
[CV 4/5; 79/200] END criterion=gini, max_depth=7, max_features=log2, n_estimators=750;, score=0.854 total time=   1.3s
[CV 4/5; 92/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=250
[CV 4/5; 92/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=250;, score=0.843 total time=   0.4s
[CV 5/5; 92/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=250
[CV 5/5; 92/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=250;, score=0.772 total time=   0.4s
[CV 4/5; 94/200] START criterion=gini, max_depth=9, m

[CV 4/5; 90/200] START criterion=gini, max_depth=9, max_features=auto, n_estimators=1000
[CV 4/5; 90/200] END criterion=gini, max_depth=9, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 5/5; 90/200] START criterion=gini, max_depth=9, max_features=auto, n_estimators=1000
[CV 5/5; 90/200] END criterion=gini, max_depth=9, max_features=auto, n_estimators=1000;, score=nan total time=   0.0s
[CV 1/5; 91/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=100
[CV 1/5; 91/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=100;, score=0.810 total time=   0.2s
[CV 2/5; 91/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=100
[CV 2/5; 91/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=100;, score=0.813 total time=   0.2s
[CV 5/5; 91/200] START criterion=gini, max_depth=9, max_features=sqrt, n_estimators=100
[CV 5/5; 91/200] END criterion=gini, max_depth=9, max_features=sqrt, n_estimators=10

250 fits failed out of a total of 1000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
90 fits failed with the following error:
Traceback (most recent call last):
  File "/home/kazi/Works/Projects/machine-learning/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/kazi/Works/Projects/machine-learning/venv/lib/python3.10/site-packages/sklearn/base.py", line 1144, in wrapper
    estimator._validate_params()
  File "/home/kazi/Works/Projects/machine-learning/venv/lib/python3.10/site-packages/sklearn/base.py", line 637, in _validate_params
    validate_parameter_constraints(
  File "/home/kazi/Works/Projects/machine-learning/v

Best Parameters:  {'criterion': 'entropy', 'max_depth': 5, 'max_features': 'log2', 'n_estimators': 500}
Accuracy - Train CV:  0.7522499134648667
Accuracy - Train :  0.8621973929236499
Accuracy - Test :  0.7965367965367965


***RandomSearch***

In [12]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint

# specify parameters and distributions to sample from
param_dist = {'n_estimators':sp_randint(100,1000),
              'criterion': ['gini', 'entropy'],
              'max_features': [None, 'auto', 'sqrt', 'log2'],
              'max_depth': [None, 1, 3, 5, 7, 9]
             }

# run randomized search
n_iter_search = 20
random_search = RandomizedSearchCV(clf_rf, param_distributions=param_dist, cv=kfold, 
                                   n_iter=n_iter_search, verbose=10, n_jobs=-1, random_state=seed)

random_search.fit(X_train, y_train)
# report(random_search.cv_results_)

print ('Best Parameters: ', random_search.best_params_)

results = cross_val_score(random_search.best_estimator_, X_train,y_train, cv=kfold)
print ("Accuracy - Train CV: ", results.mean())
print ("Accuracy - Train : ", metrics.accuracy_score(random_search.best_estimator_.predict(X_train), y_train))
print ("Accuracy - Test : ", metrics.accuracy_score(random_search.best_estimator_.predict(X_test), y_test))

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 3/5; 58/200] END criterion=gini, max_depth=5, max_features=log2, n_estimators=500;, score=0.804 total time=   0.9s
[CV 2/5; 64/200] START criterion=gini, max_depth=7, max_features=None, n_estimators=750
[CV 2/5; 64/200] END criterion=gini, max_depth=7, max_features=None, n_estimators=750;, score=0.818 total time=   1.8s
[CV 4/5; 73/200] START criterion=gini, max_depth=7, max_features=sqrt, n_estimators=500
[CV 4/5; 73/200] END criterion=gini, max_depth=7, max_features=sqrt, n_estimators=500;, score=0.852 total time=   0.8s
[CV 1/5; 77/200] START criterion=gini, max_depth=7, max_features=log2, n_estimators=250
[CV 1/5; 77/200] END criterion=gini, max_depth=7, max_features=log2, n_estimators=250;, score=0.842 total time=   0.4s
[CV 2/5; 77/200] START criterion=gini, max_depth=7, max_features=log2, n_estimators=250
[CV 2/5; 77/200] END criterion=gini, max_depth=7, max_features=log2, n_estimators=250;, score=0.814 total time

[CV 1/5; 173/200] START criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=500
[CV 1/5; 173/200] END criterion=entropy, max_depth=7, max_features=sqrt, n_estimators=500;, score=0.847 total time=   0.9s
[CV 2/5; 176/200] START criterion=entropy, max_depth=7, max_features=log2, n_estimators=100
[CV 2/5; 176/200] END criterion=entropy, max_depth=7, max_features=log2, n_estimators=100;, score=0.818 total time=   0.2s
[CV 3/5; 176/200] START criterion=entropy, max_depth=7, max_features=log2, n_estimators=100
[CV 3/5; 176/200] END criterion=entropy, max_depth=7, max_features=log2, n_estimators=100;, score=0.824 total time=   0.2s
[CV 2/5; 178/200] START criterion=entropy, max_depth=7, max_features=log2, n_estimators=500
[CV 2/5; 178/200] END criterion=entropy, max_depth=7, max_features=log2, n_estimators=500;, score=0.814 total time=   1.0s
[CV 3/5; 178/200] START criterion=entropy, max_depth=7, max_features=log2, n_estimators=500
[CV 3/5; 178/200] END criterion=entropy, max_dep

20 fits failed out of a total of 100.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "/home/kazi/Works/Projects/machine-learning/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/kazi/Works/Projects/machine-learning/venv/lib/python3.10/site-packages/sklearn/base.py", line 1144, in wrapper
    estimator._validate_params()
  File "/home/kazi/Works/Projects/machine-learning/venv/lib/python3.10/site-packages/sklearn/base.py", line 637, in _validate_params
    validate_parameter_constraints(
  File "/home/kazi/Works/Projects/machine-learning/ven

Best Parameters:  {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'n_estimators': 853}
Accuracy - Train CV:  0.7522152994115612
Accuracy - Train :  0.8640595903165735
Accuracy - Test :  0.7878787878787878


In [None]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.ensemble import RandomForestClassifier
from bayes_opt import BayesianOptimization, UtilityFunction

# Assuming X_train, y_train are defined somewhere in your script

def rfccv(n_estimators, min_samples_split, max_features):
    return cross_val_score(RandomForestClassifier(n_estimators=int(n_estimators),
                                                  min_samples_split=int(min_samples_split),
                                                  max_features=min(max_features, 0.999),
                                                  random_state=2017),
                           X_train, y_train, scoring='f1', cv=kfold).mean()

# Initialize kfold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Create BayesianOptimization object
rfcBO = BayesianOptimization(rfccv, {'n_estimators': (100, 1000),
                                     'min_samples_split': (2, 25),
                                     'max_features': (0.1, 0.999)})

# Set Gaussian Process parameters using set_gp_params method
rfcBO.set_gp_params(alpha=1e5)

# Create an instance of UtilityFunction
utility = UtilityFunction(kind="ucb", kappa=2.5, xi=0.0)

# Perform Bayesian Optimization
rfcBO.maximize(init_points=5, n_iter=10, acquisition_function=utility)

# Print the maximum F1 score
print('RFC: %f' % rfcBO.res['max']['max_val'])

|   iter    |  target   | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------
| [0m1        [0m | [0m0.6465   [0m | [0m0.4689   [0m | [0m22.22    [0m | [0m781.4    [0m |
| [0m2        [0m | [0m0.6262   [0m | [0m0.2954   [0m | [0m10.8     [0m | [0m406.7    [0m |
| [0m3        [0m | [0m0.6371   [0m | [0m0.7837   [0m | [0m10.71    [0m | [0m354.8    [0m |
| [0m4        [0m | [0m0.5875   [0m | [0m0.1693   [0m | [0m12.97    [0m | [0m706.8    [0m |
| [0m5        [0m | [0m0.6247   [0m | [0m0.7811   [0m | [0m3.747    [0m | [0m843.4    [0m |
| [0m6        [0m | [0m0.6255   [0m | [0m0.3971   [0m | [0m8.025    [0m | [0m100.2    [0m |
| [0m7        [0m | [0m0.632    [0m | [0m0.6497   [0m | [0m16.14    [0m | [0m1e+03    [0m |
| [0m8        [0m | [0m0.6196   [0m | [0m0.2585   [0m | [0m4.991    [0m | [0m100.1    [0m |
| [0m9        [0m | [0m0.6349   [0m | [0m0.8549   [0m 