In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

In [12]:
# Read diabetes.csv from the working directory and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='diabetes.csv')
data.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [16]:
# Target: the 'Outcome' column, kept as a one-column DataFrame.
y = data.loc[:, ['Outcome']]
# Features: every remaining column.
X = data.drop(labels=['Outcome'], axis=1)

In [17]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=21,
    test_size=0.33,
)

In [31]:
# Baseline XGBoost classifier; it is also the base estimator handed to the
# grid search further down, which overrides the tuned settings per candidate.
xgb_settings = {
    'tree_method': 'hist',
    'verbosity': 2,
    'n_estimators': 30,
    'max_depth': 5,
    'max_leaves': 0,
    'learning_rate': 0.1,
    'colsample_bytree': 0.5,
}
model = XGBClassifier(**xgb_settings)

In [32]:
# Fit the baseline classifier on the training split; the fitted estimator
# returned by fit() is what the cell displays.
model.fit(X=X_train, y=y_train)

In [54]:
# Hyper-parameter grid for the search. Every combination of the listed values
# is one candidate: 4 * 4 * 6 * 4 * 3 = 1152 candidates in total.
parameters = dict(
    n_estimators=[20, 30, 40, 50],
    max_depth=[2, 4, 6, 10],
    max_leaves=[0, 2, 4, 6, 8, 10],
    learning_rate=[0.3, 0.1, 0.01, 0.005],
    colsample_bytree=[0.8, 0.6, 0.4],
)

In [36]:
# 4-fold shuffled cross-validation; the fixed seed keeps fold assignment repeatable.
kfold = KFold(n_splits=4, shuffle=True, random_state=21)

# Exhaustive search over the `parameters` grid, scored by ROC AUC on all cores.
# The grid has to be `parameters` (the dict defined earlier in this notebook):
# the name `params` does not exist on a fresh kernel and is later rebound to
# grid.cv_results_['params'] (a list of dicts), so passing it here only worked
# through leftover kernel state and breaks Restart & Run All.
grid = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    scoring='roc_auc',
    n_jobs=-1,
    cv=kfold,
    verbose=1,
)
grid.fit(X_train, y_train)

Fitting 4 folds for each of 1152 candidates, totalling 4608 fits


In [61]:
# Best mean cross-validated score and the parameter combination that produced it.
(grid.best_score_, grid.best_params_)

(0.8378171685180626,
 {'colsample_bytree': 0.4,
  'learning_rate': 0.3,
  'max_depth': 2,
  'max_leaves': 2,
  'n_estimators': 50})

In [59]:
# Pull the per-candidate summary arrays out of the finished grid search.
cv_summary = grid.cv_results_
means = cv_summary['mean_test_score']
stds = cv_summary['std_test_score']
params = cv_summary['params']

# One line per candidate: mean test score, its standard deviation, and the
# parameter settings that were evaluated.
for mean_score, std_score, candidate in zip(means, stds, params):
    print('Score: %.6f, StD. % .6f, Parameters' % (mean_score, std_score), candidate)

Score: 0.825773, StD.  0.026742, Parameters {'colsample_bytree': 0.8, 'learning_rate': 0.3, 'max_depth': 2, 'max_leaves': 0, 'n_estimators': 20}
Score: 0.824326, StD.  0.027723, Parameters {'colsample_bytree': 0.8, 'learning_rate': 0.3, 'max_depth': 2, 'max_leaves': 0, 'n_estimators': 30}
Score: 0.818280, StD.  0.032009, Parameters {'colsample_bytree': 0.8, 'learning_rate': 0.3, 'max_depth': 2, 'max_leaves': 0, 'n_estimators': 40}
Score: 0.821240, StD.  0.030687, Parameters {'colsample_bytree': 0.8, 'learning_rate': 0.3, 'max_depth': 2, 'max_leaves': 0, 'n_estimators': 50}
Score: 0.827729, StD.  0.030284, Parameters {'colsample_bytree': 0.8, 'learning_rate': 0.3, 'max_depth': 2, 'max_leaves': 2, 'n_estimators': 20}
Score: 0.831954, StD.  0.026594, Parameters {'colsample_bytree': 0.8, 'learning_rate': 0.3, 'max_depth': 2, 'max_leaves': 2, 'n_estimators': 30}
Score: 0.834830, StD.  0.027362, Parameters {'colsample_bytree': 0.8, 'learning_rate': 0.3, 'max_depth': 2, 'max_leaves': 2, 'n_es

In [51]:
# Look up the 'std_test_score' entry of the grid-search results.
# NOTE(review): the output saved under this cell is the whole cv_results_ dict
# (it starts with 'mean_fit_time'), so it appears to predate this line —
# re-run the cell to refresh it.
grid.cv_results_['std_test_score']

{'mean_fit_time': array([0.01495123, 0.01613873, 0.01403087, ..., 0.01884669, 0.01817077,
        0.01840639]),
 'std_fit_time': array([0.00225744, 0.00296631, 0.00391109, ..., 0.00274657, 0.00473145,
        0.0065847 ]),
 'mean_score_time': array([0.01002795, 0.00643975, 0.00965804, ..., 0.00458515, 0.00567067,
        0.00464249]),
 'std_score_time': array([0.00110682, 0.00205224, 0.00536637, ..., 0.0005269 , 0.00234728,
        0.00052585]),
 'param_colsample_bytree': masked_array(data=[0.8, 0.8, 0.8, ..., 0.4, 0.4, 0.4],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_learning_rate': masked_array(data=[0.3, 0.3, 0.3, ..., 0.005, 0.005, 0.005],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_max_depth': masked_array(data=[2, 2, 2, ..., 10, 10, 10],
              mask=[False, False, False, ..., False, False, False],
        f

In [53]:
# Print the hyper-parameter names in the search grid.
# This iterates `parameters` (the grid dict), not `params`: earlier in the
# notebook `params` is rebound to grid.cv_results_['params'], a list with one
# dict per candidate, so on a top-to-bottom run the old loop would dump every
# candidate instead of the five grid keys shown in the saved output.
# zip() over a single iterable yields 1-tuples, hence lines like ('n_estimators',).
for name in zip(parameters):
    print(name)

('n_estimators',)
('max_depth',)
('max_leaves',)
('learning_rate',)
('colsample_bytree',)
