In [1]:
import pandas as pd
import numpy as np
import cupy as cp  # GPU

import sklearn.metrics as mt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

<p style="background-image: linear-gradient(to right, #0aa98f, #68dab2)"> &nbsp; </p>

In [2]:
# Location of the dataset, relative to the notebook's working directory.
DATA_PATH = 'data/05_diabetes.csv'

data = pd.read_csv(DATA_PATH)
data.head(3)  # last expression of the cell: shows the first three rows

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1


In [3]:
# Target: the 'Outcome' column.
y = data['Outcome']

# Features: everything except the target AND the 'Unnamed: 0' column.
# 'Unnamed: 0' is the row index that was saved into the CSV (it appears as a
# regular column in data.head()); it is not a patient measurement, and leaving
# it in X would feed the row number to the model as a feature.
# errors='ignore' keeps this cell working if the CSV is ever loaded with
# index_col=0 and the column is therefore already absent.
X = data.drop(columns=['Outcome', 'Unnamed: 0'], errors='ignore')

# Hold out 20% of the rows for evaluation; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the held-out rows leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [4]:
# Baseline: XGBoost classifier with library-default hyperparameters, fitted
# on the training split and evaluated on the held-out split.
model = XGBClassifier()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Compute both metrics first, then report them.
confusion_matrix = mt.confusion_matrix(y_true=y_test, y_pred=predictions)
accuracy = mt.accuracy_score(y_true=y_test, y_pred=predictions)

print('\nConfusion Matrix:\n', confusion_matrix)
print('\nAccuracy:', accuracy)


Confusion Matrix:
 [[88 19]
 [12 35]]

Accuracy: 0.7987012987012987


<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [5]:
# Hyperparameter grids for GridSearchCV. Each key is an XGBClassifier
# constructor argument; each value lists the candidates to try.

# Small grid: 3 * 3 * 4 * 3 = 108 combinations.
params_short = dict(
    max_depth=[3, 4, 5],
    subsample=[0.7, 0.8, 0.9],
    n_estimators=[100, 500, 750, 1000],
    learning_rate=[0.1, 0.2, 0.3],
)

# Larger grids, kept for reference (disabled).
#
# params_mid = {
#     'max_depth': [3, 5, 7, 9],
#     'subsample': [0.2, 0.4, 0.6, 0.8],
#     'n_estimators': [100, 500, 1000, 2000, 5000],
#     'learning_rate': [0.2, 0.4, 0.6, 0.8]
# }
#
# params_long = {
#     'max_depth': range(3,10),
#     'subsample': np.arange(0.1, 1, 0.1),
#     'n_estimators': [100, 250, 500, 750, 1000, 2000, 5000],
#     'learning_rate': np.arange(0.1, 1, 0.1)
# }

# Grid actually used by the search cells below.
params = params_short

In [6]:
%%time

grid = GridSearchCV(estimator=model, param_grid=params, cv=10, n_jobs=-1)
grid.fit(X_train, y_train)

print(grid.best_params_)
print(grid.best_score_)
print(grid.best_estimator_, '\n')

{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.9}
0.7557377049180328
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=100, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...) 

CPU times: user 1min 17s, sys: 2min 16s, total: 3min 34s
Wall time: 1min 3s


<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [7]:
# Same default-hyperparameter model, but built on the GPU with the
# histogram tree method.  (optional extra: eval_metric='rmse')
model_gpu = XGBClassifier(device='cuda', tree_method='hist')
model_gpu.fit(X_train, y_train)

# The test features are handed over as a CuPy array -- presumably so the
# prediction input lives on the same device as the model; confirm.
X_test_gpu = cp.asarray(X_test)
predictions_gpu = model_gpu.predict(X_test_gpu)

# Compute both metrics first, then report them.
confusion_matrix = mt.confusion_matrix(y_true=y_test, y_pred=predictions_gpu)
accuracy = mt.accuracy_score(y_true=y_test, y_pred=predictions_gpu)

print('\nConfusion Matrix:\n', confusion_matrix)
print('\nAccuracy:', accuracy)


Confusion Matrix:
 [[90 17]
 [12 35]]

Accuracy: 0.8116883116883117


In [8]:
# # %%time

# # grid_gpu = GridSearchCV(estimator=model_gpu, param_grid=params, cv=10, n_jobs=-1)
# # grid_gpu.fit(X_train, y_train)

# # print(grid_gpu.best_params_)
# # print(grid_gpu.best_score_)
# # print(grid_gpu.best_estimator_, '\n')

# NOTE: this cell is intentionally disabled. It is the GPU counterpart of the
# CPU grid search (same `params`, cv=10, n_jobs=-1, but estimator=model_gpu).
# The markdown cell that follows appears to hold the output saved from an
# earlier run (wall time there: 18min 22s). To re-run, strip the leading
# '# # ' from the lines above so that %%time is again the first line.

**Saved output of the (now commented-out) grid search above for `model_gpu`, run with `params_short`:**
```
{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.9}
0.7556583818085668
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device='cuda', early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=100, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...) 

CPU times: user 3min 52s, sys: 6min 35s, total: 10min 27s
Wall time: 18min 22s
```

<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [9]:
# Re-evaluate on the held-out split using the best configuration found by
# the CPU grid search.
model = grid.best_estimator_
# Manual alternative that does not need the `grid` object:
# model = XGBClassifier(learning_rate=0.1, max_depth=3,
#                       n_estimators=100, subsample=0.9)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Compute both metrics first, then report them.
confusion_matrix = mt.confusion_matrix(y_true=y_test, y_pred=predictions)
accuracy = mt.accuracy_score(y_true=y_test, y_pred=predictions)

print('\nConfusion Matrix:\n', confusion_matrix)
print('\nAccuracy:', accuracy)


Confusion Matrix:
 [[92 15]
 [12 35]]

Accuracy: 0.8246753246753247


<p style="background-image: linear-gradient(to right, #0aa98f, #68dab2)"> &nbsp; </p>