In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score

from sklearn.model_selection import (RandomizedSearchCV, train_test_split)



In [2]:
# Load scikit-learn's breast-cancer dataset as plain arrays: (features, target).
breast_cancer_X, breast_cancer_y = load_breast_cancer(return_X_y=True)

X = pd.DataFrame(breast_cancer_X)
# Swap the label encoding (0 -> 1, 1 -> 0). scikit-learn documents class 0 as
# "malignant" and class 1 as "benign", so after the swap malignant is the
# positive class.
y = pd.Series(breast_cancer_y).map({0: 1, 1: 0})

# Hold out a test set. X_train / y_train are consumed by the random-search cell
# further down, but no visible cell defines them, so a fresh
# "Restart Kernel -> Run All" would stop with a NameError.
# test_size=0.3 reproduces the (398, 30) / (171, 30) shapes shown in the
# notebook output; random_state=0 is an assumption (the original seed is not
# visible) -- TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Sanity check: the split must partition the full data set.
assert len(X_train) + len(X_test) == len(X)

X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


((398, 30), (171, 30))

In [4]:
# Draw one integer uniformly from {1, 2, 3, 4}; the upper bound `high` is exclusive.
stats.randint.rvs(low=1, high=5)

1

In [5]:
# Draw one float from the continuous uniform distribution on [loc, loc + scale] = [0, 1].
stats.uniform.rvs(loc=0, scale=1)

0.7527656279544307

In [8]:
# random search

# Base estimator whose hyperparameters are tuned below; the fixed seed keeps
# every individual fit deterministic.
gbm = GradientBoostingClassifier(
    random_state=0,
)

# Search space handed to RandomizedSearchCV. Entries exposing `.rvs` (frozen
# scipy distributions) are sampled afresh for every candidate; the plain tuple
# is a set of discrete choices to pick from.
param_grid = dict(
    n_estimators=stats.randint(low=10, high=120),     # integers 10..119
    min_samples_split=stats.uniform(loc=0, scale=1),  # floats in [0, 1]
    max_depth=stats.randint(low=1, high=5),           # integers 1..4
    loss=('log_loss', 'exponential'),
)

In [9]:
# Randomized search: 60 sampled candidates, each scored by 5-fold
# cross-validated ROC-AUC. refit=True retrains the best candidate on the whole
# training set once the search finishes; n_jobs=-1 uses all available cores.
search = RandomizedSearchCV(
    estimator=gbm,
    param_distributions=param_grid,
    n_iter=60,
    scoring='roc_auc',
    cv=5,
    refit=True,
    n_jobs=-1,
    random_state=10,  # fixes which candidates get sampled
)

search.fit(X_train, y_train)

In [10]:
# Hyperparameter combination that achieved the best mean cross-validated score.
search.best_params_

{'loss': 'log_loss',
 'max_depth': 2,
 'min_samples_split': 0.04309735620499444,
 'n_estimators': 115}

In [11]:
# One row per sampled candidate: timings, sampled parameters, per-fold scores,
# and the aggregated mean / std / rank of the test score.
results = pd.DataFrame(data=search.cv_results_)

# (number of candidates, number of reported columns)
print(results.shape)

# Peek at the last five candidates that were evaluated.
results.tail(5)

(60, 17)


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_loss,param_max_depth,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
55,0.208434,0.010172,0.002399,0.000491,exponential,3,0.584196,47,"{'loss': 'exponential', 'max_depth': 3, 'min_s...",1.0,0.983333,0.985333,0.977891,0.994558,0.988223,0.007973,37
56,0.159225,0.005749,0.002401,0.000491,exponential,1,0.373062,65,"{'loss': 'exponential', 'max_depth': 1, 'min_s...",1.0,0.988667,0.976,0.981973,0.994558,0.988239,0.008573,36
57,0.025706,0.000605,0.001801,0.000401,log_loss,1,0.10462,10,"{'loss': 'log_loss', 'max_depth': 1, 'min_samp...",1.0,0.971667,0.973333,0.962245,0.97483,0.976415,0.012583,59
58,0.035108,0.00097,0.001801,0.000399,log_loss,1,0.653623,14,"{'loss': 'log_loss', 'max_depth': 1, 'min_samp...",0.998667,0.973667,0.972667,0.961565,0.980272,0.977367,0.012228,58
59,0.553922,0.022747,0.0018,0.0004,log_loss,3,0.212213,110,"{'loss': 'log_loss', 'max_depth': 3, 'min_samp...",1.0,0.986,0.991333,0.982653,0.997959,0.991589,0.00667,4
