In [90]:
import pandas as pd
import numpy as np
import time

import sklearn
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

from skopt import gp_minimize, BayesSearchCV
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.plots import plot_convergence

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

In [91]:
file_path = "data/1193_BNG_lowbwt.tsv"
dataframe = pd.read_csv(file_path, sep="\t")
d_ndarray = dataframe.values
features = d_ndarray[:,0:d_ndarray.shape[1] - 1]
labels = d_ndarray[:, -1]
t_size = 0.25
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=t_size, random_state=42)

## Hyperparameter optimisation using Skopt

In [77]:
bo_s_time = time.time()
regressor = GradientBoostingRegressor()
space  = [
          Integer(1, 10, name='max_depth'),
          Integer(1, 200, name='n_estimators'),
          #Real(10**-5, 10**0, "log-uniform", name='learning_rate'),
          #Integer(1, n_features, name='max_features'),
          #Integer(2, 100, name='min_samples_split'),
          #Integer(1, 100, name='min_samples_leaf')
         ]

@use_named_args(space)
def objective(**params):
    regressor.set_params(**params)
    return -np.mean(cross_val_score(regressor, X_train, y_train, cv=5, scoring="r2"))

regressor_gp = gp_minimize(objective, space, n_calls=30, random_state=0)
bo_e_time = time.time()
print("Time elapsed: %d seconds", int(bo_e_time - bo_s_time))
print("Best score=%.4f" % -regressor_gp.fun)
print("""Best parameters:
- max_depth=%d
- n_estimators=%d""" % (regressor_gp.x[0], regressor_gp.x[1]))
plot_convergence(regressor_gp)

KeyboardInterrupt: 

## Hyperparameter search using grid search

In [88]:
gs_s_time = time.time()
g_regressor = GradientBoostingRegressor()
tuned_parameters = [
    {
        'n_estimators': [5, 25, 50, 100],
        'max_depth': [1, 3, 5, 7, 10]
    },
]
clf = GridSearchCV(g_regressor, tuned_parameters, cv=5, scoring='r2')
clf.fit(X_train, y_train)
print("Best parameters:")
print()
print(clf.best_params_)
print("Best score:")
print()
print(clf.best_score_)
gs_e_time = time.time()
print()
print("Time elapsed: %d seconds" % int(gs_e_time - gs_s_time))

Best parameters:

{'max_depth': 7, 'n_estimators': 100}
Best score:

0.9847583346817556

Time elapsed: %d seconds 111


## Hyperopt

In [94]:
ho_s_time = time.time()
max_evals = 5
regressor = GradientBoostingRegressor()
space = {
    'n_estimators': hp.choice('n_estimators', range(1, 100)),
    'max_depth': hp.choice('max_depth', range(1, 10)),
}

def objective(params):
    regressor.set_params(**params)
    return -np.mean(cross_val_score(regressor, X_train, y_train, cv=5, scoring="r2"))

regressor_ho = fmin(objective, space, algo=tpe.suggest, max_evals=max_evals, trials=Trials())

ho_e_time = time.time()
print()
print("Time elapsed: %d seconds" % int(ho_e_time - ho_s_time))
print("Best params:")
print(regressor_ho)

100%|██████████| 5/5 [01:02<00:00, 14.10s/it, best loss: -0.6070134544409933]

Time elapsed: 62 seconds
Best params:
{'max_depth': 6, 'n_estimators': 82}


## BayesSearchCV

In [75]:
gs_bs_time = time.time()

class BayesSearchCV(BayesSearchCV):
    def _run_search(self, x): raise BaseException('Use newer skopt')

optimiser = BayesSearchCV(
    GradientBoostingRegressor(),
    {
        'max_depth': (1, 10),
        'n_estimators': (1, 100)
    },
    n_iter=30,
    cv=5,
    scoring='r2'
)

optimiser.fit(X_train, y_train)

bs_e_time = time.time()
print("Time elapsed: %d seconds", int(bs_e_time - gs_bs_time))

print("Cross-validation: %s" % optimiser.best_score_)
print("test score: %s" % optimiser.score(X_test, y_test))

KeyboardInterrupt: 