# Optimal Gridsearch
An exploration of the Bayesian approach to hyperparameter tuning.

In [1]:
import pandas as pd
import numpy as np
from functools import partial
from xgboost import XGBRegressor, DMatrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from skopt.utils import point_asdict

## Import Data
We use the ILEC 2009-15 Individual Life Insurance Mortality Experience Report data (https://www.soa.org/resources/research-reports/2019/2009-2015-individual-life-mortality/). The data are restricted to issue ages in [40, 60) and durations below 10; records without exposure are omitted.

In [2]:
# Columns read from the extract: the rating factors, the death count and the
# exposure.
cols = [
    'Preferred Indicator',
    'Face Amount Band',
    'Gender',
    'Smoker Status',
    'Insurance Plan',
    'Issue Age',
    'Duration',
    'Attained Age',
    'Number of Deaths',
    'Policies Exposed',
]

# Only the first two million rows of the tab-separated file are read.
dat = pd.read_csv(
    '../../scratchpad/2009-15 Data 20180601.txt',
    nrows=2000000,  # read_csv documents nrows as an integer row count
    sep='\t',
    usecols=cols,
)

# Keep issue ages in [40, 60), durations below 10 and rows with positive
# exposure.
keep = (
    (dat['Issue Age'] >= 40)
    & (dat['Issue Age'] < 60)
    & (dat['Duration'] < 10)
    & (dat['Policies Exposed'] > 0)
)

# Take an explicit copy: columns of `dat` are overwritten further down, and
# assigning into a filtered slice would raise pandas' SettingWithCopyWarning.
dat = dat[keep].copy()

In [3]:
# Preview the first five rows of the filtered data.
dat.head(n=5)

Unnamed: 0,Preferred Indicator,Gender,Smoker Status,Insurance Plan,Issue Age,Duration,Attained Age,Face Amount Band,Number of Deaths,Policies Exposed
0,1,Female,NonSmoker,Term,44,9,52,100000-249999,0,2.660124
1,1,Female,NonSmoker,ULSG,40,2,41,1000000-2499999,0,0.364384
2,1,Female,NonSmoker,Term,44,9,52,100000-249999,0,20.324095
3,1,Female,NonSmoker,ULSG,40,2,41,1000000-2499999,0,0.871233
4,1,Female,NonSmoker,ULSG,40,2,41,2500000-4999999,0,0.172603


In [4]:
# Feature columns used by the model.
regs = [
    'Preferred Indicator',
    'Face Amount Band',
    'Gender',
    'Smoker Status',
    'Insurance Plan',
    'Issue Age',
    'Duration',
]

# Replace each listed column by integer codes. The same encoder object is
# refit for every column, so after the loop it only holds the classes of the
# last one ('Insurance Plan').
labelencoder = LabelEncoder()
for column in ('Face Amount Band', 'Gender', 'Smoker Status', 'Insurance Plan'):
    dat[column] = labelencoder.fit_transform(dat[column].astype('category'))

# 70/30 split of features, death counts and exposures, stratified jointly on
# issue age, duration and (encoded) gender; the seed is fixed.
features = dat[regs]
deaths = dat['Number of Deaths']
exposure = dat['Policies Exposed']
strata = dat[['Issue Age', 'Duration', 'Gender']]

X_train, X_eval, y_train, y_eval, exp_train, exp_eval = train_test_split(
    features,
    deaths,
    exposure,
    test_size=.3,
    random_state=52,
    stratify=strata,
)

## Create Callback Function
This callback runs after every search iteration and writes the fit statistics of all hyperparameter settings tried so far to a .csv file.

In [5]:
# Turn off pandas' chained-assignment warning for the rest of the session.
pd.set_option('mode.chained_assignment', None)

In [6]:
def cb(v, filename='../Output/tuning_results.csv', verbose=0):
    """Report and persist the search history after one search iteration.

    Reads the module-level ``opt`` search object: its ``cv_results_`` are
    turned into a table with one row per parameter setting tried so far,
    holding the parameters, the mean test score (``score``), the mean train
    score (``train_score``) and their difference (``train_test_diff``).

    Parameters
    ----------
    v
        Object handed to the callback by ``opt.fit``; only the last entry of
        its ``x_iters`` sequence is read.
    filename
        Destination of the CSV dump of the table. A falsy value skips the
        write.
    verbose
        When greater than 0, print the last point tried with its score, and
        the best score so far with the smallest train/test gap among the rows
        reaching it.
    """
    # Resolved even when nothing is printed, so a malformed point fails here.
    tried = point_asdict(opt.search_spaces, v.x_iters[-1])

    results = opt.cv_results_
    test_scores = results.get('mean_test_score')
    if verbose > 0:
        print("Tried {0}, score ={1}".format(tried, test_scores[-1]))

    history = pd.DataFrame(results.get('params')).assign(
        score=test_scores,
        train_score=results.get('mean_train_score'),
    )
    history['train_test_diff'] = history['train_score'] - history['score']

    if verbose > 0:
        top = history['score'].max()
        gap = history.loc[history['score'] == top, 'train_test_diff'].min()
        print("best score = {0} @ diff = {1}".format(top, gap))

    if filename:
        history.to_csv(filename)

In [7]:
# One-step pipeline around the XGBoost regressor; verbose=True makes the
# pipeline print the fit time of the step.
pipe = Pipeline(steps=[('model', XGBRegressor())], verbose=True)

# Hyperparameter search space, keyed by pipeline parameter name.
search_space = {
    'model__objective': ['count:poisson'],
    'model__n_estimators': Categorical([10, 50, 100, 200, 500]),
    'model__gamma': (1e-6, 1e+1, 'log-uniform'),
    'model__max_depth': Integer(2, 5),  # integer-valued parameter
    'model__learning_rate': Real(0.005, .3),  # real-valued parameter
}

# Log of the training exposure, forwarded to the model's fit as base_margin
# (presumably acting as the offset of the Poisson model - confirm).
offset = {'model__base_margin': np.log(exp_train)}

# Baseline search: 30 iterations, 3-fold CV, default optimizer settings.
opt = BayesSearchCV(
    pipe,
    search_space,
    fit_params=offset,
    n_iter=30,
    cv=3,
    return_train_score=True,
)

opt.fit(X_train, y_train, callback=cb)

[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   5.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   5.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   5.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.7s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.7s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.7s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.1s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.1s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.1s
[Pipeline] ............. (step 1 of 1) Processing model, total=   3.3s
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.9s
[Pipel

BayesSearchCV(cv=3,
              estimator=Pipeline(steps=[('model',
                                         XGBRegressor(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      gamma=None, gpu_id=None,
                                                      importance_type='gain',
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                                                      max_depth=None,
                                                      min_child_weight=None,
                      

In [8]:
# Expected Improvement (EI) run with xi = 0.2; the history is written to
# tuning_results_EI.csv by the callback.
my_cb = partial(cb, filename='../Output/tuning_results_EI.csv', verbose=1)
opt = BayesSearchCV(
    pipe,
    {
        'model__objective': ['count:poisson'],
        'model__n_estimators': Categorical([10, 50, 100, 200, 500]),
        'model__gamma': (1e-6, 1e+1, 'log-uniform'),
        'model__max_depth': Integer(2, 5),  # integer-valued parameter
        'model__learning_rate': Real(0.005, .3),  # real-valued parameter
    },
    fit_params={
        'model__base_margin': np.log(exp_train)
    },
    n_iter=10,
    cv=3,
    return_train_score=True,
    # xi is an argument of the acquisition function, so it must be passed in
    # acq_func_kwargs. acq_optimizer_kwargs only configures the acquisition
    # optimizer; an xi placed there never reaches the EI function and the
    # default would be used instead.
    optimizer_kwargs={'acq_func': 'EI', 'acq_func_kwargs': {'xi': 0.2}},
)

opt.fit(X_train, y_train, callback=my_cb)

[Pipeline] ............. (step 1 of 1) Processing model, total=   5.8s
[Pipeline] ............. (step 1 of 1) Processing model, total=   5.8s
[Pipeline] ............. (step 1 of 1) Processing model, total=   6.0s
Tried OrderedDict([('model__gamma', 0.0011932197403764064), ('model__learning_rate', 0.1918035907459515), ('model__max_depth', 3), ('model__n_estimators', 500), ('model__objective', 'count:poisson')]), score =-0.0068668058031450974
best score = -0.0068668058031450974 @ diff = 0.0009763767980608745
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.4s
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.3s
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.3s
Tried OrderedDict([('model__gamma', 0.00034319605651276895), ('model__learning_rate', 0.15244126415185713), ('model__max_depth', 3), ('model__n_estimators', 200), ('model__objective', 'count:poisson')]), score =-0.006877313467068639
best score = -0.0068668058031450974 @ diff

BayesSearchCV(cv=3,
              estimator=Pipeline(steps=[('model',
                                         XGBRegressor(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      gamma=None, gpu_id=None,
                                                      importance_type='gain',
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                                                      max_depth=None,
                                                      min_child_weight=None,
                      

In [9]:
# Expected Improvement (EI) run with a small xi = 0.01 ("exploit" variant);
# the history is written to tuning_results_EI_exploit.csv by the callback.
my_cb = partial(cb, filename='../Output/tuning_results_EI_exploit.csv', verbose=1)
opt = BayesSearchCV(
    pipe,
    {
        'model__objective': ['count:poisson'],
        'model__n_estimators': Categorical([10, 50, 100, 200, 500]),
        'model__gamma': (1e-6, 1e+1, 'log-uniform'),
        'model__max_depth': Integer(2, 5),  # integer-valued parameter
        'model__learning_rate': Real(0.005, .3),  # real-valued parameter
    },
    fit_params={
        'model__base_margin': np.log(exp_train)
    },
    n_iter=10,
    cv=3,
    return_train_score=True,
    # xi is an argument of the acquisition function, so it must be passed in
    # acq_func_kwargs. acq_optimizer_kwargs only configures the acquisition
    # optimizer; an xi placed there never reaches the EI function.
    optimizer_kwargs={'acq_func': 'EI', 'acq_func_kwargs': {'xi': 0.01}},
)

opt.fit(X_train, y_train, callback=my_cb)

[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
Tried OrderedDict([('model__gamma', 0.01248587313576065), ('model__learning_rate', 0.15771355774135823), ('model__max_depth', 3), ('model__n_estimators', 50), ('model__objective', 'count:poisson')]), score =0.00032010033033751394
best score = 0.00032010033033751394 @ diff = 0.00010017504357413544
[Pipeline] ............. (step 1 of 1) Processing model, total=   1.2s
[Pipeline] ............. (step 1 of 1) Processing model, total=   1.2s
[Pipeline] ............. (step 1 of 1) Processing model, total=   1.2s
Tried OrderedDict([('model__gamma', 0.0008698173501517189), ('model__learning_rate', 0.18324885893452741), ('model__max_depth', 3), ('model__n_estimators', 100), ('model__objective', 'count:poisson')]), score =-0.007026142784461425
best score = 0.00032010033033751394 @ diff =

BayesSearchCV(cv=3,
              estimator=Pipeline(steps=[('model',
                                         XGBRegressor(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      gamma=None, gpu_id=None,
                                                      importance_type='gain',
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                                                      max_depth=None,
                                                      min_child_weight=None,
                      

In [10]:
# Probability of Improvement (PI) run with xi = 0.05; the history is written
# to tuning_results_PI.csv by the callback.
my_cb = partial(cb, filename='../Output/tuning_results_PI.csv', verbose=1)
opt = BayesSearchCV(
    pipe,
    {
        'model__objective': ['count:poisson'],
        'model__n_estimators': Categorical([10, 50, 100, 200, 500]),
        'model__gamma': (1e-6, 1e+1, 'log-uniform'),
        'model__max_depth': Integer(2, 5),  # integer-valued parameter
        'model__learning_rate': Real(0.005, .3),  # real-valued parameter
    },
    fit_params={
        'model__base_margin': np.log(exp_train)
    },
    n_iter=10,
    cv=3,
    return_train_score=True,
    # xi is an argument of the acquisition function, so it must be passed in
    # acq_func_kwargs. acq_optimizer_kwargs only configures the acquisition
    # optimizer; an xi placed there never reaches the PI function.
    optimizer_kwargs={'acq_func': 'PI', 'acq_func_kwargs': {'xi': 0.05}},
)

opt.fit(X_train, y_train, callback=my_cb)

[Pipeline] ............. (step 1 of 1) Processing model, total=   0.7s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.7s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.7s
Tried OrderedDict([('model__gamma', 0.14397067192398075), ('model__learning_rate', 0.24605335280057525), ('model__max_depth', 4), ('model__n_estimators', 50), ('model__objective', 'count:poisson')]), score =-0.0054757849393575996
best score = -0.0054757849393575996 @ diff = 0.0002008655574515109
[Pipeline] ............. (step 1 of 1) Processing model, total=   4.4s
[Pipeline] ............. (step 1 of 1) Processing model, total=   4.5s
[Pipeline] ............. (step 1 of 1) Processing model, total=   4.4s
Tried OrderedDict([('model__gamma', 0.01733263899193809), ('model__learning_rate', 0.23261212860797187), ('model__max_depth', 2), ('model__n_estimators', 500), ('model__objective', 'count:poisson')]), score =-0.006833050443898713
best score = -0.0054757849393575996 @ diff = 0.

BayesSearchCV(cv=3,
              estimator=Pipeline(steps=[('model',
                                         XGBRegressor(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      gamma=None, gpu_id=None,
                                                      importance_type='gain',
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                                                      max_depth=None,
                                                      min_child_weight=None,
                      

In [11]:
# Probability of Improvement (PI) run with xi = 0.3; the history is written
# to tuning_results_PI_exploit.csv by the callback.
# NOTE(review): this run is labelled "exploit" but its xi (0.3) is larger than
# the 0.05 of the plain PI run, whereas for the EI pair the "exploit" run has
# the smaller xi. Confirm the two PI values are not swapped.
my_cb = partial(cb, filename='../Output/tuning_results_PI_exploit.csv', verbose=1)
opt = BayesSearchCV(
    pipe,
    {
        'model__objective': ['count:poisson'],
        'model__n_estimators': Categorical([10, 50, 100, 200, 500]),
        'model__gamma': (1e-6, 1e+1, 'log-uniform'),
        'model__max_depth': Integer(2, 5),  # integer-valued parameter
        'model__learning_rate': Real(0.005, .3),  # real-valued parameter
    },
    fit_params={
        'model__base_margin': np.log(exp_train)
    },
    n_iter=10,
    cv=3,
    return_train_score=True,
    # xi is an argument of the acquisition function, so it must be passed in
    # acq_func_kwargs. acq_optimizer_kwargs only configures the acquisition
    # optimizer; an xi placed there never reaches the PI function.
    optimizer_kwargs={'acq_func': 'PI', 'acq_func_kwargs': {'xi': 0.3}},
)

opt.fit(X_train, y_train, callback=my_cb)

[Pipeline] ............. (step 1 of 1) Processing model, total=   0.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.9s
Tried OrderedDict([('model__gamma', 8.197817748431884), ('model__learning_rate', 0.2784637345094721), ('model__max_depth', 2), ('model__n_estimators', 100), ('model__objective', 'count:poisson')]), score =-0.007260783447614703
best score = -0.007260783447614703 @ diff = 6.4156478946629e-05
[Pipeline] ............. (step 1 of 1) Processing model, total=   3.0s
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.9s
Tried OrderedDict([('model__gamma', 1.521572253936428), ('model__learning_rate', 0.17547572092072147), ('model__max_depth', 4), ('model__n_estimators', 200), ('model__objective', 'count:poisson')]), score =-0.006958410971068219
best score = -0.006958410971068219 @ diff = 0.000297662

BayesSearchCV(cv=3,
              estimator=Pipeline(steps=[('model',
                                         XGBRegressor(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      gamma=None, gpu_id=None,
                                                      importance_type='gain',
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                                                      max_depth=None,
                                                      min_child_weight=None,
                      

In [12]:
# Lower Confidence Bound (LCB) run with kappa = 2.3263; the history is written
# to tuning_results_LCB.csv by the callback.
my_cb = partial(cb, filename='../Output/tuning_results_LCB.csv', verbose=1)
opt = BayesSearchCV(
    pipe,
    {
        'model__objective': ['count:poisson'],
        'model__n_estimators': Categorical([10, 50, 100, 200, 500]),
        'model__gamma': (1e-6, 1e+1, 'log-uniform'),
        'model__max_depth': Integer(2, 5),  # integer-valued parameter
        'model__learning_rate': Real(0.005, .3),  # real-valued parameter
    },
    fit_params={
        'model__base_margin': np.log(exp_train)
    },
    n_iter=10,
    cv=3,
    return_train_score=True,
    # kappa is an argument of the acquisition function, so it must be passed
    # in acq_func_kwargs. acq_optimizer_kwargs only configures the acquisition
    # optimizer; a kappa placed there never reaches the LCB function.
    optimizer_kwargs={'acq_func': 'LCB', 'acq_func_kwargs': {'kappa': 2.3263}},
)

opt.fit(X_train, y_train, callback=my_cb)

[Pipeline] ............. (step 1 of 1) Processing model, total=   3.7s
[Pipeline] ............. (step 1 of 1) Processing model, total=   3.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   3.6s
Tried OrderedDict([('model__gamma', 0.00011265172684669811), ('model__learning_rate', 0.14362900940042944), ('model__max_depth', 5), ('model__n_estimators', 200), ('model__objective', 'count:poisson')]), score =-0.006834421291839081
best score = -0.006834421291839081 @ diff = 0.0012286938770088504
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.6s
Tried OrderedDict([('model__gamma', 0.0002462530820244062), ('model__learning_rate', 0.13163450851841665), ('model__max_depth', 3), ('model__n_estimators', 50), ('model__objective', 'count:poisson')]), score =-0.008180604254212779
best score = -0.006834421291839081 @ diff = 

BayesSearchCV(cv=3,
              estimator=Pipeline(steps=[('model',
                                         XGBRegressor(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      gamma=None, gpu_id=None,
                                                      importance_type='gain',
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                                                      max_depth=None,
                                                      min_child_weight=None,
                      

In [13]:
# Lower Confidence Bound (LCB) run with a smaller kappa = 1.38 ("exploit"
# variant); the history is written to tuning_results_LCB_exploit.csv by the
# callback.
my_cb = partial(cb, filename='../Output/tuning_results_LCB_exploit.csv', verbose=1)
opt = BayesSearchCV(
    pipe,
    {
        'model__objective': ['count:poisson'],
        'model__n_estimators': Categorical([10, 50, 100, 200, 500]),
        'model__gamma': (1e-6, 1e+1, 'log-uniform'),
        'model__max_depth': Integer(2, 5),  # integer-valued parameter
        'model__learning_rate': Real(0.005, .3),  # real-valued parameter
    },
    fit_params={
        'model__base_margin': np.log(exp_train)
    },
    n_iter=10,
    cv=3,
    return_train_score=True,
    # kappa is an argument of the acquisition function, so it must be passed
    # in acq_func_kwargs. acq_optimizer_kwargs only configures the acquisition
    # optimizer; a kappa placed there never reaches the LCB function.
    optimizer_kwargs={'acq_func': 'LCB', 'acq_func_kwargs': {'kappa': 1.38}},
)

opt.fit(X_train, y_train, callback=my_cb)

[Pipeline] ............. (step 1 of 1) Processing model, total=   2.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   2.9s
Tried OrderedDict([('model__gamma', 0.037500990534027424), ('model__learning_rate', 0.2547050738201449), ('model__max_depth', 4), ('model__n_estimators', 200), ('model__objective', 'count:poisson')]), score =-0.006824960080262829
best score = -0.006824960080262829 @ diff = 0.0012921136987832415
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.9s
[Pipeline] ............. (step 1 of 1) Processing model, total=   0.9s
Tried OrderedDict([('model__gamma', 0.030132796056755923), ('model__learning_rate', 0.048004881290200314), ('model__max_depth', 2), ('model__n_estimators', 100), ('model__objective', 'count:poisson')]), score =-0.12195773375357691
best score = -0.006824960080262829 @ diff = 0.0

BayesSearchCV(cv=3,
              estimator=Pipeline(steps=[('model',
                                         XGBRegressor(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      gamma=None, gpu_id=None,
                                                      importance_type='gain',
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                                                      max_depth=None,
                                                      min_child_weight=None,
                      