In [1]:
# imports
import pandas as pd
import os
from bayes_opt import BayesianOptimization
import xgboost as xgb

# Names of the DataFrame columns that are fed to the booster as features.
factors = [
    # identity and basic layout
    'property_id',
    'bedrooms',
    'bathrooms',
    'sqft',
    # location
    'longitude',
    'latitude',
    'zipcode',
    'elevation',
    # building attributes
    'garage',
    'year_built',
    'level',
    # surroundings and amenities
    'dist_to_park',
    'dist_to_golf_course',
    'has_pool',
    # sale / ownership details
    'date_closed',
    'multifamily',
    'hoa_fees',
    'lot',
]

In [3]:
def XGBcv(max_depth, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree, alpha):
    """Objective function for the Bayesian search: cross-validated MAE of XGBoost.

    The optimizer proposes continuous values for every argument; they are
    clamped / truncated here before being handed to XGBoost:

    * ``alpha`` and ``gamma`` are floored at 0.
    * ``subsample`` and ``colsample_bytree`` are clipped to [0, 1].
    * ``max_depth``, ``min_child_weight`` and ``max_delta_step`` are truncated
      to integers.

    Relies on two module-level names that must exist before the first call:
    ``dtrain`` (the training matrix passed to ``xgb.cv``) and ``log_file``
    (an open, writable text file that receives the parameters and CV table).

    Returns:
        The negated ``test-mae-mean`` of the last row of the CV result, so
        that a caller which *maximizes* this function minimizes the MAE.
    """
    folds = 5
    paramt = {
        'alpha': max(alpha, 0),
        'gamma': max(gamma, 0),
        'max_depth': int(max_depth),
        'eta': 0.1,
        'objective': 'reg:linear',
        'silent': True,
        'subsample': max(min(subsample, 1), 0),
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'min_child_weight': int(min_child_weight),
        # int() works for both numpy scalars and plain Python floats,
        # matching how the other integer-valued parameters are handled.
        'max_delta_step': int(max_delta_step),
        'seed': 2017,
        'updater': 'grow_gpu'
    }

    print(" Search parameters (%d-fold validation):\n %s" % (folds, paramt), file=log_file)

    # NOTE(review): xgb.callback.early_stop appears to have been removed in
    # newer XGBoost releases; `early_stopping_rounds=50` is the portable
    # spelling -- confirm against the version this notebook is pinned to.
    out = xgb.cv(paramt,
                 dtrain,
                 num_boost_round=20000,
                 nfold=folds,
                 verbose_eval=None,
                 metrics="mae",
                 show_stdv=True,
                 callbacks=[xgb.callback.early_stop(50)])

    print(out, file=log_file)

    # Negate: the search maximizes its target, while MAE should be minimized.
    return -out['test-mae-mean'].values[-1]


In [None]:
# Search-space bounds handed to BayesianOptimization as (lower, upper) pairs.
# Every value is sampled as a float; XGBcv truncates max_depth,
# min_child_weight and max_delta_step to integers before use.

# Narrow ranges.
fast_params = {'max_depth': (4, 6),
               'gamma': (0.0001, 0.005),
               'min_child_weight': (1, 2),
               'max_delta_step': (0, 1),
               'subsample': (0.2, 0.4),
               'colsample_bytree': (0.2, 0.4),
               'alpha': (0, 10)
               }

# Wide ranges.
slow_params = {'max_depth': (5, 15),
               'gamma': (0.0, 10.0),
               'min_child_weight': (1, 20),
               'max_delta_step': (0, 5),
               'subsample': (0.5, 1.0),
               'colsample_bytree': (0.1, 1.0),
               'alpha': (0, 10)
               }

# params = fast_params
params = slow_params

# Load the data first so that a bad path or missing column fails before the
# log file is opened.
df = pd.read_csv('CSV_backups/PH-sales.csv')
dtrain = xgb.DMatrix(df[factors].values, label=df.price, feature_names=factors)

XGB_BOpt = BayesianOptimization(XGBcv, params)

# `log_file` is read as a global by XGBcv. The context manager guarantees the
# file is flushed and closed even if the optimization run raises or is
# interrupted part-way through.
with open("xgboost_parameter_tuning.log", 'a') as log_file:
    print('\nRunning Bayesian Optimization ...\n')
    XGB_BOpt.maximize(init_points=5, n_iter=25)

    # Look the best score up once and report it to both stdout and the log.
    best_score = XGB_BOpt.res['max']['max_val']

    print('\nFinal Results')
    print('XGBOOST: %f' % best_score)
    print('\nFinal Results', file=log_file)
    print('XGBOOST: %f' % best_score, file=log_file)



Running Bayesian Optimization ...

[31mInitialization[0m
[94m----------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   colsample_bytree |     gamma |   max_delta_step |   max_depth |   min_child_weight |   subsample | 




    1 | 278m09s | [35m-192045.00625[0m | [32m            0.7340[0m | [32m   1.9498[0m | [32m          1.8635[0m | [32m     9.7687[0m | [32m            6.3913[0m | [32m     0.6269[0m | 
    2 | 170m37s | [35m-188049.21094[0m | [32m            0.2010[0m | [32m   1.2642[0m | [32m          3.4389[0m | [32m     8.9223[0m | [32m            9.9119[0m | [32m     0.5487[0m | 
    3 | 188m06s | [35m-186046.62812[0m | [32m            0.5770[0m | [32m   0.9262[0m | [32m          4.8631[0m | [32m     8.1638[0m | [32m            9.9059[0m | [32m     0.5769[0m | 
    4 | 04m34s | [35m-23304.43184[0m | [32m            0.4152[0m | [32m   1.4671[0m | [32m          0.8429[0m | [32m    14.1423[0m | [32m            1.1963[0m | [32m     0.6040[0m | 
    5 | 04m40s | -28375.83184 |             0.5698 |    1.8337 |           0.1558 |      6.8217 |             4.3094 |      0.8065 | 
[31mBayesian Optimization[0m
[94m------------------------------------