In [6]:
# imports
import pandas as pd
import numpy as np
import os
from bayes_opt import BayesianOptimization
import xgboost as xgb

# Feature columns fed to the booster. The order is significant: the same list
# is used both to select DataFrame columns and as the DMatrix feature names.
factors = [
    'property_id',
    'bedrooms',
    'bathrooms',
    'sqft',
    'longitude',
    'latitude',
    'zipcode',
    'elevation',
    'garage',
    'year_built',
    'level',
    'dist_to_park',
    'dist_to_golf_course',
    'has_pool',
    'date_closed',
    'multifamily',
    'hoa_fees',
    'lot',
]

In [12]:
def XGBcv(max_depth, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree, alpha):
    """Objective for the Bayesian optimiser: negated cross-validated MAE.

    Runs 5-fold ``xgb.cv`` on the notebook-level ``dtrain`` DMatrix with the
    proposed hyper-parameters and early stopping after 50 rounds without
    improvement.

    Parameters
    ----------
    max_depth, min_child_weight, max_delta_step : float
        Continuous proposals; truncated to ``int`` before being passed on.
    gamma, alpha : float
        Clamped to be non-negative.
    subsample, colsample_bytree : float
        Clamped into ``[0, 1]``.

    Returns
    -------
    float
        ``-1 *`` the ``test-mae-mean`` of the last row of the CV result, so
        that maximising this value minimises the MAE.

    Notes
    -----
    Depends on two names from the notebook namespace: ``dtrain`` and
    ``log_file``. ``log_file`` is not defined in the cells visible here; it is
    presumably an open, writable text file -- confirm it is created before the
    optimiser is run.
    """
    folds = 5
    paramt = {
        'alpha': max(alpha, 0),
        'gamma': max(gamma, 0),
        'max_depth': int(max_depth),
        'eta': 0.1,
        'objective': 'reg:linear',
        'silent': True,
        'subsample': max(min(subsample, 1), 0),
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'min_child_weight': int(min_child_weight),
        # int() works for both NumPy scalars and plain Python floats;
        # the previous `.astype(int)` raised AttributeError on the latter.
        'max_delta_step': int(max_delta_step),
        'seed': 2017,
        'updater': 'grow_gpu'
    }

    # num_boost_round is only an upper bound; the early-stopping callback ends
    # training once the test metric has not improved for 50 rounds.
    out = xgb.cv(paramt,
           dtrain,
           num_boost_round=20000,
           nfold=folds,
           verbose_eval=None,
           metrics="mae",
           show_stdv=True,
           callbacks=[xgb.callback.early_stop(50)])

    # Keep the full per-round CV table for later inspection.
    print(out, file=log_file)

    # Negate: the optimiser maximises, while a lower MAE is better.
    return -out['test-mae-mean'].values[-1]


In [None]:
# Search space: (lower, upper) bound for each argument of XGBcv.
params = { 'max_depth': (5, 15),
                 'gamma': (0.0, 10.0),
                 'min_child_weight': (1, 20),
                 'max_delta_step': (0, 5),
                 'subsample': (0.5, 1.0),
                 'colsample_bytree' :(0.1, 1.0),
                 'alpha': (0, 10)
               }

XGB_BOpt = BayesianOptimization(XGBcv, params)

# Share of rows kept for tuning: 0.03 gives a quick run, 1.0 uses the entire dataset.
SAMPLE_FRACTION = 0.03
# Fixed seed so that re-running the notebook tunes on the same subset of rows.
SAMPLE_SEED = 2017

df = pd.read_csv('CSV_backups/PH-sales.csv')

# Independent per-row draw from a dedicated, seeded generator: the selection is
# reproducible and does not disturb NumPy's global random state.
msk = np.random.RandomState(SAMPLE_SEED).rand(len(df)) < SAMPLE_FRACTION
df = df[msk]

# XGBcv reads `dtrain` from the notebook namespace, so it must exist before maximize().
dtrain = xgb.DMatrix(df[factors].values, label=df.price, feature_names=factors)


XGB_BOpt.maximize(init_points=5, n_iter=25)

[31mInitialization[0m
[94m----------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |   max_delta_step |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-mae' will be used for early stopping.

Will train until test-mae hasn't improved in 50 rounds.
