In [1]:
import os
import gc
import pandas as pd
import numpy as np
import csv
from timeit import default_timer as timer
import lightgbm as lgbm
import xgboost as xgb
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score

# Constants
NUM_FOLDS = 5

# pyGPGO
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.covfunc import squaredExponential
from pyGPGO.acquisition import Acquisition
from pyGPGO.GPGO import GPGO

# Suppress warnings from pandas
import warnings
warnings.filterwarnings('ignore')

plt.style.use('fivethirtyeight')

In [2]:
""" Load and process inputs """
input_dir = os.path.join(os.curdir, 'data')
print('Input files:\n{}'.format(os.listdir(input_dir)))
print('Loading data sets...')

Input files:
['installments_payments.csv', 'POS_CASH_balance.csv', 'bureau.csv', 'credit_card_balance.csv', 'application_train.csv', 'bureau_balance.csv', 'application_test.csv', 'processed_input_data.csv', 'previous_application.csv']
Loading data sets...


In [3]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

In [4]:
sample_size = None
merged_df = reduce_mem_usage(pd.read_csv(os.path.join(input_dir, 'processed_input_data.csv'), nrows=sample_size))

Memory usage of dataframe is 1236.69 MB
Memory usage after optimization is: 494.68 MB
Decreased by 60.0%


In [5]:
# Separate metadata
meta_cols = ['SK_ID_CURR']
meta_df = merged_df[meta_cols]
merged_df.drop(columns=meta_cols, inplace=True)

In [6]:
def process_dataframe(input_df, encoder_dict=None):
    """ Process a dataframe into a form useable by LightGBM """

    # Label encode categoricals
    print('Label encoding categorical features...')
    categorical_feats = input_df.columns[input_df.dtypes == 'object']
    for feat in categorical_feats:
        encoder = LabelEncoder()
        input_df[feat] = encoder.fit_transform(input_df[feat].fillna('NULL'))
    print('Label encoding complete.')

    return input_df, categorical_feats.tolist(), encoder_dict

In [7]:
categorical_feats = merged_df.columns[merged_df.dtypes == 'object']

In [8]:
categorical_feats

Index(['CODE_GENDER', 'EMERGENCYSTATE_MODE', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY',
       'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'NAME_CONTRACT_TYPE',
       'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE',
       'NAME_INCOME_TYPE', 'NAME_TYPE_SUITE', 'OCCUPATION_TYPE',
       'ORGANIZATION_TYPE', 'WALLSMATERIAL_MODE', 'WEEKDAY_APPR_PROCESS_START',
       'NAME_CONTRACT_TYPE_PRVMAX', 'WEEKDAY_APPR_PROCESS_START_PRVMAX',
       'FLAG_LAST_APPL_PER_CONTRACT', 'NAME_CASH_LOAN_PURPOSE',
       'NAME_CONTRACT_STATUS', 'NAME_PAYMENT_TYPE', 'CODE_REJECT_REASON',
       'NAME_CLIENT_TYPE', 'NAME_GOODS_CATEGORY', 'NAME_PORTFOLIO',
       'NAME_PRODUCT_TYPE', 'CHANNEL_TYPE', 'NAME_SELLER_INDUSTRY',
       'NAME_YIELD_GROUP', 'NAME_CONTRACT_TYPE_PRVMIN',
       'WEEKDAY_APPR_PROCESS_START_PRVMIN',
       'FLAG_LAST_APPL_PER_CONTRACT_PRVMIN', 'NAME_CASH_LOAN_PURPOSE_PRVMIN',
       'NAME_CONTRACT_STATUS_PRVMIN', 'NAME_PAYMENT_TYPE_PRVMIN',
       'CODE_REJECT_REASON_PRVMIN', 'NAME_

In [9]:
for feat in categorical_feats:
        encoder = LabelEncoder()
        merged_df[feat] = encoder.fit_transform(merged_df[feat].fillna('NULL'))

In [10]:
categorical_feats.tolist()

['CODE_GENDER',
 'EMERGENCYSTATE_MODE',
 'FLAG_OWN_CAR',
 'FLAG_OWN_REALTY',
 'FONDKAPREMONT_MODE',
 'HOUSETYPE_MODE',
 'NAME_CONTRACT_TYPE',
 'NAME_EDUCATION_TYPE',
 'NAME_FAMILY_STATUS',
 'NAME_HOUSING_TYPE',
 'NAME_INCOME_TYPE',
 'NAME_TYPE_SUITE',
 'OCCUPATION_TYPE',
 'ORGANIZATION_TYPE',
 'WALLSMATERIAL_MODE',
 'WEEKDAY_APPR_PROCESS_START',
 'NAME_CONTRACT_TYPE_PRVMAX',
 'WEEKDAY_APPR_PROCESS_START_PRVMAX',
 'FLAG_LAST_APPL_PER_CONTRACT',
 'NAME_CASH_LOAN_PURPOSE',
 'NAME_CONTRACT_STATUS',
 'NAME_PAYMENT_TYPE',
 'CODE_REJECT_REASON',
 'NAME_CLIENT_TYPE',
 'NAME_GOODS_CATEGORY',
 'NAME_PORTFOLIO',
 'NAME_PRODUCT_TYPE',
 'CHANNEL_TYPE',
 'NAME_SELLER_INDUSTRY',
 'NAME_YIELD_GROUP',
 'NAME_CONTRACT_TYPE_PRVMIN',
 'WEEKDAY_APPR_PROCESS_START_PRVMIN',
 'FLAG_LAST_APPL_PER_CONTRACT_PRVMIN',
 'NAME_CASH_LOAN_PURPOSE_PRVMIN',
 'NAME_CONTRACT_STATUS_PRVMIN',
 'NAME_PAYMENT_TYPE_PRVMIN',
 'CODE_REJECT_REASON_PRVMIN',
 'NAME_CLIENT_TYPE_PRVMIN',
 'NAME_GOODS_CATEGORY_PRVMIN',
 'NAME_PORTFOLI

In [11]:
# Capture other categorical features not as object data types:
non_obj_categoricals = [
    'FONDKAPREMONT_MODE', 'HOUR_APPR_PROCESS_START', 'HOUSETYPE_MODE',
    'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE',
    'NAME_INCOME_TYPE', 'NAME_TYPE_SUITE', 'OCCUPATION_TYPE',
    'ORGANIZATION_TYPE', 'STATUS', 'NAME_CONTRACT_STATUS_CAVG',
    'WALLSMATERIAL_MODE', 'WEEKDAY_APPR_PROCESS_START', 'NAME_CONTRACT_TYPE_BAVG',
    'WEEKDAY_APPR_PROCESS_START_BAVG', 'NAME_CASH_LOAN_PURPOSE', 'NAME_CONTRACT_STATUS', 
    'NAME_PAYMENT_TYPE', 'CODE_REJECT_REASON', 'NAME_TYPE_SUITE_BAVG', 
    'NAME_CLIENT_TYPE', 'NAME_GOODS_CATEGORY', 'NAME_PORTFOLIO', 
    'NAME_PRODUCT_TYPE', 'CHANNEL_TYPE', 'NAME_SELLER_INDUSTRY', 
    'NAME_YIELD_GROUP', 'PRODUCT_COMBINATION', 'NAME_CONTRACT_STATUS_CCAVG',
    'CREDIT_ACTIVE', 'CREDIT_CURRENCY', 'CREDIT_TYPE'
]
categorical_feats = categorical_feats.tolist() + non_obj_categoricals

In [12]:
# Re-separate into train and test
len_train = 307511
train_df = merged_df[:len_train]
test_df = merged_df[len_train:]
#del merged_df

In [13]:
train_df = train_df.sample(n=20000, random_state=69)

In [14]:
""" Setup the LightGBM model """
target = train_df.pop('TARGET')
test_df.drop(columns='TARGET', inplace=True)
lgbm_train = lgbm.Dataset(data=train_df,
                          label=target,
                          categorical_feature=categorical_feats,
                          free_raw_data=False)

In [15]:
#lgbm_test = {
#    'learning_rate': 0.1,
#    'min_data_in_leaf': 30,
#    'num_leaves': 31,
#    'feature_fraction': 0.5,
#    'scale_pos_weight': 2,
#    'drop_rate': 0.02
#}

In [15]:
def evaluateModel(**kwargs):
    
    global ITER
    global OUT_FILE
    
    ITER+=1
  
    kwargs['min_data_in_leaf'] = int(round(kwargs['min_data_in_leaf']))
    kwargs['num_leaves'] = int(round(kwargs['num_leaves']))
    
    kwargs['boosting'] = 'dart'
    kwargs['application'] = 'binary'
    kwargs['max_depth'] = 8 #-1
    kwargs['learning_rate'] = .02
    #kwargs['is_unbalance'] = True
    kwargs['scale_pos_weight'] = 2
    kwargs['drop_rate'] = .02
    
    #print(kwargs)
    
    start = timer()
    
    cv_results = lgbm.cv(train_set=lgbm_train, 
                         params=kwargs, 
                         nfold=NUM_FOLDS,
                         num_boost_round=10000,
                         early_stopping_rounds=100,
                         stratified=True,
                         verbose_eval=False,
                         metrics=['auc'])
    
    run_time = timer() - start
    
    best_auc = np.max(cv_results['auc-mean'])
    
    optimum_boost_rounds = np.argmax(cv_results['auc-mean'])
    
    # Write to the csv file ('a' means append)
    HANDLE = open(OUT_FILE, 'a')
    writer = csv.writer(HANDLE)
    writer.writerow([ITER, best_auc, run_time, optimum_boost_rounds, kwargs])
    HANDLE.close()

    # return auc to BayesOpt
    return best_auc

In [17]:
# Open an iteration file for Bayesian Optimization
OUT_FILE = 'bayesOpt_dart_test6.csv'
HANDLE = open(OUT_FILE, 'w')
writer = csv.writer(HANDLE)

ITER = 0

# write header
headers = ['iteration', 'AUC', 'time','optimum_round', 'hyperparams', '(learning_rate=.02)']
writer.writerow(headers)
HANDLE.close()



In [18]:
# Test the objective function
#results = evaluateModel(lgbm_test)
#print('The cross validation score = {:.5f}.'.format(results))

# change num_leaves
#lgbm_test['num_leaves'] = 20
#results = evaluateModel(lgbm_test)
#print('The cross validation score = {:.5f}.'.format(results))

In [19]:
sexp = squaredExponential()
gp = GaussianProcess(sexp)
acq = Acquisition(mode = 'ExpectedImprovement')

In [20]:
#params = {'learning_rate':      ('cont', (.001, 0.25)),
#          'min_data_in_leaf':  ('cont', (20, 50)),
#          'num_leaves':        ('cont', (15, 40)),
#          'feature_fraction':   ('cont', (.4, 1)),
#          'scale_pos_weight':  ('cont', (1, 3)),
#          'drop_rate':      ('cont', (.0001, .05))
#         }


params = {'min_data_in_leaf':  ('int', [10, 600]),
          'num_leaves':        ('int', [10, 300]),
          'feature_fraction':   ('cont', [.2, 1.]),
          'reg_alpha':      ('cont', [0, 1]),
          'reg_lambda':     ('cont', [0, 1])
         }


In [21]:
gpgo = GPGO(gp, acq, evaluateModel, params)
gpgo.run(max_iter = 1000)

Evaluation 	 Proposed point 	  Current eval. 	 Best eval.
init   	 [ 91.          13.           0.42113504   0.91988518   0.25744217]. 	  0.7628410587875746 	 0.7628410587875746
init   	 [  4.73000000e+02   1.28000000e+02   8.89518838e-01   1.69964125e-01
   5.43837200e-01]. 	  0.7574178518186928 	 0.7628410587875746
init   	 [ 402.          181.            0.83344414    0.45589395    0.60400197]. 	  0.7569508920057768 	 0.7628410587875746
1      	 [ 409.          186.            0.44400733    0.64625311    0.53844756]. 	  0.762028428644 	 0.762841058788
2      	 [  3.02000000e+02   2.57000000e+02   3.93315050e-01   7.08418715e-02
   1.29470110e-01]. 	  0.760872684046 	 0.762841058788
3      	 [ 303.           92.            0.72641254    0.33550812    0.69333122]. 	  0.756117349198 	 0.762841058788
4      	 [  1.01000000e+02   2.34000000e+02   4.43008819e-01   9.84908137e-01
   1.97970455e-01]. 	  0.759688438391 	 0.762841058788
5      	 [ 118.          181.            0.4941915     0

63     	 [ 405.          206.            0.51664697    0.97114446    0.7013693 ]. 	  0.758912672618 	 0.763229722308
64     	 [  40.06500465  251.46773542    0.54267444    0.46414158    0.48366961]. 	  0.756069480672 	 0.763229722308
65     	 [  3.98327257e+02   2.05718618e+02   6.53181298e-01   3.24588182e-01
   6.42729574e-01]. 	  0.759034082053 	 0.763229722308
66     	 [ 303.27187978   91.01749233    0.76457313    0.47530282    0.55941592]. 	  0.757696348826 	 0.763229722308
67     	 [  3.08257441e+02   8.55891915e+01   1.00000000e+00   1.31859897e-01
   0.00000000e+00]. 	  0.756255137766 	 0.763229722308
68     	 [  5.21999986e+02   3.19542606e+01   7.27961329e-01   8.76101993e-01
   3.84603327e-01]. 	  0.758077956606 	 0.763229722308
69     	 [  47.71597202  252.26662037    0.46792523    0.3754564     0.60569727]. 	  0.759805563056 	 0.763229722308
70     	 [  4.18000000e+02   2.05000000e+02   8.22991300e-01   2.28549462e-01
   1.63416946e-01]. 	  0.756800832522 	 0.763229722308


129    	 [  4.07529355e+02   1.85193054e+02   1.00000000e+00   3.67094750e-01
   0.00000000e+00]. 	  0.756809288403 	 0.763667234676
130    	 [  1.10386598e+02   1.84454666e+02   7.86556333e-01   2.99724002e-02
   4.57808155e-01]. 	  0.755758743712 	 0.763667234676
131    	 [  5.12367863e+01   2.41453304e+02   2.00000000e-01   7.35982592e-03
   6.98863765e-01]. 	  0.758199039837 	 0.763667234676
132    	 [  4.10000000e+02   1.91000000e+02   3.11121014e-01   7.00959681e-01
   7.40319561e-01]. 	  0.762192148123 	 0.763667234676
133    	 [  3.11000000e+02   2.62000000e+02   7.36864959e-01   7.92108364e-01
   2.92621802e-01]. 	  0.75714761094 	 0.763667234676
134    	 [ 559.99068673   83.28993759    0.78925135    0.78767563    0.58139682]. 	  0.758634416117 	 0.763667234676
135    	 [  4.08000000e+02   2.40000000e+01   3.99430358e-01   9.09413322e-01
   2.36453333e-01]. 	  0.760354107663 	 0.763667234676
136    	 [  4.72467978e+02   1.27148755e+02   1.00000000e+00   3.93753325e-01
   6.171

194    	 [  1.01321004e+02   2.34488584e+02   9.66657788e-01   3.42858843e-01
   4.44002667e-02]. 	  0.755453488003 	 0.763667234676
195    	 [  3.99012537e+02   2.05582832e+02   1.00000000e+00   5.68746260e-01
   1.13659051e-01]. 	  0.757133214694 	 0.763667234676
196    	 [  4.08850648e+02   2.01628738e+02   3.40792761e-01   4.89972793e-01
   7.12632286e-01]. 	  0.761333435908 	 0.763667234676
197    	 [  2.97386810e+02   2.51035924e+02   2.26514855e-01   7.85691528e-01
   1.53091699e-01]. 	  0.761461255834 	 0.763667234676
198    	 [  3.81826760e+02   2.33379735e+02   3.28340691e-01   6.77979413e-01
   3.67771277e-01]. 	  0.760898850052 	 0.763667234676
199    	 [ 117.12042408  181.49953985    0.47727118    0.79730286    1.        ]. 	  0.758842767233 	 0.763667234676
200    	 [  4.08828922e+02   2.00705872e+02   5.60730482e-01   7.11644740e-01
   3.64717266e-01]. 	  0.758170313122 	 0.763667234676
201    	 [  2.23000000e+02   1.72000000e+02   7.91110464e-01   8.57469475e-01
   1.61

259    	 [ 416.31702934  179.95110956    0.74852646    0.7346136     0.55351251]. 	  0.756653464495 	 0.763667234676
260    	 [ 121.          193.            0.58904735    0.82609808    0.89550844]. 	  0.757021779026 	 0.763667234676
261    	 [  40.77698019  251.50397712    1.            0.49171989    1.        ]. 	  0.753664397705 	 0.763667234676
262    	 [  3.95595226e+02   2.22403883e+02   3.00855204e-01   5.98521206e-01
   4.82769946e-01]. 	  0.763364759975 	 0.763667234676
263    	 [  4.63563959e+02   1.41248633e+02   2.00000000e-01   3.70780124e-01
   0.00000000e+00]. 	  0.762176655787 	 0.763667234676
264    	 [ 147.89582994  118.51669778    0.80902436    0.90924946    0.42818697]. 	  0.756162161969 	 0.763667234676
265    	 [  4.02641736e+02   1.81450976e+02   2.00000000e-01   2.47145622e-01
   1.00000000e+00]. 	  0.761349259911 	 0.763667234676
266    	 [  4.11088810e+02   1.79208875e+02   3.45089133e-01   1.00000000e+00
   1.53074247e-01]. 	  0.76228689331 	 0.763667234676
2

324    	 [ 137.48033219  120.88239209    0.7011335     0.61403352    0.75585884]. 	  0.75789821137 	 0.763667234676
325    	 [ 117.58504154  187.88303697    0.84245265    0.6672541     0.56854773]. 	  0.757183303019 	 0.763667234676
326    	 [  4.02000000e+02   2.10000000e+02   8.32430037e-01   3.43346027e-01
   8.29366842e-01]. 	  0.756698673055 	 0.763667234676
327    	 [  5.59000000e+02   6.60000000e+01   2.19437596e-01   1.78010593e-01
   8.14791389e-01]. 	  0.760221755359 	 0.763667234676
328    	 [  1.47045890e+02   1.13504073e+02   2.00000000e-01   1.10100706e-01
   8.52615328e-01]. 	  0.761559165748 	 0.763667234676
329    	 [ 395.58851039  222.49475488    1.            1.            1.        ]. 	  0.757425235034 	 0.763667234676
330    	 [  3.96045574e+02   2.18281001e+02   5.40168743e-01   7.43381864e-01
   3.20917512e-01]. 	  0.759073856024 	 0.763667234676
331    	 [  3.06136069e+02   9.84323746e+01   2.80693416e-01   6.57041608e-02
   2.66527915e-01]. 	  0.760727283523 	 

390    	 [  4.10481458e+02   1.91300003e+02   3.30919885e-01   3.99575418e-02
   2.05861871e-01]. 	  0.760158041026 	 0.763667234676
391    	 [ 117.17775401  187.39678053    1.            1.            1.        ]. 	  0.754762135127 	 0.763667234676
392    	 [ 110.47081032  184.68143214    0.63607308    0.60977425    0.58993021]. 	  0.757437425065 	 0.763667234676
393    	 [  5.19816921e+02   2.49853031e+01   1.00000000e+00   7.53374026e-01
   3.17188248e-01]. 	  0.757539334348 	 0.763667234676
394    	 [ 109.41966785  183.9944038     0.46723341    0.71394383    0.2538677 ]. 	  0.757808291314 	 0.763667234676
395    	 [  3.72113253e+01   2.52193352e+02   2.00000000e-01   3.81378119e-01
   1.00000000e+00]. 	  0.759277324216 	 0.763667234676
396    	 [  3.02405326e+02   9.02696832e+01   1.00000000e+00   3.49048277e-01
   2.16466580e-01]. 	  0.755901903131 	 0.763667234676
397    	 [ 407.97847962   12.73383559    0.67918199    0.70932879    0.62050402]. 	  0.760557125731 	 0.763667234676


455    	 [  5.19833676e+02   2.54245205e+01   4.81341002e-01   4.05839073e-02
   2.17570971e-02]. 	  0.760295678464 	 0.763667234676
456    	 [  2.14414728e+02   1.73130993e+02   8.98150072e-01   8.34256019e-01
   1.72769328e-02]. 	  0.755488111331 	 0.763667234676
457    	 [  2.14000028e+02   1.83999986e+02   6.00994201e-01   2.13117005e-01
   8.02729935e-01]. 	  0.758760631947 	 0.763667234676
458    	 [  4.17370031e+02   2.10322256e+02   2.00000000e-01   8.33942887e-01
   7.99533821e-01]. 	  0.761579531553 	 0.763667234676
459    	 [ 214.94041844  183.53076636    0.60859479    0.25781068    0.72257868]. 	  0.758198811648 	 0.763667234676
460    	 [  3.13026102e+02   8.10000001e+01   6.31621952e-01   3.07231989e-01
   2.51536568e-01]. 	  0.757308656913 	 0.763667234676
461    	 [  3.73277334e+01   2.52618091e+02   3.96534486e-01   7.76813658e-01
   1.87319555e-01]. 	  0.759175306355 	 0.763667234676
462    	 [  46.11516535  247.52067106    0.98945229    0.5455248     0.99999468]. 	  

520    	 [  3.20824387e+02   2.61530770e+02   2.00000000e-01   7.23806645e-01
   6.00260190e-01]. 	  0.763615847669 	 0.763667234676
521    	 [ 298.8841147   250.91877092    0.71841373    0.58267159    0.99360816]. 	  0.75684217893 	 0.763667234676
522    	 [ 107.71762243  179.95872436    0.44106555    0.37449384    0.39175837]. 	  0.75964007803 	 0.763667234676
523    	 [  5.23038646e+02   3.25592791e+01   2.00000000e-01   0.00000000e+00
   6.49337950e-01]. 	  0.760406406505 	 0.763667234676
524    	 [ 401.44736633  223.01959053    0.89508563    0.46926927    0.74752656]. 	  0.756986066719 	 0.763667234676
525    	 [ 156.99998296  118.00000828    0.29172906    0.71692685    0.71580001]. 	  0.760510012048 	 0.763667234676
526    	 [  2.23586325e+02   1.72026898e+02   1.00000000e+00   1.29339751e-01
   0.00000000e+00]. 	  0.757097830506 	 0.763667234676
527    	 [  3.12503896e+02   8.12332026e+01   2.00000000e-01   9.19979044e-01
   0.00000000e+00]. 	  0.761467473715 	 0.763667234676
52

585    	 [  3.21033071e+02   2.62698185e+02   2.58974840e-01   1.00000000e+00
   1.27523762e-01]. 	  0.763625211989 	 0.763682389556
586    	 [  4.19047183e+02   1.75624261e+02   4.00210883e-01   3.95595022e-01
   4.41053906e-01]. 	  0.760496334216 	 0.763682389556
587    	 [  5.60237938e+02   8.42316873e+01   2.84630753e-01   4.66995855e-01
   7.58393236e-02]. 	  0.761500368079 	 0.763682389556
588    	 [  4.13218787e+02   2.15003308e+02   3.35180572e-01   1.91729280e-01
   1.99471224e-01]. 	  0.761664231786 	 0.763682389556
589    	 [  3.51999989e+02   3.09999784e+01   8.38950812e-01   6.52524052e-01
   1.61291352e-01]. 	  0.755828789876 	 0.763682389556
590    	 [ 349.05496847   27.00125798    0.45375779    0.48238204    0.74276551]. 	  0.76032956593 	 0.763682389556
591    	 [  5.59304984e+02   8.46308411e+01   2.00000000e-01   3.59185302e-03
   0.00000000e+00]. 	  0.762412408614 	 0.763682389556
592    	 [  4.66446595e+02   1.46833923e+02   3.98417359e-01   8.64017214e-01
   1.528

651    	 [ 401.88238665  213.96755269    0.50737577    0.77634279    0.40982237]. 	  0.758962466474 	 0.763682389556
652    	 [ 293.35079507  258.75865521    0.49635234    0.55266023    0.40930295]. 	  0.758150628776 	 0.763682389556
653    	 [  2.14188574e+02   1.83331826e+02   1.00000000e+00   1.09110069e-01
   1.56435659e-01]. 	  0.75688529727 	 0.763682389556
654    	 [ 292.83583988  258.56819391    1.            0.29789908    0.        ]. 	  0.756573353066 	 0.763682389556
655    	 [ 381.15401447  233.58185714    0.94206304    0.            0.        ]. 	  0.755750438616 	 0.763682389556
656    	 [  2.04358146e+02   1.68744199e+02   7.10406249e-01   2.19438692e-01
   1.04348014e-01]. 	  0.756526529973 	 0.763682389556
657    	 [  1.20741968e+02   1.92561950e+02   9.37435018e-01   7.30588828e-01
   7.69439480e-02]. 	  0.756502734007 	 0.763682389556
658    	 [  4.09608541e+02   1.87082814e+02   2.35982520e-01   0.00000000e+00
   5.62409699e-01]. 	  0.762623643838 	 0.763682389556
6

718    	 [  5.66653016e+02   7.63659495e+01   8.60512525e-01   4.04482448e-01
   8.28940970e-01]. 	  0.756547019195 	 0.763825995356
719    	 [ 301.53219774  258.42423199    0.90336715    0.86932313    1.        ]. 	  0.755554767746 	 0.763825995356
720    	 [  1.36659151e+02   1.20756576e+02   2.70743639e-01   2.24655639e-01
   3.94494979e-02]. 	  0.762101791552 	 0.763825995356
721    	 [  4.58441888e+01   2.52848801e+02   8.01304588e-01   6.24182424e-01
   1.85327174e-01]. 	  0.756823108425 	 0.763825995356
722    	 [  3.12443322e+02   9.67136957e+01   8.54081876e-01   8.21043142e-01
   1.66451052e-01]. 	  0.756699408463 	 0.763825995356
723    	 [ 338.11982965   27.78314472    0.36503792    1.            1.        ]. 	  0.761523710732 	 0.763825995356
724    	 [  1.11919749e+02   1.84608213e+02   1.00000000e+00   7.11072434e-03
   1.00000000e+00]. 	  0.756989729589 	 0.763825995356
725    	 [  3.76075247e+02   2.27057963e+02   2.07905810e-01   4.97815553e-01
   4.23511553e-01]. 	  

783    	 [ 215.94315163  181.97497614    0.67564388    0.68907171    0.2872394 ]. 	  0.756511946784 	 0.763825995356
784    	 [ 466.59508231  146.67801534    1.            0.72374884    0.91862687]. 	  0.757630664239 	 0.763825995356
785    	 [  4.12534715e+02   2.40398059e+01   2.00000000e-01   3.73522863e-01
   3.22182125e-01]. 	  0.762899864383 	 0.763825995356
786    	 [  3.49210834e+02   2.71034766e+01   2.00000000e-01   0.00000000e+00
   9.60926231e-02]. 	  0.761388171489 	 0.763825995356
787    	 [  2.80622207e+02   2.07570759e+02   4.82278748e-01   9.78529130e-03
   3.94531784e-01]. 	  0.761155624439 	 0.763825995356
788    	 [ 396.46210718  216.87755258    1.            0.            0.92191116]. 	  0.756478942418 	 0.763825995356
789    	 [  4.15933732e+02   1.79543992e+02   2.00000000e-01   5.73292825e-01
   0.00000000e+00]. 	  0.761371186349 	 0.763825995356
790    	 [ 293.44802659  257.66551452    0.5187134     0.            0.42976679]. 	  0.759235478028 	 0.763825995356


849    	 [  4.67865262e+02   1.17593136e+02   5.95596568e-01   3.70693839e-01
   1.50993199e-01]. 	  0.758038114902 	 0.766887118298
850    	 [ 120.12543333  170.66749713    0.63941666    0.19009458    0.85801954]. 	  0.757896540805 	 0.766887118298
851    	 [ 139.03210302  120.18366828    0.2           0.            0.8763345 ]. 	  0.76149185481 	 0.766887118298
852    	 [ 411.19359687   19.39367501    1.            0.41727155    0.49352316]. 	  0.756784665963 	 0.766887118298
853    	 [  46.39350963  253.69554733    0.97488033    0.37185407    0.        ]. 	  0.754299638475 	 0.766887118298
854    	 [  4.61246604e+01   2.53609333e+02   2.00000000e-01   9.99703267e-01
   3.26427729e-01]. 	  0.759324619958 	 0.766887118298
855    	 [  3.73242006e+02   2.30425890e+02   2.00000000e-01   0.00000000e+00
   9.53693758e-02]. 	  0.760917369365 	 0.766887118298
856    	 [  3.37743492e+02   2.77800712e+01   2.00000000e-01   4.63601876e-01
   5.64167883e-01]. 	  0.763509517974 	 0.766887118298
8

915    	 [  1.10668481e+02   1.83294703e+02   2.00000000e-01   1.62179307e-01
   1.00000000e+00]. 	  0.761492036427 	 0.766887118298
916    	 [  1.35717378e+02   1.20059646e+02   4.18636356e-01   2.11003988e-02
   1.44495315e-01]. 	  0.760482487758 	 0.766887118298
917    	 [ 203.45587219  169.64175965    0.93823003    0.31662825    0.        ]. 	  0.756110427401 	 0.766887118298
918    	 [ 108.6628108   184.9488865     0.50113201    0.49111629    0.97641792]. 	  0.755419080157 	 0.766887118298
919    	 [  4.12531602e+02   2.40567695e+01   1.00000000e+00   8.99113331e-01
   1.50429534e-01]. 	  0.756908313185 	 0.766887118298
920    	 [  3.93281641e+02   1.92273577e+02   2.18979811e-01   4.53806358e-01
   8.81798627e-01]. 	  0.760608171563 	 0.766887118298
921    	 [  38.92999096  252.57070053    1.            0.9201287     0.3746132 ]. 	  0.754173597741 	 0.766887118298
922    	 [  5.60486957e+02   6.43403843e+01   2.84953006e-01   1.36762804e-02
   1.00000000e+00]. 	  0.760594988522 	

981    	 [ 146.48362439  118.63035477    0.99919257    0.96022288    0.86210236]. 	  0.755373379936 	 0.766887118298
982    	 [  5.58369943e+02   6.56011096e+01   7.64525266e-01   8.73305988e-01
   4.65849125e-01]. 	  0.758442425533 	 0.766887118298
983    	 [  1.19459903e+02   2.00763431e+02   1.00000000e+00   4.14896298e-01
   1.16024827e-01]. 	  0.755837591805 	 0.766887118298
984    	 [  4.02212683e+02   2.23774750e+02   7.61588667e-01   8.86747308e-01
   3.16147366e-01]. 	  0.757099621547 	 0.766887118298
985    	 [  3.74722662e+02   2.28343686e+02   8.97945923e-01   0.00000000e+00
   2.99203710e-01]. 	  0.756612535742 	 0.766887118298
986    	 [ 111.91006873  183.60029961    0.2           0.36994286    1.        ]. 	  0.759689621329 	 0.766887118298
987    	 [ 559.98982888   66.70779338    0.6432508     0.76677383    0.90687998]. 	  0.759741550916 	 0.766887118298
988    	 [ 119.06148783  177.11183079    0.69248515    0.44948761    0.        ]. 	  0.75829603892 	 0.766887118298
9

In [16]:
# Open an iteration file for Random Search
OUT_FILE = 'RandomSearch_dart.csv'
HANDLE = open(OUT_FILE, 'w')
writer = csv.writer(HANDLE)

ITER = 0

# write header
headers = ['iteration', 'AUC', 'time','optimum_round', 'hyperparams', '(learning_rate=.02)']
writer.writerow(headers)
HANDLE.close()


In [26]:
import random
#random.seed(42)
kwargs = {}

kwargs['min_data_in_leaf'] = random.randrange(10, 600)
kwargs['num_leaves'] = random.randrange(10, 300)
kwargs['feature_fraction'] = random.uniform(.2, 1)
kwargs['reg_alpha'] = random.uniform(0, 1)
kwargs['reg_beta'] = random.uniform(0, 1)

#params = {'min_data_in_leaf':  ('int', [10, 600]),
#          'num_leaves':        ('int', [10, 300]),
#          'feature_fraction':   ('cont', [.2, 1.]),
#          'reg_alpha':      ('cont', [0, 1]),
#          'reg_lambda':     ('cont', [0, 1])
#         }

kwargs

{'min_data_in_leaf': 280,
 'num_leaves': 32,
 'feature_fraction': 0.7837854293550544,
 'reg_alpha': 0.5362280914547007,
 'reg_beta': 0.9731157639793706}

In [27]:
import random
random.seed(42)

kwargs = {}

for i in range(1,1001):
    
    
    kwargs['boosting'] = 'dart'
    kwargs['application'] = 'binary'
    kwargs['max_depth'] = 8 #-1
    kwargs['learning_rate'] = .02
    #kwargs['is_unbalance'] = True
    kwargs['scale_pos_weight'] = 2
    kwargs['drop_rate'] = .02
    
    kwargs['min_data_in_leaf'] = random.randrange(10, 600)
    kwargs['num_leaves'] = random.randrange(10, 300)
    kwargs['feature_fraction'] = random.uniform(.2, 1)
    kwargs['reg_alpha'] = random.uniform(0, 1)
    kwargs['reg_beta'] = random.uniform(0, 1)
    
    start = timer()
    
    cv_results = lgbm.cv(train_set=lgbm_train, 
                         params=kwargs, 
                         nfold=NUM_FOLDS,
                         num_boost_round=10000,
                         early_stopping_rounds=100,
                         stratified=True,
                         verbose_eval=False,
                         metrics=['auc'])
    
    run_time = timer() - start
    
    best_auc = np.max(cv_results['auc-mean'])
    
    optimum_boost_rounds = np.argmax(cv_results['auc-mean'])
    
    # Write to the csv file
    HANDLE = open(OUT_FILE, 'a')
    writer = csv.writer(HANDLE)
    writer.writerow([i, best_auc, run_time, optimum_boost_rounds, kwargs])
    
HANDLE.close()
    

In [16]:
# Open an iteration file for Final Ensemble Search
OUT_FILE = 'EnsembleSearch_dart.csv'
HANDLE = open(OUT_FILE, 'w')
writer = csv.writer(HANDLE)

ITER = 0

# write header
headers = ['iteration', 'AUC', 'time','optimum_round', 'hyperparams', '(learning_rate=.02)']
writer.writerow(headers)
HANDLE.close()

In [17]:
from pyGPGO.covfunc import matern32
from pyGPGO.covfunc import rationalQuadratic
from pyGPGO.surrogates.Ensemble import Ensemble
from pyGPGO.surrogates.RandomForest import RandomForest
from pyGPGO.surrogates.BoostedTrees import BoostedTrees

In [18]:
np.random.seed(22)
cov1 = squaredExponential()
cov2 = rationalQuadratic()
cov3 = matern32()
gpsqExp = GaussianProcess(cov1)
gpratQuad = GaussianProcess(cov2)
gpmat32 = GaussianProcess(cov3)
acq = Acquisition(mode = 'ExpectedImprovement')
ens = Ensemble([gpsqExp, gpratQuad, gpmat32])

In [19]:
params = {'min_data_in_leaf':  ('int', [10, 600]),
          'num_leaves':        ('int', [10, 300]),
          'feature_fraction':   ('cont', [.2, 1.]),
          'reg_alpha':      ('cont', [0, 1]),
          'reg_lambda':     ('cont', [0, 1])
         }


In [20]:
gpgo = GPGO(ens, acq, evaluateModel, params)
gpgo.run(max_iter=1000)

Evaluation 	 Proposed point 	  Current eval. 	 Best eval.
init   	 [ 142.           94.            0.87736719    0.89331018    0.23463765]. 	  0.7544321471129382 	 0.7608759790767868
init   	 [  5.67000000e+02   1.03000000e+02   2.08421499e-01   5.61203697e-01
   8.13726187e-01]. 	  0.7608759790767868 	 0.7608759790767868
init   	 [  1.43000000e+02   3.70000000e+01   5.53658783e-01   1.28924972e-01
   3.41609738e-01]. 	  0.7602409923938138 	 0.7608759790767868
1      	 [  1.50999983e+02   1.09999968e+02   9.98501381e-01   7.98551405e-01
   6.27388817e-02]. 	  0.756050078659 	 0.760875979077
2      	 [  1.50510563e+02   1.09110962e+02   9.91988553e-01   8.03562577e-01
   7.20750802e-02]. 	  0.755992469829 	 0.760875979077
3      	 [  1.50003926e+02   1.09968347e+02   1.00000000e+00   8.03322931e-01
   3.85017760e-05]. 	  0.756106670023 	 0.760875979077
4      	 [  1.42016814e+02   3.72134551e+01   5.37193355e-01   1.32633768e-01
   3.83629301e-01]. 	  0.758914553119 	 0.760875979077
5  

64     	 [  5.68560368e+02   1.02500854e+02   8.16074938e-01   1.00000000e+00
   3.42691227e-01]. 	  0.75832292791 	 0.763268040828
65     	 [ 150.63572037  111.07056609    0.87048822    0.30570372    0.        ]. 	  0.755946678594 	 0.763268040828
66     	 [ 149.81668609  110.45547789    0.3428142     0.            0.26712187]. 	  [92m0.763288948442[0m 	 0.763288948442
67     	 [ 150.49113359  110.87621937    0.2           0.94256549    0.4637402 ]. 	  0.761205751127 	 0.763288948442
68     	 [ 150.61199499  110.99524352    0.82734177    0.2606261     1.        ]. 	  0.755364658651 	 0.763288948442
69     	 [ 151.7644489   110.56084981    0.2           0.89119396    0.        ]. 	  0.762574033587 	 0.763288948442
70     	 [ 567.78549983  104.43945171    1.            1.            0.96180875]. 	  0.756812110801 	 0.763288948442
71     	 [ 142.01627195   93.36324251    1.            0.            0.4473636 ]. 	  0.757295538782 	 0.763288948442
72     	 [ 141.78377615   93.25241455   

131    	 [ 143.67979392   93.87601925    0.2           0.            0.28525094]. 	  0.760644633172 	 0.76408285664
132    	 [ 151.85213612  110.30210157    0.94021803    0.            0.        ]. 	  0.75576852428 	 0.76408285664
133    	 [  1.52525810e+02   1.09226061e+02   2.54075239e-01   1.17849249e-01
   0.00000000e+00]. 	  0.761832085433 	 0.76408285664
134    	 [ 143.44802908   96.17435183    1.            0.33545197    0.9574585 ]. 	  0.756722100835 	 0.76408285664
135    	 [  5.68951118e+02   1.02882599e+02   2.00000000e-01   6.62844211e-01
   0.00000000e+00]. 	  0.761887494874 	 0.76408285664
136    	 [ 149.51174277   18.31220351    0.82545313    0.21266337    0.27834691]. 	  0.757479619518 	 0.76408285664
137    	 [ 143.03872085   92.97474704    1.            0.41145008    0.9472408 ]. 	  0.756011558237 	 0.76408285664
138    	 [  1.50065459e+02   1.07722391e+02   2.00000000e-01   1.41352733e-01
   9.66225215e-01]. 	  0.762877756093 	 0.76408285664
139    	 [ 149.34058829  

198    	 [  5.68779641e+02   1.05281053e+02   3.66796375e-01   1.00000000e+00
   9.53393458e-01]. 	  0.760049236585 	 0.764249797351
199    	 [  1.41016284e+02   9.71130064e+01   1.00000000e+00   0.00000000e+00
   3.92783352e-02]. 	  0.756462356338 	 0.764249797351
200    	 [  5.68250154e+02   1.05450178e+02   8.57203955e-01   7.58008457e-01
   1.61257123e-01]. 	  0.758109462693 	 0.764249797351
201    	 [ 149.43691212  108.12739578    0.81988317    0.            0.71894292]. 	  0.754806367019 	 0.764249797351
202    	 [ 149.61597494  111.54970866    0.2           0.33856563    0.65112532]. 	  0.763435016353 	 0.764249797351
203    	 [ 140.01357606   93.77582044    1.            0.56346326    0.21400542]. 	  0.753854173566 	 0.764249797351
204    	 [  5.68498502e+02   1.04752919e+02   6.51241409e-01   1.00000000e+00
   4.66114421e-01]. 	  0.757593752524 	 0.764249797351
205    	 [  5.67870743e+02   1.05557455e+02   2.00000000e-01   4.08139621e-01
   6.86876631e-01]. 	  0.761974138022 	

265    	 [  5.70781745e+02   1.03571928e+02   9.88448264e-01   2.01213860e-01
   5.81847791e-01]. 	  0.757317480957 	 0.764249797351
266    	 [ 143.478721     91.37175178    0.42719323    0.31300376    1.        ]. 	  0.759977503205 	 0.764249797351
267    	 [ 142.18696929   91.35048189    0.2           0.27389168    1.        ]. 	  0.763267991456 	 0.764249797351
268    	 [ 145.78341344   97.10549427    0.2           0.24859328    0.        ]. 	  0.760808182508 	 0.764249797351
269    	 [ 145.50091346   94.25050827    0.2           1.            0.46454503]. 	  0.763390013621 	 0.764249797351
270    	 [ 149.31724508  107.33390745    0.2           1.            0.5010347 ]. 	  0.76227858027 	 0.764249797351
271    	 [ 145.11749442   97.81151748    0.2           0.79787139    0.        ]. 	  0.761104972493 	 0.764249797351
272    	 [ 570.44609126  104.7914794     1.            0.65807079    0.80607373]. 	  0.757479835546 	 0.764249797351
273    	 [  1.43863010e+02   9.17083647e+01   2.0

332    	 [ 142.83405772   91.4078318     0.89617716    0.            0.54828521]. 	  0.755124714319 	 0.764249797351
333    	 [ 146.14538846   94.04763072    0.7659186     0.55548673    1.        ]. 	  0.755397924528 	 0.764249797351
334    	 [ 146.51598229   98.2112885     0.2           0.17931811    0.41450841]. 	  0.762233539457 	 0.764249797351
335    	 [  1.47804006e+02   1.07621534e+02   1.00000000e+00   1.00000000e+00
   9.57643929e-02]. 	  0.755981196918 	 0.764249797351
336    	 [  5.71309726e+02   1.01385348e+02   9.80078163e-01   8.30968110e-01
   2.64487006e-01]. 	  0.757512276597 	 0.764249797351
337    	 [ 142.95837592   98.61472006    0.2           0.48411882    0.43245627]. 	  0.761949445539 	 0.764249797351
338    	 [ 148.90355526  111.18841836    0.82314861    0.            0.18371627]. 	  0.755473265851 	 0.764249797351
339    	 [ 138.83277403   94.50054345    0.64830147    0.28250739    0.5674834 ]. 	  0.755268924588 	 0.764249797351
340    	 [ 152.86442387  109.765

400    	 [ 141.18791314   98.4394391     0.2           0.39508078    0.        ]. 	  0.761867298862 	 0.764452701641
401    	 [ 150.7299852   106.56770664    0.2           0.19613259    0.57485215]. 	  0.762262728275 	 0.764452701641
402    	 [  1.47879790e+02   1.10825661e+02   1.00000000e+00   8.78275788e-02
   7.67185821e-01]. 	  0.756025837504 	 0.764452701641
403    	 [ 147.65116743   95.39228508    0.2           0.30996889    0.42829947]. 	  0.76304747802 	 0.764452701641
404    	 [ 151.26095443  106.88056662    1.            0.77203573    0.44302976]. 	  0.753473824523 	 0.764452701641
405    	 [ 138.9960209    93.20182841    1.            0.            0.96895371]. 	  0.755654081913 	 0.764452701641
406    	 [ 142.36266765   99.03890377    1.            0.47407063    0.76285562]. 	  0.75624502324 	 0.764452701641
407    	 [  1.47406297e+02   1.06908290e+02   2.00000000e-01   6.87789322e-02
   1.00000000e+00]. 	  0.763324384311 	 0.764452701641
408    	 [ 146.24649531   99.77301

468    	 [ 150.58938307  112.00080058    0.52962516    1.            1.        ]. 	  0.758585749018 	 0.764452701641
469    	 [  5.65439904e+02   1.02414257e+02   1.00000000e+00   5.06302469e-02
   1.00000000e+00]. 	  0.757517951267 	 0.764452701641
470    	 [ 151.9852917   107.19507153    0.2           0.20026679    0.28236243]. 	  0.761598204012 	 0.764452701641
471    	 [ 147.10218955  105.64122132    0.2           1.            1.        ]. 	  0.762215195622 	 0.764452701641
472    	 [ 138.5239514    95.36740408    1.            1.            0.20671316]. 	  0.75632256523 	 0.764452701641
473    	 [  1.38660733e+02   9.39548968e+01   4.70457936e-01   1.00000000e+00
   1.13781694e-02]. 	  0.759017958601 	 0.764452701641
474    	 [ 145.74861206  107.71466559    0.2           1.            0.83077915]. 	  0.760569347052 	 0.764452701641
475    	 [ 142.07547091  100.37030512    1.            0.71163956    0.43834907]. 	  0.755720812963 	 0.764452701641
476    	 [ 572.344705    101.5696

535    	 [ 150.70987409  105.92094943    0.512324      1.            0.73471176]. 	  0.759565704446 	 0.764452701641
536    	 [  1.47780213e+02   1.00294839e+02   8.38066943e-01   1.41404848e-01
   3.46122545e-01]. 	  0.754326766061 	 0.764452701641
537    	 [ 146.52025749   93.03140817    0.63039696    0.96013956    0.54650876]. 	  0.755965042086 	 0.764452701641
538    	 [  1.46088237e+02   1.09291155e+02   3.91264368e-01   1.00000000e+00
   1.42002152e-01]. 	  0.761411666116 	 0.764452701641
539    	 [  1.40513893e+02   9.95327175e+01   2.14786015e-01   0.00000000e+00
   1.39644821e-01]. 	  0.76070951814 	 0.764452701641
540    	 [  1.39114053e+02   9.74087493e+01   2.00000000e-01   4.50694193e-01
   1.95153051e-02]. 	  0.76089266178 	 0.764452701641
541    	 [  5.70802615e+02   1.05827859e+02   4.64834294e-01   2.49763200e-01
   5.01652149e-01]. 	  0.759734181801 	 0.764452701641
542    	 [  5.69599515e+02   1.06444385e+02   2.00000000e-01   5.62380538e-01
   9.14963477e-01]. 	  0.

602    	 [ 145.14833219   92.10869797    0.2           0.            1.        ]. 	  0.760912950356 	 0.764452701641
603    	 [  5.68229994e+02   1.06793740e+02   3.44253247e-01   0.00000000e+00
   0.00000000e+00]. 	  0.761473805909 	 0.764452701641
604    	 [ 141.71010538   90.44628562    0.2           1.            1.        ]. 	  0.760842839935 	 0.764452701641
605    	 [ 147.93946576  103.77762197    0.40483597    0.59900551    1.        ]. 	  0.760238980213 	 0.764452701641
606    	 [ 148.30721635  100.34752732    0.93637937    1.            0.        ]. 	  0.755552037195 	 0.764452701641
607    	 [ 144.3375332   103.35696868    0.2           0.84440763    0.96445185]. 	  0.762107705224 	 0.764452701641
608    	 [ 143.4738027   102.86928751    0.2           0.            0.39962103]. 	  0.760114996602 	 0.764452701641
609    	 [ 565.87073255  105.02110568    0.63754796    1.            0.        ]. 	  0.758065102969 	 0.764452701641
610    	 [ 142.40238932  101.85684562    0.72152

670    	 [ 146.12370779  102.59473958    0.2           0.            0.        ]. 	  0.760946916133 	 0.764452701641
671    	 [ 141.19492201  101.02144289    0.99827946    0.63097138    1.        ]. 	  0.755119264077 	 0.764452701641
672    	 [  5.70135955e+02   9.88115790e+01   2.03311415e-01   1.00000000e+00
   6.79796193e-01]. 	  0.760512428884 	 0.764452701641
673    	 [ 147.8323574   103.91748382    0.2           1.            0.        ]. 	  0.763808872 	 0.764452701641
674    	 [ 144.20375163  106.14866438    0.2           0.38410782    0.87218636]. 	  0.762830430389 	 0.764452701641
675    	 [ 143.63399607  104.97077525    0.46997289    1.            0.82207212]. 	  0.758184676891 	 0.764452701641
676    	 [ 143.7677117   102.91182895    0.83029167    1.            1.        ]. 	  0.755921153283 	 0.764452701641
677    	 [ 148.94555233  103.26274516    1.            0.            0.94704151]. 	  0.755446564289 	 0.764452701641
678    	 [ 564.89486981  102.21952843    0.57723813

738    	 [ 136.80569942   96.40839566    0.5963509     0.64008299    0.5743757 ]. 	  0.757245965018 	 0.764452701641
739    	 [  1.50759308e+02   1.01305033e+02   1.00000000e+00   1.05130303e-01
   6.32446349e-01]. 	  0.755030205174 	 0.764452701641
740    	 [ 150.16326693  106.13944383    1.            1.            0.        ]. 	  0.755576174296 	 0.764452701641
741    	 [ 149.09786207   96.77734639    0.8755257     0.36328247    0.59629887]. 	  0.755950133183 	 0.764452701641
742    	 [  1.50108138e+02   9.71609053e+01   2.00000000e-01   8.43645642e-01
   1.16242995e-02]. 	  0.762094457671 	 0.764452701641
743    	 [  1.36572071e+02   9.52712478e+01   2.83476026e-01   2.22575781e-01
   5.97786712e-03]. 	  0.761013301699 	 0.764452701641
744    	 [  1.51093703e+02   1.02905245e+02   1.00000000e+00   9.53593460e-02
   1.00000000e+00]. 	  0.753179903374 	 0.764452701641
745    	 [  5.66304383e+02   9.94635393e+01   2.00000000e-01   4.08628463e-01
   0.00000000e+00]. 	  0.760970897223 	

805    	 [  1.41964417e+02   1.05043285e+02   2.00000000e-01   6.68497681e-01
   5.04887957e-02]. 	  0.761670244847 	 0.764452701641
806    	 [ 145.53549994  110.12223085    1.            0.62282484    0.25475151]. 	  0.75570841991 	 0.764452701641
807    	 [ 572.6344178   104.95696803    1.            0.64499148    0.58818227]. 	  0.756660488175 	 0.764452701641
808    	 [ 140.96991334  102.82349417    0.75031401    0.81726838    0.15408962]. 	  0.756247848665 	 0.764452701641
809    	 [ 136.4867444    95.70141818    0.99986098    0.            0.81752927]. 	  0.756778970478 	 0.764452701641
810    	 [ 141.91999047  104.81836688    1.            0.            0.21638919]. 	  0.755478959306 	 0.764452701641
811    	 [ 150.7961689    96.87368332    0.2           0.16604798    0.71792661]. 	  0.763118093502 	 0.764452701641
812    	 [ 151.8698829   103.49939324    0.2           0.37704213    1.        ]. 	  0.762561173709 	 0.764452701641
813    	 [  5.72575827e+02   1.05508100e+02   2.0

873    	 [ 153.5209473   107.14776179    0.7140679     0.61578747    1.        ]. 	  0.757399887942 	 0.764452701641
874    	 [ 145.25587472   91.31683696    0.2           0.88966388    1.        ]. 	  0.762118642996 	 0.764452701641
875    	 [ 136.38948264   95.06568019    0.22881495    0.60285167    1.        ]. 	  0.762591582147 	 0.764452701641
876    	 [ 145.65812666  102.41218804    1.            1.            0.        ]. 	  0.754865638413 	 0.764452701641
877    	 [ 139.06437565   99.39027167    0.2           0.            0.48891514]. 	  0.762789011929 	 0.764452701641
878    	 [ 149.5997354   112.6384793     0.77116307    0.79794062    1.        ]. 	  0.755791358584 	 0.764452701641
879    	 [ 144.65100804  107.83278554    0.96198818    1.            1.        ]. 	  0.755905374302 	 0.764452701641
880    	 [ 150.41774206  112.37456412    0.38415142    0.85620291    0.        ]. 	  0.761011257922 	 0.764452701641
881    	 [  5.65524026e+02   1.05658095e+02   2.00000000e-01   1

941    	 [ 146.38951675  103.88405481    1.            0.            0.        ]. 	  0.755761313739 	 0.764452701641
942    	 [ 571.78382768   99.50778916    0.83718943    0.            1.        ]. 	  0.757074290775 	 0.764452701641
943    	 [ 153.2175539   104.91785165    0.65537702    1.            0.        ]. 	  0.757096111363 	 0.764452701641
944    	 [  1.43775315e+02   8.91218777e+01   2.00000000e-01   1.16935340e-01
   4.72583790e-01]. 	  0.76279190213 	 0.764452701641
945    	 [ 148.48801638   93.50310299    0.2           0.47496789    1.        ]. 	  0.761542268134 	 0.764452701641
946    	 [ 153.67754382  109.99561106    0.2           0.2995055     0.9597226 ]. 	  0.764330415983 	 0.764452701641
947    	 [  5.65853466e+02   9.85334421e+01   7.03904372e-01   2.44241755e-03
   8.59603204e-01]. 	  0.75860712401 	 0.764452701641
948    	 [ 137.20038457  100.66165195    0.5863144     1.            0.21742859]. 	  0.757910070282 	 0.764452701641
949    	 [  1.37316300e+02   9.841

In [None]:
clf = lgbm.train(train_set=lgbm_train,
                 params=lgbm_params,
                 num_boost_round=optimum_boost_rounds)
                 
""" Predict on test set and create submission """
y_pred = clf.predict(test_df)
out_df = pd.DataFrame({'SK_ID_CURR': meta_df['SK_ID_CURR'][len_train:], 'TARGET': y_pred})
out_df.to_csv('submission.csv', index=False)

# Plot importances
fig, (ax, ax1) = plt.subplots(1, 2, figsize=[11, 7])
lgbm.plot_importance(clf, ax=ax, max_num_features=20, importance_type='split')
lgbm.plot_importance(clf, ax=ax1, max_num_features=20, importance_type='gain')
ax.set_title('Importance by splits')
ax1.set_title('Importance by gain')
plt.tight_layout()
plt.savefig('feature_importance.png')