#### Hyperparameter tuning using Bayesian Optimization

In [1]:
import numpy as np
import pandas as pd
import gc
import time
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 200)

In [10]:
import os
def get_dataset():
    app_train_path = os.path.join('D:\workspace1\Kaggle_Advanced_ML\data', 'application_train.csv')
    app_test_path = os.path.join('D:\workspace1\Kaggle_Advanced_ML\data', 'application_test.csv')
    prev_path = os.path.join('D:\workspace1\Kaggle_Advanced_ML\data', 'previous_application.csv')
    app_train = pd.read_csv(app_train_path)
    app_test = pd.read_csv(app_test_path)
    apps = pd.concat([app_train, app_test])
    prev = pd.read_csv(prev_path)
    
    return apps, prev

apps, prev = get_dataset()

In [11]:
def get_apps_processed(apps):
    
    # EXT_SOURCE_X FEATURE processing
    apps['APPS_EXT_SOURCE_MEAN'] = apps[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].mean(axis=1)
    apps['APPS_EXT_SOURCE_STD'] = apps[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].std(axis=1)
    apps['APPS_EXT_SOURCE_STD'] = apps['APPS_EXT_SOURCE_STD'].fillna(apps['APPS_EXT_SOURCE_STD'].mean())
    
    # AMT_CREDIT ratio Feature processing
    apps['APPS_ANNUITY_CREDIT_RATIO'] = apps['AMT_ANNUITY']/apps['AMT_CREDIT']
    apps['APPS_GOODS_CREDIT_RATIO'] = apps['AMT_GOODS_PRICE']/apps['AMT_CREDIT']
    
    # AMT_INCOME_TOTAL ratio Feature processing
    apps['APPS_ANNUITY_INCOME_RATIO'] = apps['AMT_ANNUITY']/apps['AMT_INCOME_TOTAL']
    apps['APPS_CREDIT_INCOME_RATIO'] = apps['AMT_CREDIT']/apps['AMT_INCOME_TOTAL']
    apps['APPS_GOODS_INCOME_RATIO'] = apps['AMT_GOODS_PRICE']/apps['AMT_INCOME_TOTAL']
    apps['APPS_CNT_FAM_INCOME_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['CNT_FAM_MEMBERS']
    
    # DAYS_BIRTH, DAYS_EMPLOYED ratio processing
    apps['APPS_EMPLOYED_BIRTH_RATIO'] = apps['DAYS_EMPLOYED']/apps['DAYS_BIRTH']
    apps['APPS_INCOME_EMPLOYED_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['DAYS_EMPLOYED']
    apps['APPS_INCOME_BIRTH_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['DAYS_BIRTH']
    apps['APPS_CAR_BIRTH_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_BIRTH']
    apps['APPS_CAR_EMPLOYED_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_EMPLOYED']
    
    return apps

In [12]:
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier

def get_prev_processed(prev):
    # 대출 신청 금액과 실제 대출액/대출 상품금액 차이 및 비율
    prev['PREV_CREDIT_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_CREDIT']
    prev['PREV_GOODS_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_GOODS_PRICE']
    prev['PREV_CREDIT_APPL_RATIO'] = prev['AMT_CREDIT']/prev['AMT_APPLICATION']
    # prev['PREV_ANNUITY_APPL_RATIO'] = prev['AMT_ANNUITY']/prev['AMT_APPLICATION']
    prev['PREV_GOODS_APPL_RATIO'] = prev['AMT_GOODS_PRICE']/prev['AMT_APPLICATION']
    
    prev['DAYS_FIRST_DRAWING'].replace(365243, np.nan, inplace= True)
    prev['DAYS_FIRST_DUE'].replace(365243, np.nan, inplace= True)
    prev['DAYS_LAST_DUE_1ST_VERSION'].replace(365243, np.nan, inplace= True)
    prev['DAYS_LAST_DUE'].replace(365243, np.nan, inplace= True)
    prev['DAYS_TERMINATION'].replace(365243, np.nan, inplace= True)
    # 첫번째 만기일과 마지막 만기일까지의 기간
    prev['PREV_DAYS_LAST_DUE_DIFF'] = prev['DAYS_LAST_DUE_1ST_VERSION'] - prev['DAYS_LAST_DUE']
    # 매월 납부 금액과 납부 횟수 곱해서 전체 납부 금액 구함. 
    all_pay = prev['AMT_ANNUITY'] * prev['CNT_PAYMENT']
    # 전체 납부 금액 대비 AMT_CREDIT 비율을 구하고 여기에 다시 납부횟수로 나누어서 이자율 계산. 
    prev['PREV_INTERESTS_RATE'] = (all_pay/prev['AMT_CREDIT'] - 1)/prev['CNT_PAYMENT']
        
    return prev
    
    
def get_prev_amt_agg(prev):
    # 새롭게 생성된 대출 신청액 대비 다른 금액 차이 및 비율로 aggregation 수행. 
    agg_dict = {
         # 기존 컬럼. 
        'SK_ID_CURR':['count'],
        'AMT_CREDIT':['mean', 'max', 'sum'],
        'AMT_ANNUITY':['mean', 'max', 'sum'], 
        'AMT_APPLICATION':['mean', 'max', 'sum'],
        'AMT_DOWN_PAYMENT':['mean', 'max', 'sum'],
        'AMT_GOODS_PRICE':['mean', 'max', 'sum'],
        'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'DAYS_DECISION': ['min', 'max', 'mean'],
        'CNT_PAYMENT': ['mean', 'sum'],
        # 가공 컬럼
        'PREV_CREDIT_DIFF':['mean', 'max', 'sum'], 
        'PREV_CREDIT_APPL_RATIO':['mean', 'max'],
        'PREV_GOODS_DIFF':['mean', 'max', 'sum'],
        'PREV_GOODS_APPL_RATIO':['mean', 'max'],
        'PREV_DAYS_LAST_DUE_DIFF':['mean', 'max', 'sum'],
        'PREV_INTERESTS_RATE':['mean', 'max']
    }

    prev_group = prev.groupby('SK_ID_CURR')
    prev_amt_agg = prev_group.agg(agg_dict)

    # multi index 컬럼을 '_'로 연결하여 컬럼명 변경
    prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]
    
    return prev_amt_agg

def get_prev_refused_appr_agg(prev):
    # 원래 groupby 컬럼 + 세부 기준 컬럼으로 groupby 수행. 세분화된 레벨로 aggregation 수행 한 뒤에 unstack()으로 컬럼레벨로 변형. 
    prev_refused_appr_group = prev[prev['NAME_CONTRACT_STATUS'].isin(['Approved', 'Refused'])].groupby([ 'SK_ID_CURR', 'NAME_CONTRACT_STATUS'])
    prev_refused_appr_agg = prev_refused_appr_group['SK_ID_CURR'].count().unstack()
    # 컬럼명 변경. 
    prev_refused_appr_agg.columns = ['PREV_APPROVED_COUNT', 'PREV_REFUSED_COUNT' ]
    # NaN값은 모두 0으로 변경. 
    prev_refused_appr_agg = prev_refused_appr_agg.fillna(0)
    
    return prev_refused_appr_agg

    

def get_prev_agg(prev):
    prev = get_prev_processed(prev)
    prev_amt_agg = get_prev_amt_agg(prev)
    prev_refused_appr_agg = get_prev_refused_appr_agg(prev)
    
    # prev_amt_agg와 조인. 
    prev_agg = prev_amt_agg.merge(prev_refused_appr_agg, on='SK_ID_CURR', how='left')
    # SK_ID_CURR별 과거 대출건수 대비 APPROVED_COUNT 및 REFUSED_COUNT 비율 생성. 
    prev_agg['PREV_REFUSED_RATIO'] = prev_agg['PREV_REFUSED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
    prev_agg['PREV_APPROVED_RATIO'] = prev_agg['PREV_APPROVED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
    # 'PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT' 컬럼 drop 
    prev_agg = prev_agg.drop(['PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT'], axis=1)
    
    return prev_agg

def get_apps_all_with_prev_agg(apps, prev):
    apps_all =  get_apps_processed(apps)
    prev_agg = get_prev_agg(prev)
    print('prev_agg shape:', prev_agg.shape)
    print('apps_all before merge shape:', apps_all.shape)
    apps_all = apps_all.merge(prev_agg, on='SK_ID_CURR', how='left')
    print('apps_all after merge with prev_agg shape:', apps_all.shape)
    
    return apps_all

def get_apps_all_encoded(apps_all):
    object_columns = apps_all.dtypes[apps_all.dtypes == 'object'].index.tolist()
    for column in object_columns:
        apps_all[column] = pd.factorize(apps_all[column])[0]
    
    return apps_all

def get_apps_all_train_test(apps_all):
    apps_all_train = apps_all[~apps_all['TARGET'].isnull()]
    apps_all_test = apps_all[apps_all['TARGET'].isnull()]

    apps_all_test = apps_all_test.drop('TARGET', axis=1)
    
    return apps_all_train, apps_all_test
    
def train_apps_all(apps_all_train):
    ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    target_app = apps_all_train['TARGET']

    train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
    clf = LGBMClassifier(
                nthread=4,
                n_estimators=2000,
                learning_rate=0.01,
                num_leaves=32,
                colsample_bytree=0.8,
                subsample=0.8,
                max_depth=8,
                reg_alpha=0.04,
                reg_lambda=0.07,
                min_child_weight=40,
                silent=-1,
                verbose=-1,
                )

    clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100, 
                early_stopping_rounds= 100)
    
    return clf

### final dataset generation and encoding, train_test_data set split, train, validation dataset split

In [13]:
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
target_app = apps_all_train['TARGET']
train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)

  prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]


prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)


### Bayesian Optimization

In [14]:
from bayes_opt import BayesianOptimization
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier

#### Input range

In [20]:
bayesian_params = {
    'max_depth': (6, 16),
    'num_leaves': (24, 64),
    'min_child_samples':(10, 200),
    'min_child_weight':(1, 50),
    'subsample':(0.5, 1.0),
    'colsample_bytree':(0.5, 1.0),
    'max_bin':(10, 500),
    'reg_lambda':(0.001, 10),
    'reg_alpha':(0.01, 50)
}

#### Define the function from which we get the maximum value
- Get inputs for every iteration and output roc_auc_score

In [36]:
def lgb_roc_eval(max_depth, num_leaves, min_child_samples, min_child_weight, subsample, 
                 colsample_bytree, max_bin, reg_lambda, reg_alpha):
    params = {
        "n_estimators":500, "learning_rate":0.02,
        'max_depth': int(round(max_depth)), # bayesianoptiization input: real number-> need to be rounded to get integer
        'num_leaves': int(round(num_leaves)),
        'min_child_samples': int(round(min_child_samples)),
        'min_child_weight': int(round(min_child_weight)),
        'subsample': max(min(subsample, 1), 0), # limit 0~1
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'max_bin': max(int(round(max_bin)), 10), #minimum 10
        'reg_lambda': max(reg_lambda, 0),
        'reg_alpha': max(reg_alpha, 0),
        'n_jobs':-1
    }
    lgb_model = LGBMClassifier(**params)
    lgb_model.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], 
                  eval_metric='auc', verbose=100, early_stopping_rounds = 100)
    valid_proba = lgb_model.predict_proba(valid_x)[:, 1]
    roc_auc = roc_auc_score(valid_y, valid_proba)
    
    return roc_auc

#### Define BeysianOptimization object, do the iteration

In [37]:
lgbBO = BayesianOptimization(lgb_roc_eval, bayesian_params, random_state=0)
lgbBO.maximize(init_points=5, n_iter=25)

|   iter    |  target   | colsam... |  max_bin  | max_depth | min_ch... | min_ch... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.769662	training's binary_logloss: 0.246032	valid_1's auc: 0.755483	valid_1's binary_logloss: 0.248917
[200]	training's auc: 0.787253	training's binary_logloss: 0.238487	valid_1's auc: 0.766224	valid_1's binary_logloss: 0.244243
[300]	training's auc: 0.798898	training's binary_logloss: 0.23398	valid_1's auc: 0.771467	valid_1's binary_logloss: 0.242344
[400]	training's auc: 0.807869	training's binary_logloss: 0.23065	valid_1's auc: 0.773972	valid_1's binary_logloss: 0.241458
[500]	training's auc: 0.815952	training's binary_logloss: 0.227669	valid_1's auc: 0.775806	valid_1's binary_logloss: 0.240831
Did not meet early stopping. Best iteratio

#### Iteration results

In [38]:
lgbBO.res

[{'target': 0.7758055093230539,
  'params': {'colsample_bytree': 0.7744067519636624,
   'max_bin': 360.44278952248555,
   'max_depth': 12.027633760716439,
   'min_child_samples': 113.52780476941041,
   'min_child_weight': 21.75908516760633,
   'num_leaves': 49.835764522666246,
   'reg_alpha': 21.884984691022,
   'reg_lambda': 8.917838234820016,
   'subsample': 0.9818313802505146}},
 {'target': 0.7757909289675659,
  'params': {'colsample_bytree': 0.6917207594128889,
   'max_bin': 397.94526866050563,
   'max_depth': 11.288949197529044,
   'min_child_samples': 117.92846660784714,
   'min_child_weight': 46.35423527634039,
   'num_leaves': 26.841442327915477,
   'reg_alpha': 4.36559369208002,
   'reg_lambda': 0.20316375600581688,
   'subsample': 0.916309922773969}},
 {'target': 0.7771779879367707,
  'params': {'colsample_bytree': 0.8890783754749252,
   'max_bin': 436.30595264094137,
   'max_depth': 15.78618342232764,
   'min_child_samples': 161.8401272011775,
   'min_child_weight': 23.61248

#### Inputs that makes the functions' maximum

In [39]:
target_list = []
for result in lgbBO.res:
    target = result['target']
    target_list.append(target)
print(target_list)
# index of input parameters that makes function's output maximum?
print('maximum target index:', np.argmax(np.array(target_list)))

[0.7758055093230539, 0.7757909289675659, 0.7771779879367707, 0.7748333416046418, 0.774035094542375, 0.7773908017189786, 0.7755362516633085, 0.7772142029411041, 0.7763952615906466, 0.7734141467631326, 0.7756008426857747, 0.7771050305636831, 0.7772365047377109, 0.7773549981224318, 0.7767492867273098, 0.775929160395352, 0.7767777231064307, 0.7743690019971274, 0.7769920411479967, 0.7754041675256128, 0.7780565770624691, 0.7775081101221989, 0.7771685255576835, 0.7769187946775454, 0.7772230140569003, 0.777808998528086, 0.7771167374184544, 0.7764292464153193, 0.7763562620386317, 0.777091004576216]
maximum target index: 20


In [40]:
# using extracted index-> parameter extraction
max_dict = lgbBO.res[np.argmax(np.array(target_list))]
print(max_dict)

{'target': 0.7780565770624691, 'params': {'colsample_bytree': 0.596684514717646, 'max_bin': 403.18943474881877, 'max_depth': 12.907058961217114, 'min_child_samples': 158.84286216603994, 'min_child_weight': 7.338287048901049, 'num_leaves': 58.11656509027165, 'reg_alpha': 0.6681705096965275, 'reg_lambda': 9.355145759543333, 'subsample': 0.8010238277727275}}


### Retest using optimized parameters

In [42]:
def train_apps_all(apps_all_train):
    ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    target_app = apps_all_train['TARGET']
    
    train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
    clf = LGBMClassifier(
        n_jobs=-1,
        n_estimators=1000,
        learning_rate=0.02,
        max_depth=13,
        num_leaves=58,
        colsample_bytree=0.597,
        subsample=0.801,
        max_bin=403,
        reg_alpha=0.668,
        reg_lambda=9.355,
        min_child_weight=7.338,
        min_child_samples=159,
        silent=-1,
        verbose=-1
    )
    clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], 
            eval_metric='auc', verbose=100, early_stopping_rounds=100)
    
    return clf

In [43]:
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
clf = train_apps_all(apps_all_train)

  prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]


prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)
train shape: (215257, 174) valid shape: (92254, 174)
Training until validation scores don't improve for 100 rounds
[100]	training's auc: 0.780021	training's binary_logloss: 0.243308	valid_1's auc: 0.759823	valid_1's binary_logloss: 0.24777
[200]	training's auc: 0.801	training's binary_logloss: 0.233945	valid_1's auc: 0.769864	valid_1's binary_logloss: 0.242865
[300]	training's auc: 0.817107	training's binary_logloss: 0.227748	valid_1's auc: 0.774335	valid_1's binary_logloss: 0.241209
[400]	training's auc: 0.830791	training's binary_logloss: 0.222602	valid_1's auc: 0.777089	valid_1's binary_logloss: 0.240257
[500]	training's auc: 0.842255	training's binary_logloss: 0.218274	valid_1's auc: 0.778059	valid_1's binary_logloss: 0.239907
[600]	training's auc: 0.852436	training's binary_logloss: 0.2144	valid_1's auc: 0.778536	valid_1's binary_logloss: 0.239737
[700]	t

In [44]:
preds = clf.predict_proba(apps_all_test.drop('SK_ID_CURR', axis=1))[:,1]
apps_all_test['TARGET']=preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('bayesian_optimization_with_app_prev.csv', index=False)

### Hyper parameter re-tuning with Cross validation

In [50]:
bayesian_params = {
    'max_depth':(6, 16),
    'num_leaves':(24, 64),
    'min_data_in_leaf':(10, 200), # min_child_samples in scikit-learn wraaper
    'min_child_weight':(1, 50),
    'bagging_fraction':(0.5, 1.0), # subsample
    'feature_fraction':(0.5, 1.0), # colsample_bytree
    'max_bin':(10, 500),
    'lambda_l2':(0.001, 10), #reg_lambda
    'lambda_l1':(0.01, 50) # reg_alpha
}

In [52]:
import lightgbm as lgb

train_data = lgb.Dataset(data=ftr_app, label=target_app, free_raw_data=False)
def lgb_roc_eval_cv(max_depth, num_leaves, min_data_in_leaf, min_child_weight, bagging_fraction, 
                 feature_fraction,  max_bin, lambda_l2, lambda_l1): 
    params = {
        "num_iterations":500, "learning_rate":0.02,
        'early_stopping_rounds':100, 'metric':'auc',
        'max_depth': int(round(max_depth)), 
        'num_leaves': int(round(num_leaves)), 
        'min_data_in_leaf': int(round(min_data_in_leaf)),
        'min_child_weight': int(round(min_child_weight)),
        'bagging_fraction': max(min(bagging_fraction, 1), 0), 
        'feature_fraction': max(min(feature_fraction, 1), 0),
        'max_bin':  max(int(round(max_bin)),10),
        'lambda_l2': max(lambda_l2,0),
        'lambda_l1': max(lambda_l1, 0),
        'n_jobs':-1
    }
    
    # python lightgbm's cv method
    # scikit-learn cross-val-score -> early stopping is impossible
    cv_result = lgb.cv(params, train_data, nfold=3, seed=0, verbose_eval=100, early_stopping_rounds=50, metrics=['auc'])
    
    return max(cv_result['auc-mean'])

In [None]:
lgbBO = BayesianOptimization(lgb_roc_eval_cv, bayesian_params)
lgbBO.maximize(init_points=5, n_iter=25)
lgbBO.res

|   iter    |  target   | baggin... | featur... | lambda_l1 | lambda_l2 |  max_bin  | max_depth | min_ch... | min_da... | num_le... |
-------------------------------------------------------------------------------------------------------------------------------------




You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28387
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28387
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28387
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 166
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.753321 + 0.00235057
[200]	cv_agg's auc: 0.764203 + 0.00203196
[300]	cv_agg's auc: 0.768652 + 0.00194861
[400]	cv_agg's auc: 0.770725 + 0.00192334
[500]	cv_agg's auc: 0.771955 + 0.0020375
| [0m 1       [0m | [0m 0.772   [0m | [0m 0.8787  [0m | [0m 0



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 17894
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 169
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 17894
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 169
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 17894
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 169
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.753902 + 0.0022159
[200]	cv_agg's auc: 0.762517 + 0.00185629
[3



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 23832
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 169
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 23832
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 169
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 23832
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 169
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.752942 + 0.00235047
[200]	cv_agg's auc: 0.764117 + 0.00214467
[



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4616
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 164
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4616
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 164
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4616
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 164
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.753698 + 0.00228798
[200]	cv_agg's auc: 0.763247 + 0.00198224
[300



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12906
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 169
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12906
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 169
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12906
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 169
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.751879 + 0.00221258
[200]	cv_agg's auc: 0.761602 + 0.00217493
[



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28697
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28697
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28697
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 166
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.750697 + 0.00262902
[200]	cv_agg's auc: 0.762832 + 0.00252327
[



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 37730
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 37730
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 37730
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 166
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.751538 + 0.00265583
[200]	cv_agg's auc: 0.763135 + 0.00222992
[



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28693
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 164
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28693
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 164
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28693
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 164
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.753905 + 0.00171364
[200]	cv_agg's auc: 0.762377 + 0.00153694
[



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23430
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 167
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23430
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 167
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23430
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 167
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.752546 + 0.00208275
[200]	cv_agg's auc: 0.76203 + 0.00203743
[300]	cv_agg's auc: 0.766691 + 0.00205098
[400]	cv_agg's auc: 0.76918 + 0.0020931
[500]	cv_agg's auc: 0.770827 + 0.00195919
| [0m 9       [0m | [0m 0.7708  [0m | [0m 0.7788  [0m | [0m 0.7



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23746
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23746
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 166
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23746
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 166
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.753102 + 0.00231981
[200]	cv_agg's auc: 0.761911 + 0.00200985
[300]	cv_agg's auc: 0.766635 + 0.00209159
[400]	cv_agg's auc: 0.769147 + 0.00217418
[500]	cv_agg's auc: 0.770578 + 0.00197506
| [0m 10      [0m | [0m 0.7706  [0m | [0m 0.6242  [0m | [0m 



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3949
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 170
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3949
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 170
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3949
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 170
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.753034 + 0.00196197
[200]	cv_agg's auc: 0.761663 + 0.00182786
[300



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13332
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 170
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13332
[LightGBM] [Info] Number of data points in the train set: 205007, number of used features: 170
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13332
[LightGBM] [Info] Number of data points in the train set: 205008, number of used features: 170
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[LightGBM] [Info] Start training from score 0.080729
[100]	cv_agg's auc: 0.75244 + 0.0020586
[200]	cv_agg's auc: 0.761177 + 0.00223481
[30

In [None]:
target_list = []
for result in lgbBO.res:
    target = result['target']
    target_list.append(target)
print(target_list)
print('maximum target index:', np.argmax(np.array(target_list)))