# Using Bayesian Optimization for tuning the hyperparameters
- data set of application and prev.application

# Loading libraries and datasets. 
- Copying FE functions from previous application data

In [3]:
import numpy as np
import pandas as pd
import gc
import time
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 200)

In [4]:
def get_dataset():
    app_train = pd.read_csv('dataset/application_train.csv')
    app_test = pd.read_csv('dataset/application_test.csv')
    apps = pd.concat([app_train, app_test])
    prev = pd.read_csv('dataset/previous_application.csv')

    return apps, prev

apps, prev = get_dataset()
    

# Copy feature engineering function from previous application data

In [5]:
def get_apps_processed(apps):
    
    # EXT_SOURCE_X 
    apps['APPS_EXT_SOURCE_MEAN'] = apps[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].mean(axis=1)
    apps['APPS_EXT_SOURCE_STD'] = apps[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].std(axis=1)
    apps['APPS_EXT_SOURCE_STD'] = apps['APPS_EXT_SOURCE_STD'].fillna(apps['APPS_EXT_SOURCE_STD'].mean())
      
    # AMT_CREDIT 
    apps['APPS_ANNUITY_CREDIT_RATIO'] = apps['AMT_ANNUITY']/apps['AMT_CREDIT']
    apps['APPS_GOODS_CREDIT_RATIO'] = apps['AMT_GOODS_PRICE']/apps['AMT_CREDIT']
    
    # AMT_INCOME_TOTAL 
    apps['APPS_ANNUITY_INCOME_RATIO'] = apps['AMT_ANNUITY']/apps['AMT_INCOME_TOTAL']
    apps['APPS_CREDIT_INCOME_RATIO'] = apps['AMT_CREDIT']/apps['AMT_INCOME_TOTAL']
    apps['APPS_GOODS_INCOME_RATIO'] = apps['AMT_GOODS_PRICE']/apps['AMT_INCOME_TOTAL']
    apps['APPS_CNT_FAM_INCOME_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['CNT_FAM_MEMBERS']
    
    # DAYS_BIRTH, DAYS_EMPLOYED 
    apps['APPS_EMPLOYED_BIRTH_RATIO'] = apps['DAYS_EMPLOYED']/apps['DAYS_BIRTH']
    apps['APPS_INCOME_EMPLOYED_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['DAYS_EMPLOYED']
    apps['APPS_INCOME_BIRTH_RATIO'] = apps['AMT_INCOME_TOTAL']/apps['DAYS_BIRTH']
    apps['APPS_CAR_BIRTH_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_BIRTH']
    apps['APPS_CAR_EMPLOYED_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_EMPLOYED']
       
    return apps

# Declaration of functions that encode and generate the final set of data after processing the previous application data

In [6]:
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier

def get_prev_processed(prev):
    # The difference and ratio between the loan application amount and the actual loan amount/loan product amount
    prev['PREV_CREDIT_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_CREDIT']
    prev['PREV_GOODS_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_GOODS_PRICE']
    prev['PREV_CREDIT_APPL_RATIO'] = prev['AMT_CREDIT']/prev['AMT_APPLICATION']
    # prev['PREV_ANNUITY_APPL_RATIO'] = prev['AMT_ANNUITY']/prev['AMT_APPLICATION']
    prev['PREV_GOODS_APPL_RATIO'] = prev['AMT_GOODS_PRICE']/prev['AMT_APPLICATION']
    
    prev['DAYS_FIRST_DRAWING'].replace(365243, np.nan, inplace= True)
    prev['DAYS_FIRST_DUE'].replace(365243, np.nan, inplace= True)
    prev['DAYS_LAST_DUE_1ST_VERSION'].replace(365243, np.nan, inplace= True)
    prev['DAYS_LAST_DUE'].replace(365243, np.nan, inplace= True)
    prev['DAYS_TERMINATION'].replace(365243, np.nan, inplace= True)
    # the period from the first due date to the last due date
    prev['PREV_DAYS_LAST_DUE_DIFF'] = prev['DAYS_LAST_DUE_1ST_VERSION'] - prev['DAYS_LAST_DUE']
    # Calculate the total amount paid by multiplying the monthly amount by the number of payments.
    all_pay = prev['AMT_ANNUITY'] * prev['CNT_PAYMENT']
    # Calculate the interest rate by obtaining the AMT_CREDIT ratio to the total amount paid and dividing it by the number of payments again.
    prev['PREV_INTERESTS_RATE'] = (all_pay/prev['AMT_CREDIT'] - 1)/prev['CNT_PAYMENT']
        
    return prev
    
    
def get_prev_amt_agg(prev):
    # Aggregation is carried out with a difference and ratio of different amounts compared to the newly created loan application amount.
    agg_dict = {
         # aggregation with the existing columns
        'SK_ID_CURR':['count'],
        'AMT_CREDIT':['mean', 'max', 'sum'],
        'AMT_ANNUITY':['mean', 'max', 'sum'], 
        'AMT_APPLICATION':['mean', 'max', 'sum'],
        'AMT_DOWN_PAYMENT':['mean', 'max', 'sum'],
        'AMT_GOODS_PRICE':['mean', 'max', 'sum'],
        'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'DAYS_DECISION': ['min', 'max', 'mean'],
        'CNT_PAYMENT': ['mean', 'sum'],
        # aggregation with the pre-processed columns
        'PREV_CREDIT_DIFF':['mean', 'max', 'sum'], 
        'PREV_CREDIT_APPL_RATIO':['mean', 'max'],
        'PREV_GOODS_DIFF':['mean', 'max', 'sum'],
        'PREV_GOODS_APPL_RATIO':['mean', 'max'],
        'PREV_DAYS_LAST_DUE_DIFF':['mean', 'max', 'sum'],
        'PREV_INTERESTS_RATE':['mean', 'max']
    }

    prev_group = prev.groupby('SK_ID_CURR')
    prev_amt_agg = prev_group.agg(agg_dict)

    # change col names with underbar _
    prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]
    
    return prev_amt_agg

def get_prev_refused_appr_agg(prev):
    # Perform groupby with the original groupby column + detailed reference column. Aggregates at a granular level and then transforms to an unstack() column level.
    prev_refused_appr_group = prev[prev['NAME_CONTRACT_STATUS'].isin(['Approved', 'Refused'])].groupby([ 'SK_ID_CURR', 'NAME_CONTRACT_STATUS'])
    prev_refused_appr_agg = prev_refused_appr_group['SK_ID_CURR'].count().unstack()
    # change col names
    prev_refused_appr_agg.columns = ['PREV_APPROVED_COUNT', 'PREV_REFUSED_COUNT' ]
    # NaN into 0
    prev_refused_appr_agg = prev_refused_appr_agg.fillna(0)
    
    return prev_refused_appr_agg

    

def get_prev_agg(prev):
    prev = get_prev_processed(prev)
    prev_amt_agg = get_prev_amt_agg(prev)
    prev_refused_appr_agg = get_prev_refused_appr_agg(prev)
    prev_days365_agg = get_prev_days365_agg(prev)
    
    # join with prev_amt_agg
    prev_agg = prev_amt_agg.merge(prev_refused_appr_agg, on='SK_ID_CURR', how='left')
    prev_agg = prev_agg.merge(prev_days365_agg, on='SK_ID_CURR', how='left')
    # Generation of APPROVED_COUNT and REFUSED_COUNT ratio compared to past loans by SK_ID_CURR.
    prev_agg['PREV_REFUSED_RATIO'] = prev_agg['PREV_REFUSED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
    prev_agg['PREV_APPROVED_RATIO'] = prev_agg['PREV_APPROVED_COUNT']/prev_agg['PREV_SK_ID_CURR_COUNT']
    # Drop 'PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT'
    prev_agg = prev_agg.drop(['PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT'], axis=1)
    
    return prev_agg

def get_apps_all_with_prev_agg(apps, prev):
    apps_all =  get_apps_processed(apps)
    prev_agg = get_prev_agg(prev)
    print('prev_agg shape:', prev_agg.shape)
    print('apps_all before merge shape:', apps_all.shape)
    apps_all = apps_all.merge(prev_agg, on='SK_ID_CURR', how='left')
    print('apps_all after merge with prev_agg shape:', apps_all.shape)
    
    return apps_all

def get_apps_all_encoded(apps_all):
    object_columns = apps_all.dtypes[apps_all.dtypes == 'object'].index.tolist()
    for column in object_columns:
        apps_all[column] = pd.factorize(apps_all[column])[0]
    
    return apps_all

def get_apps_all_train_test(apps_all):
    apps_all_train = apps_all[~apps_all['TARGET'].isnull()]
    apps_all_test = apps_all[apps_all['TARGET'].isnull()]

    apps_all_test = apps_all_test.drop('TARGET', axis=1)
    
    return apps_all_train, apps_all_test
    
def train_apps_all(apps_all_train):
    ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    target_app = apps_all_train['TARGET']

    train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
    clf = LGBMClassifier(
                nthread=4,
                n_estimators=2000,
                learning_rate=0.01,
                num_leaves=32,
                colsample_bytree=0.8,
                subsample=0.8,
                max_depth=8,
                reg_alpha=0.04,
                reg_lambda=0.07,
                min_child_weight=40,
                silent=-1,
                verbose=-1,
                )

    clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100, 
                early_stopping_rounds= 100)
    
    return clf

# Encoding, Splitting train and test, and Splitting train and valid before the final dataset

In [7]:
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
target_app = apps_all_train['TARGET']
train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)


  prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]


prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)


#### Bayesian Optimization 

In [8]:
# bayesian optimization 패키지 설치
!conda install bayesian-optimization



In [9]:
from bayes_opt import BayesianOptimization
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier

# Set the range of testing per each hyper-parameter

In [10]:
# parameter 별로 search할 범위를 설정. 
bayesian_params = {
    'max_depth': (6, 16), 
    'num_leaves': (24, 64), 
    'min_child_samples': (10, 200), 
    'min_child_weight':(1, 50),
    'subsample':(0.5, 1.0),
    'colsample_bytree': (0.5, 1.0),
    'max_bin':(10, 500),
    'reg_lambda':(0.001, 10),
    'reg_alpha': (0.01, 50) 
}

# Declaration of the function to get the best roc_auc_score
- Enter hyperparameter at every iteration to learn the classifier and return the loc_auc_score value

In [11]:
def lgb_roc_eval(max_depth, num_leaves, min_child_samples, min_child_weight, subsample, 
                colsample_bytree,max_bin, reg_lambda, reg_alpha):
    params = {
        "n_estimators":500, "learning_rate":0.02,
        'max_depth': int(round(max_depth)), #  Change integer hyperparameters to integer types because real-world values are entered at the time of call
        'num_leaves': int(round(num_leaves)), 
        'min_child_samples': int(round(min_child_samples)),
        'min_child_weight': int(round(min_child_weight)),
        'subsample': max(min(subsample, 1), 0), 
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'max_bin':  max(int(round(max_bin)),10),
        'reg_lambda': max(reg_lambda,0),
        'reg_alpha': max(reg_alpha, 0)
    }
    lgb_model = LGBMClassifier(**params)
    lgb_model.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100, 
                early_stopping_rounds= 100)
    valid_proba = lgb_model.predict_proba(valid_x)[:, 1]
    roc_auc = roc_auc_score(valid_y, valid_proba)
    
    return roc_auc   

# Bayesian Optimization object
- The object performs iterations for search of input value with maximum function return value

In [12]:
#  Create Bayesian Optimization by setting the function to perform the object and the parameter range to search.
lgbBO = BayesianOptimization(lgb_roc_eval,bayesian_params , random_state=0)
# Perform etherification to infer the input value whose function return value is maximum.
lgbBO.maximize(init_points=5, n_iter=25)

|   iter    |  target   | colsam... |  max_bin  | max_depth | min_ch... | min_ch... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------




[100]	training's auc: 0.769939	training's binary_logloss: 0.245972	valid_1's auc: 0.755365	valid_1's binary_logloss: 0.248955
[200]	training's auc: 0.787803	training's binary_logloss: 0.238321	valid_1's auc: 0.766241	valid_1's binary_logloss: 0.244245
[300]	training's auc: 0.799182	training's binary_logloss: 0.233888	valid_1's auc: 0.771322	valid_1's binary_logloss: 0.2424
[400]	training's auc: 0.808279	training's binary_logloss: 0.230482	valid_1's auc: 0.773931	valid_1's binary_logloss: 0.241467
[500]	training's auc: 0.816375	training's binary_logloss: 0.227514	valid_1's auc: 0.775402	valid_1's binary_logloss: 0.240963
| [0m 1       [0m | [0m 0.7754  [0m | [0m 0.7744  [0m | [0m 360.4   [0m | [0m 12.03   [0m | [0m 113.5   [0m | [0m 21.76   [0m | [0m 49.84   [0m | [0m 21.88   [0m | [0m 8.918   [0m | [0m 0.9818  [0m |




[100]	training's auc: 0.762698	training's binary_logloss: 0.247398	valid_1's auc: 0.753688	valid_1's binary_logloss: 0.249103
[200]	training's auc: 0.780369	training's binary_logloss: 0.240433	valid_1's auc: 0.765612	valid_1's binary_logloss: 0.2443
[300]	training's auc: 0.791026	training's binary_logloss: 0.236421	valid_1's auc: 0.771186	valid_1's binary_logloss: 0.242295
[400]	training's auc: 0.7992	training's binary_logloss: 0.2335	valid_1's auc: 0.773979	valid_1's binary_logloss: 0.241311
[500]	training's auc: 0.806039	training's binary_logloss: 0.231038	valid_1's auc: 0.775665	valid_1's binary_logloss: 0.240713
| [95m 2       [0m | [95m 0.7757  [0m | [95m 0.6917  [0m | [95m 397.9   [0m | [95m 11.29   [0m | [95m 117.9   [0m | [95m 46.35   [0m | [95m 26.84   [0m | [95m 4.366   [0m | [95m 0.2032  [0m | [95m 0.9163  [0m |




[100]	training's auc: 0.776213	training's binary_logloss: 0.243747	valid_1's auc: 0.757416	valid_1's binary_logloss: 0.247941
[200]	training's auc: 0.797209	training's binary_logloss: 0.234929	valid_1's auc: 0.768588	valid_1's binary_logloss: 0.243224
[300]	training's auc: 0.81243	training's binary_logloss: 0.229169	valid_1's auc: 0.773282	valid_1's binary_logloss: 0.241607
[400]	training's auc: 0.825253	training's binary_logloss: 0.22435	valid_1's auc: 0.776174	valid_1's binary_logloss: 0.240627
[500]	training's auc: 0.83611	training's binary_logloss: 0.220261	valid_1's auc: 0.777334	valid_1's binary_logloss: 0.240203
| [95m 3       [0m | [95m 0.7773  [0m | [95m 0.8891  [0m | [95m 436.3   [0m | [95m 15.79   [0m | [95m 161.8   [0m | [95m 23.61   [0m | [95m 55.22   [0m | [95m 5.923   [0m | [95m 6.4     [0m | [95m 0.5717  [0m |




[100]	training's auc: 0.766354	training's binary_logloss: 0.246786	valid_1's auc: 0.753878	valid_1's binary_logloss: 0.24926
[200]	training's auc: 0.783371	training's binary_logloss: 0.239678	valid_1's auc: 0.765124	valid_1's binary_logloss: 0.244595
[300]	training's auc: 0.793705	training's binary_logloss: 0.235747	valid_1's auc: 0.770047	valid_1's binary_logloss: 0.242805
[400]	training's auc: 0.80153	training's binary_logloss: 0.232819	valid_1's auc: 0.77258	valid_1's binary_logloss: 0.241896
[500]	training's auc: 0.808258	training's binary_logloss: 0.23033	valid_1's auc: 0.77434	valid_1's binary_logloss: 0.241283
| [0m 4       [0m | [0m 0.7743  [0m | [0m 0.9723  [0m | [0m 265.7   [0m | [0m 10.15   [0m | [0m 60.27   [0m | [0m 38.94   [0m | [0m 42.25   [0m | [0m 28.43   [0m | [0m 0.1889  [0m | [0m 0.8088  [0m |




[100]	training's auc: 0.766135	training's binary_logloss: 0.247248	valid_1's auc: 0.753922	valid_1's binary_logloss: 0.249567
[200]	training's auc: 0.781825	training's binary_logloss: 0.240309	valid_1's auc: 0.7644	valid_1's binary_logloss: 0.244861
[300]	training's auc: 0.791873	training's binary_logloss: 0.236432	valid_1's auc: 0.769686	valid_1's binary_logloss: 0.242926
[400]	training's auc: 0.79932	training's binary_logloss: 0.233708	valid_1's auc: 0.7722	valid_1's binary_logloss: 0.242042
[500]	training's auc: 0.805819	training's binary_logloss: 0.231355	valid_1's auc: 0.774045	valid_1's binary_logloss: 0.241404
| [0m 5       [0m | [0m 0.774   [0m | [0m 0.806   [0m | [0m 312.3   [0m | [0m 15.44   [0m | [0m 139.5   [0m | [0m 18.62   [0m | [0m 41.48   [0m | [0m 34.88   [0m | [0m 0.6032  [0m | [0m 0.8334  [0m |




[100]	training's auc: 0.770439	training's binary_logloss: 0.246587	valid_1's auc: 0.754679	valid_1's binary_logloss: 0.249647
[200]	training's auc: 0.786489	training's binary_logloss: 0.239186	valid_1's auc: 0.764401	valid_1's binary_logloss: 0.245006
[300]	training's auc: 0.797238	training's binary_logloss: 0.234967	valid_1's auc: 0.769533	valid_1's binary_logloss: 0.243115
[400]	training's auc: 0.805581	training's binary_logloss: 0.231839	valid_1's auc: 0.772351	valid_1's binary_logloss: 0.242133
[500]	training's auc: 0.812758	training's binary_logloss: 0.229145	valid_1's auc: 0.774174	valid_1's binary_logloss: 0.241493
| [0m 6       [0m | [0m 0.7742  [0m | [0m 0.7214  [0m | [0m 488.6   [0m | [0m 13.29   [0m | [0m 182.2   [0m | [0m 1.521   [0m | [0m 57.64   [0m | [0m 41.28   [0m | [0m 4.601   [0m | [0m 0.6146  [0m |




[100]	training's auc: 0.775879	training's binary_logloss: 0.244395	valid_1's auc: 0.758721	valid_1's binary_logloss: 0.248081
[200]	training's auc: 0.794961	training's binary_logloss: 0.235878	valid_1's auc: 0.768844	valid_1's binary_logloss: 0.243236
[300]	training's auc: 0.808871	training's binary_logloss: 0.230495	valid_1's auc: 0.773391	valid_1's binary_logloss: 0.241532
[400]	training's auc: 0.820279	training's binary_logloss: 0.226192	valid_1's auc: 0.775854	valid_1's binary_logloss: 0.240642
[500]	training's auc: 0.830194	training's binary_logloss: 0.222463	valid_1's auc: 0.777347	valid_1's binary_logloss: 0.240109
| [95m 7       [0m | [95m 0.7773  [0m | [95m 0.6269  [0m | [95m 437.7   [0m | [95m 10.07   [0m | [95m 156.5   [0m | [95m 22.14   [0m | [95m 48.87   [0m | [95m 2.432   [0m | [95m 7.948   [0m | [95m 0.8027  [0m |




[100]	training's auc: 0.766314	training's binary_logloss: 0.246998	valid_1's auc: 0.755906	valid_1's binary_logloss: 0.249009
[200]	training's auc: 0.782906	training's binary_logloss: 0.239749	valid_1's auc: 0.766401	valid_1's binary_logloss: 0.244121
[300]	training's auc: 0.793527	training's binary_logloss: 0.235656	valid_1's auc: 0.77108	valid_1's binary_logloss: 0.242321
[400]	training's auc: 0.801673	training's binary_logloss: 0.232638	valid_1's auc: 0.773543	valid_1's binary_logloss: 0.241427
[500]	training's auc: 0.809076	training's binary_logloss: 0.229969	valid_1's auc: 0.775569	valid_1's binary_logloss: 0.240742
| [0m 8       [0m | [0m 0.7756  [0m | [0m 0.613   [0m | [0m 419.6   [0m | [0m 7.914   [0m | [0m 184.4   [0m | [0m 43.85   [0m | [0m 31.05   [0m | [0m 2.992   [0m | [0m 5.967   [0m | [0m 0.9756  [0m |




[100]	training's auc: 0.78098	training's binary_logloss: 0.242404	valid_1's auc: 0.759261	valid_1's binary_logloss: 0.247472
[200]	training's auc: 0.803413	training's binary_logloss: 0.232877	valid_1's auc: 0.769667	valid_1's binary_logloss: 0.242881
[300]	training's auc: 0.820247	training's binary_logloss: 0.226393	valid_1's auc: 0.774132	valid_1's binary_logloss: 0.241254
[400]	training's auc: 0.834488	training's binary_logloss: 0.220965	valid_1's auc: 0.776386	valid_1's binary_logloss: 0.24046
[500]	training's auc: 0.846554	training's binary_logloss: 0.216268	valid_1's auc: 0.77732	valid_1's binary_logloss: 0.240131
| [0m 9       [0m | [0m 0.7773  [0m | [0m 0.7293  [0m | [0m 445.3   [0m | [0m 15.63   [0m | [0m 129.9   [0m | [0m 32.65   [0m | [0m 63.37   [0m | [0m 2.618   [0m | [0m 5.035   [0m | [0m 0.707   [0m |




[100]	training's auc: 0.774312	training's binary_logloss: 0.244604	valid_1's auc: 0.75636	valid_1's binary_logloss: 0.248517
[200]	training's auc: 0.79368	training's binary_logloss: 0.236323	valid_1's auc: 0.767181	valid_1's binary_logloss: 0.243872
[300]	training's auc: 0.80704	training's binary_logloss: 0.231169	valid_1's auc: 0.772147	valid_1's binary_logloss: 0.242078
[400]	training's auc: 0.817948	training's binary_logloss: 0.227077	valid_1's auc: 0.774417	valid_1's binary_logloss: 0.241255
[500]	training's auc: 0.828034	training's binary_logloss: 0.223311	valid_1's auc: 0.776211	valid_1's binary_logloss: 0.240637
| [0m 10      [0m | [0m 0.7762  [0m | [0m 0.8039  [0m | [0m 416.1   [0m | [0m 13.74   [0m | [0m 128.4   [0m | [0m 1.951   [0m | [0m 60.25   [0m | [0m 16.36   [0m | [0m 8.629   [0m | [0m 0.5672  [0m |




[100]	training's auc: 0.774312	training's binary_logloss: 0.244504	valid_1's auc: 0.755825	valid_1's binary_logloss: 0.248613
[200]	training's auc: 0.794121	training's binary_logloss: 0.236098	valid_1's auc: 0.767264	valid_1's binary_logloss: 0.243891
[300]	training's auc: 0.807631	training's binary_logloss: 0.230906	valid_1's auc: 0.772205	valid_1's binary_logloss: 0.242133
[400]	training's auc: 0.818744	training's binary_logloss: 0.226771	valid_1's auc: 0.774615	valid_1's binary_logloss: 0.241259
[500]	training's auc: 0.82878	training's binary_logloss: 0.222992	valid_1's auc: 0.776244	valid_1's binary_logloss: 0.240682
| [0m 11      [0m | [0m 0.7762  [0m | [0m 0.9924  [0m | [0m 410.1   [0m | [0m 15.39   [0m | [0m 126.6   [0m | [0m 10.44   [0m | [0m 61.35   [0m | [0m 17.28   [0m | [0m 9.367   [0m | [0m 0.7424  [0m |




[100]	training's auc: 0.775431	training's binary_logloss: 0.244341	valid_1's auc: 0.758722	valid_1's binary_logloss: 0.247964
[200]	training's auc: 0.794984	training's binary_logloss: 0.23576	valid_1's auc: 0.769078	valid_1's binary_logloss: 0.243174
[300]	training's auc: 0.808803	training's binary_logloss: 0.230387	valid_1's auc: 0.773505	valid_1's binary_logloss: 0.241457
[400]	training's auc: 0.820298	training's binary_logloss: 0.226028	valid_1's auc: 0.776139	valid_1's binary_logloss: 0.240535
[500]	training's auc: 0.830162	training's binary_logloss: 0.222372	valid_1's auc: 0.777281	valid_1's binary_logloss: 0.240118
| [0m 12      [0m | [0m 0.7773  [0m | [0m 0.622   [0m | [0m 497.7   [0m | [0m 10.78   [0m | [0m 70.66   [0m | [0m 43.39   [0m | [0m 48.64   [0m | [0m 1.137   [0m | [0m 8.024   [0m | [0m 0.9837  [0m |




[100]	training's auc: 0.776713	training's binary_logloss: 0.245052	valid_1's auc: 0.758803	valid_1's binary_logloss: 0.248701
[200]	training's auc: 0.793283	training's binary_logloss: 0.236854	valid_1's auc: 0.767627	valid_1's binary_logloss: 0.243884
[300]	training's auc: 0.805659	training's binary_logloss: 0.231873	valid_1's auc: 0.772224	valid_1's binary_logloss: 0.24208
[400]	training's auc: 0.815797	training's binary_logloss: 0.22804	valid_1's auc: 0.774481	valid_1's binary_logloss: 0.241244
[500]	training's auc: 0.825279	training's binary_logloss: 0.224518	valid_1's auc: 0.776133	valid_1's binary_logloss: 0.240684
| [0m 13      [0m | [0m 0.7761  [0m | [0m 0.5053  [0m | [0m 415.2   [0m | [0m 12.72   [0m | [0m 131.4   [0m | [0m 7.043   [0m | [0m 62.17   [0m | [0m 17.93   [0m | [0m 6.222   [0m | [0m 0.8907  [0m |




[100]	training's auc: 0.777238	training's binary_logloss: 0.243655	valid_1's auc: 0.758414	valid_1's binary_logloss: 0.247922
[200]	training's auc: 0.797806	training's binary_logloss: 0.234774	valid_1's auc: 0.768722	valid_1's binary_logloss: 0.243263
[300]	training's auc: 0.812783	training's binary_logloss: 0.229033	valid_1's auc: 0.772991	valid_1's binary_logloss: 0.241691
[400]	training's auc: 0.825499	training's binary_logloss: 0.224258	valid_1's auc: 0.775631	valid_1's binary_logloss: 0.240774
[500]	training's auc: 0.836347	training's binary_logloss: 0.22015	valid_1's auc: 0.777315	valid_1's binary_logloss: 0.240172
| [0m 14      [0m | [0m 0.7773  [0m | [0m 0.7073  [0m | [0m 457.3   [0m | [0m 11.89   [0m | [0m 35.68   [0m | [0m 14.36   [0m | [0m 54.67   [0m | [0m 3.853   [0m | [0m 8.276   [0m | [0m 0.9112  [0m |




[100]	training's auc: 0.770453	training's binary_logloss: 0.245874	valid_1's auc: 0.756852	valid_1's binary_logloss: 0.248638
[200]	training's auc: 0.788331	training's binary_logloss: 0.238077	valid_1's auc: 0.76768	valid_1's binary_logloss: 0.243748
[300]	training's auc: 0.800523	training's binary_logloss: 0.233421	valid_1's auc: 0.772874	valid_1's binary_logloss: 0.24184
[400]	training's auc: 0.810157	training's binary_logloss: 0.229838	valid_1's auc: 0.775407	valid_1's binary_logloss: 0.240937
[500]	training's auc: 0.818794	training's binary_logloss: 0.226638	valid_1's auc: 0.777068	valid_1's binary_logloss: 0.240319
| [0m 15      [0m | [0m 0.7771  [0m | [0m 0.6512  [0m | [0m 498.1   [0m | [0m 15.84   [0m | [0m 68.41   [0m | [0m 38.94   [0m | [0m 42.8    [0m | [0m 9.776   [0m | [0m 8.89    [0m | [0m 0.5139  [0m |




[100]	training's auc: 0.77341	training's binary_logloss: 0.245239	valid_1's auc: 0.755979	valid_1's binary_logloss: 0.248746
[200]	training's auc: 0.791421	training's binary_logloss: 0.237453	valid_1's auc: 0.76664	valid_1's binary_logloss: 0.244083
[300]	training's auc: 0.802678	training's binary_logloss: 0.232932	valid_1's auc: 0.771279	valid_1's binary_logloss: 0.242317
[400]	training's auc: 0.811391	training's binary_logloss: 0.229561	valid_1's auc: 0.773415	valid_1's binary_logloss: 0.241533
[500]	training's auc: 0.819056	training's binary_logloss: 0.22663	valid_1's auc: 0.774956	valid_1's binary_logloss: 0.240983
| [0m 16      [0m | [0m 0.775   [0m | [0m 0.8225  [0m | [0m 418.7   [0m | [0m 6.561   [0m | [0m 125.2   [0m | [0m 1.872   [0m | [0m 61.34   [0m | [0m 18.91   [0m | [0m 5.198   [0m | [0m 0.7501  [0m |




[100]	training's auc: 0.77386	training's binary_logloss: 0.244778	valid_1's auc: 0.757287	valid_1's binary_logloss: 0.248361
[200]	training's auc: 0.792585	training's binary_logloss: 0.236604	valid_1's auc: 0.767684	valid_1's binary_logloss: 0.24366
[300]	training's auc: 0.805441	training's binary_logloss: 0.231652	valid_1's auc: 0.772212	valid_1's binary_logloss: 0.242009
[400]	training's auc: 0.816296	training's binary_logloss: 0.227614	valid_1's auc: 0.774813	valid_1's binary_logloss: 0.241097
[500]	training's auc: 0.826026	training's binary_logloss: 0.223991	valid_1's auc: 0.776472	valid_1's binary_logloss: 0.240536
| [0m 17      [0m | [0m 0.7765  [0m | [0m 0.8049  [0m | [0m 433.0   [0m | [0m 13.17   [0m | [0m 160.0   [0m | [0m 20.51   [0m | [0m 52.54   [0m | [0m 13.09   [0m | [0m 6.148   [0m | [0m 0.901   [0m |




[100]	training's auc: 0.78085	training's binary_logloss: 0.242886	valid_1's auc: 0.759977	valid_1's binary_logloss: 0.247535
[200]	training's auc: 0.801735	training's binary_logloss: 0.233515	valid_1's auc: 0.769908	valid_1's binary_logloss: 0.242763
[300]	training's auc: 0.817733	training's binary_logloss: 0.227287	valid_1's auc: 0.774073	valid_1's binary_logloss: 0.241181
[400]	training's auc: 0.83141	training's binary_logloss: 0.222034	valid_1's auc: 0.776654	valid_1's binary_logloss: 0.240259
[500]	training's auc: 0.843255	training's binary_logloss: 0.217495	valid_1's auc: 0.777633	valid_1's binary_logloss: 0.239889
| [95m 18      [0m | [95m 0.7776  [0m | [95m 0.6602  [0m | [95m 438.5   [0m | [95m 15.4    [0m | [95m 151.4   [0m | [95m 48.39   [0m | [95m 61.99   [0m | [95m 2.648   [0m | [95m 5.525   [0m | [95m 0.9571  [0m |




[100]	training's auc: 0.775084	training's binary_logloss: 0.243729	valid_1's auc: 0.757164	valid_1's binary_logloss: 0.247783
[200]	training's auc: 0.797413	training's binary_logloss: 0.234752	valid_1's auc: 0.76887	valid_1's binary_logloss: 0.243066
[300]	training's auc: 0.812476	training's binary_logloss: 0.22896	valid_1's auc: 0.773231	valid_1's binary_logloss: 0.24148
[400]	training's auc: 0.825107	training's binary_logloss: 0.224276	valid_1's auc: 0.77599	valid_1's binary_logloss: 0.240573
[500]	training's auc: 0.835461	training's binary_logloss: 0.220363	valid_1's auc: 0.776937	valid_1's binary_logloss: 0.240227
| [0m 19      [0m | [0m 0.7769  [0m | [0m 0.9743  [0m | [0m 440.1   [0m | [0m 10.74   [0m | [0m 158.4   [0m | [0m 25.92   [0m | [0m 50.89   [0m | [0m 0.3118  [0m | [0m 8.606   [0m | [0m 0.5562  [0m |




[100]	training's auc: 0.777977	training's binary_logloss: 0.243586	valid_1's auc: 0.75945	valid_1's binary_logloss: 0.247794
[200]	training's auc: 0.79842	training's binary_logloss: 0.234606	valid_1's auc: 0.769295	valid_1's binary_logloss: 0.24308
[300]	training's auc: 0.81335	training's binary_logloss: 0.228821	valid_1's auc: 0.773648	valid_1's binary_logloss: 0.241462
[400]	training's auc: 0.826328	training's binary_logloss: 0.223963	valid_1's auc: 0.776129	valid_1's binary_logloss: 0.240575
[500]	training's auc: 0.837185	training's binary_logloss: 0.219807	valid_1's auc: 0.777343	valid_1's binary_logloss: 0.240153
| [0m 20      [0m | [0m 0.7773  [0m | [0m 0.6416  [0m | [0m 428.1   [0m | [0m 13.21   [0m | [0m 138.1   [0m | [0m 24.62   [0m | [0m 53.08   [0m | [0m 2.327   [0m | [0m 3.747   [0m | [0m 0.9107  [0m |




[100]	training's auc: 0.779709	training's binary_logloss: 0.242618	valid_1's auc: 0.75925	valid_1's binary_logloss: 0.247489
[200]	training's auc: 0.802469	training's binary_logloss: 0.233093	valid_1's auc: 0.769449	valid_1's binary_logloss: 0.242906
[300]	training's auc: 0.819791	training's binary_logloss: 0.226594	valid_1's auc: 0.773905	valid_1's binary_logloss: 0.24126
[400]	training's auc: 0.834064	training's binary_logloss: 0.221224	valid_1's auc: 0.775927	valid_1's binary_logloss: 0.240533
[500]	training's auc: 0.846226	training's binary_logloss: 0.216637	valid_1's auc: 0.776871	valid_1's binary_logloss: 0.240202
| [0m 21      [0m | [0m 0.7769  [0m | [0m 0.7408  [0m | [0m 443.9   [0m | [0m 15.91   [0m | [0m 143.3   [0m | [0m 14.16   [0m | [0m 54.3    [0m | [0m 0.8651  [0m | [0m 0.6161  [0m | [0m 0.8543  [0m |




[100]	training's auc: 0.774476	training's binary_logloss: 0.244579	valid_1's auc: 0.757684	valid_1's binary_logloss: 0.248165
[200]	training's auc: 0.792983	training's binary_logloss: 0.236393	valid_1's auc: 0.767951	valid_1's binary_logloss: 0.243465
[300]	training's auc: 0.805925	training's binary_logloss: 0.23139	valid_1's auc: 0.772587	valid_1's binary_logloss: 0.241761
[400]	training's auc: 0.816667	training's binary_logloss: 0.227376	valid_1's auc: 0.774955	valid_1's binary_logloss: 0.240932
[500]	training's auc: 0.826274	training's binary_logloss: 0.223791	valid_1's auc: 0.776634	valid_1's binary_logloss: 0.240344
| [0m 22      [0m | [0m 0.7766  [0m | [0m 0.6861  [0m | [0m 443.4   [0m | [0m 9.642   [0m | [0m 142.9   [0m | [0m 47.75   [0m | [0m 49.46   [0m | [0m 7.697   [0m | [0m 1.186   [0m | [0m 0.6589  [0m |




[100]	training's auc: 0.778487	training's binary_logloss: 0.24286	valid_1's auc: 0.757973	valid_1's binary_logloss: 0.247651
[200]	training's auc: 0.800987	training's binary_logloss: 0.233558	valid_1's auc: 0.769224	valid_1's binary_logloss: 0.242993
[300]	training's auc: 0.817079	training's binary_logloss: 0.227416	valid_1's auc: 0.773486	valid_1's binary_logloss: 0.24148
[400]	training's auc: 0.830706	training's binary_logloss: 0.222223	valid_1's auc: 0.776108	valid_1's binary_logloss: 0.240558
[500]	training's auc: 0.842467	training's binary_logloss: 0.217763	valid_1's auc: 0.777168	valid_1's binary_logloss: 0.240173
| [0m 23      [0m | [0m 0.7772  [0m | [0m 0.9454  [0m | [0m 479.7   [0m | [0m 12.77   [0m | [0m 41.53   [0m | [0m 29.49   [0m | [0m 59.85   [0m | [0m 5.279   [0m | [0m 3.318   [0m | [0m 0.6521  [0m |




[100]	training's auc: 0.774475	training's binary_logloss: 0.244433	valid_1's auc: 0.757648	valid_1's binary_logloss: 0.248161
[200]	training's auc: 0.793114	training's binary_logloss: 0.236371	valid_1's auc: 0.767894	valid_1's binary_logloss: 0.243552
[300]	training's auc: 0.805464	training's binary_logloss: 0.231522	valid_1's auc: 0.772303	valid_1's binary_logloss: 0.241926
[400]	training's auc: 0.815564	training's binary_logloss: 0.227695	valid_1's auc: 0.774663	valid_1's binary_logloss: 0.241108
[500]	training's auc: 0.824748	training's binary_logloss: 0.224313	valid_1's auc: 0.776125	valid_1's binary_logloss: 0.240619
| [0m 24      [0m | [0m 0.7761  [0m | [0m 0.7125  [0m | [0m 461.4   [0m | [0m 8.193   [0m | [0m 53.73   [0m | [0m 26.12   [0m | [0m 51.01   [0m | [0m 8.932   [0m | [0m 0.104   [0m | [0m 0.5357  [0m |




[100]	training's auc: 0.7812	training's binary_logloss: 0.24213	valid_1's auc: 0.75926	valid_1's binary_logloss: 0.247279
[200]	training's auc: 0.804503	training's binary_logloss: 0.232378	valid_1's auc: 0.769879	valid_1's binary_logloss: 0.242753
[300]	training's auc: 0.821556	training's binary_logloss: 0.225764	valid_1's auc: 0.77421	valid_1's binary_logloss: 0.241194
[400]	training's auc: 0.835767	training's binary_logloss: 0.220302	valid_1's auc: 0.776371	valid_1's binary_logloss: 0.240432
[500]	training's auc: 0.847658	training's binary_logloss: 0.215691	valid_1's auc: 0.777246	valid_1's binary_logloss: 0.240129
| [0m 25      [0m | [0m 0.7772  [0m | [0m 0.8755  [0m | [0m 426.7   [0m | [0m 13.47   [0m | [0m 140.1   [0m | [0m 39.08   [0m | [0m 63.44   [0m | [0m 1.325   [0m | [0m 4.077   [0m | [0m 0.5174  [0m |




[100]	training's auc: 0.774693	training's binary_logloss: 0.244437	valid_1's auc: 0.758155	valid_1's binary_logloss: 0.248061
[200]	training's auc: 0.793825	training's binary_logloss: 0.236161	valid_1's auc: 0.768486	valid_1's binary_logloss: 0.243374
[300]	training's auc: 0.806909	training's binary_logloss: 0.231046	valid_1's auc: 0.772907	valid_1's binary_logloss: 0.241705
[400]	training's auc: 0.817538	training's binary_logloss: 0.227001	valid_1's auc: 0.77532	valid_1's binary_logloss: 0.240862
[500]	training's auc: 0.826923	training's binary_logloss: 0.223504	valid_1's auc: 0.776833	valid_1's binary_logloss: 0.240331
| [0m 26      [0m | [0m 0.7768  [0m | [0m 0.6977  [0m | [0m 469.9   [0m | [0m 8.59    [0m | [0m 19.46   [0m | [0m 31.14   [0m | [0m 49.08   [0m | [0m 4.494   [0m | [0m 6.626   [0m | [0m 0.8361  [0m |




[100]	training's auc: 0.778045	training's binary_logloss: 0.244055	valid_1's auc: 0.759044	valid_1's binary_logloss: 0.248226
[200]	training's auc: 0.796233	training's binary_logloss: 0.235667	valid_1's auc: 0.768474	valid_1's binary_logloss: 0.243536
[300]	training's auc: 0.809053	training's binary_logloss: 0.230515	valid_1's auc: 0.773128	valid_1's binary_logloss: 0.241752
[400]	training's auc: 0.819448	training's binary_logloss: 0.226472	valid_1's auc: 0.775171	valid_1's binary_logloss: 0.241002
[500]	training's auc: 0.829235	training's binary_logloss: 0.2228	valid_1's auc: 0.776411	valid_1's binary_logloss: 0.24056
| [0m 27      [0m | [0m 0.7764  [0m | [0m 0.5896  [0m | [0m 476.9   [0m | [0m 7.579   [0m | [0m 26.19   [0m | [0m 3.957   [0m | [0m 63.91   [0m | [0m 13.22   [0m | [0m 1.491   [0m | [0m 0.9085  [0m |




[100]	training's auc: 0.771861	training's binary_logloss: 0.244863	valid_1's auc: 0.757265	valid_1's binary_logloss: 0.248069
[200]	training's auc: 0.791359	training's binary_logloss: 0.236792	valid_1's auc: 0.768042	valid_1's binary_logloss: 0.243445
[300]	training's auc: 0.80466	training's binary_logloss: 0.231733	valid_1's auc: 0.772717	valid_1's binary_logloss: 0.241718
[400]	training's auc: 0.815551	training's binary_logloss: 0.227692	valid_1's auc: 0.77547	valid_1's binary_logloss: 0.240779
[500]	training's auc: 0.824795	training's binary_logloss: 0.224284	valid_1's auc: 0.776971	valid_1's binary_logloss: 0.240268
| [0m 28      [0m | [0m 0.777   [0m | [0m 0.7844  [0m | [0m 497.8   [0m | [0m 11.24   [0m | [0m 48.74   [0m | [0m 41.13   [0m | [0m 41.77   [0m | [0m 1.772   [0m | [0m 4.538   [0m | [0m 0.9283  [0m |




[100]	training's auc: 0.778787	training's binary_logloss: 0.243316	valid_1's auc: 0.759673	valid_1's binary_logloss: 0.247687
[200]	training's auc: 0.798839	training's binary_logloss: 0.234367	valid_1's auc: 0.769386	valid_1's binary_logloss: 0.24303
[300]	training's auc: 0.814555	training's binary_logloss: 0.228362	valid_1's auc: 0.773898	valid_1's binary_logloss: 0.241323
[400]	training's auc: 0.827583	training's binary_logloss: 0.223427	valid_1's auc: 0.776396	valid_1's binary_logloss: 0.240454
[500]	training's auc: 0.838887	training's binary_logloss: 0.21912	valid_1's auc: 0.777674	valid_1's binary_logloss: 0.239993
| [95m 29      [0m | [95m 0.7777  [0m | [95m 0.6173  [0m | [95m 499.4   [0m | [95m 13.45   [0m | [95m 60.94   [0m | [95m 23.23   [0m | [95m 56.33   [0m | [95m 4.316   [0m | [95m 2.0     [0m | [95m 0.6298  [0m |




[100]	training's auc: 0.774989	training's binary_logloss: 0.244873	valid_1's auc: 0.7579	valid_1's binary_logloss: 0.248484
[200]	training's auc: 0.792792	training's binary_logloss: 0.236701	valid_1's auc: 0.767601	valid_1's binary_logloss: 0.243744
[300]	training's auc: 0.806074	training's binary_logloss: 0.231522	valid_1's auc: 0.772761	valid_1's binary_logloss: 0.241832
[400]	training's auc: 0.816895	training's binary_logloss: 0.227504	valid_1's auc: 0.775096	valid_1's binary_logloss: 0.240998
[500]	training's auc: 0.826579	training's binary_logloss: 0.22387	valid_1's auc: 0.776779	valid_1's binary_logloss: 0.240405
| [0m 30      [0m | [0m 0.7768  [0m | [0m 0.6016  [0m | [0m 498.3   [0m | [0m 14.95   [0m | [0m 45.87   [0m | [0m 23.19   [0m | [0m 55.85   [0m | [0m 12.81   [0m | [0m 8.804   [0m | [0m 0.9214  [0m |


# Output of the Iteration action result

In [13]:
# The res of the Bayesian Optimization object has all function return results and parameter result values at that time whenever it is performed.
lgbBO.res

[{'target': 0.7754019849993636,
  'params': {'colsample_bytree': 0.7744067519636624,
   'max_bin': 360.44278952248555,
   'max_depth': 12.027633760716439,
   'min_child_samples': 113.52780476941041,
   'min_child_weight': 21.75908516760633,
   'num_leaves': 49.835764522666246,
   'reg_alpha': 21.884984691022,
   'reg_lambda': 8.917838234820016,
   'subsample': 0.9818313802505146}},
 {'target': 0.7756649587625722,
  'params': {'colsample_bytree': 0.6917207594128889,
   'max_bin': 397.94526866050563,
   'max_depth': 11.288949197529044,
   'min_child_samples': 117.92846660784714,
   'min_child_weight': 46.35423527634039,
   'num_leaves': 26.841442327915477,
   'reg_alpha': 4.36559369208002,
   'reg_lambda': 0.20316375600581688,
   'subsample': 0.916309922773969}},
 {'target': 0.7773337907658435,
  'params': {'colsample_bytree': 0.8890783754749252,
   'max_bin': 436.30595264094137,
   'max_depth': 15.78618342232764,
   'min_child_samples': 161.8401272011775,
   'min_child_weight': 23.61248

# As a result of the Iteration
- the index with the maximum target value is extracted from the Dictionary and the parameter value is extracted.

In [14]:
# Extract all target values in dictionary
target_list = []
for result in lgbBO.res:
    target = result['target']
    target_list.append(target)
print(target_list)
# Extract the index with the largest target value
print('maximum target index:', np.argmax(np.array(target_list)))

[0.7754019849993636, 0.7756649587625722, 0.7773337907658435, 0.7743404274231133, 0.7740445759828272, 0.7741741249590283, 0.7773465253458576, 0.7755691213981508, 0.7773196456448476, 0.7762113766061457, 0.7762440223699537, 0.7772812798832063, 0.7761326579356012, 0.7773153203035257, 0.7770684120937986, 0.7749555583092521, 0.7764718835110829, 0.7776325792447284, 0.7769369951038645, 0.7773433992820599, 0.7768711555620157, 0.7766344976011137, 0.7771676535002521, 0.7761250905738466, 0.7772459512323278, 0.7768328263346566, 0.776410856963801, 0.7769705589900684, 0.7776743569906369, 0.7767790351636954]
maximum target index: 28


In [15]:
# Extract the corresponding parameter from res based on index value with the largest target value.
max_dict = lgbBO.res[np.argmax(np.array(target_list))]
print(max_dict)

{'target': 0.7776743569906369, 'params': {'colsample_bytree': 0.6173018328580049, 'max_bin': 499.3972330245431, 'max_depth': 13.449824828941859, 'min_child_samples': 60.93736887309616, 'min_child_weight': 23.22575665016526, 'num_leaves': 56.32535872887304, 'reg_alpha': 4.316459771428574, 'reg_lambda': 2.0002145324686245, 'subsample': 0.6297763732797832}}


# Retest based on optimized hyperparameters

In [16]:
def train_apps_all(apps_all_train):
    ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    target_app = apps_all_train['TARGET']

    train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
    clf = LGBMClassifier(
                nthread=4,
                n_estimators=1000,
                learning_rate=0.02,
                max_depth = 13,
                num_leaves=57,
                colsample_bytree=0.638,
                subsample=0.682,
                max_bin=435,
                reg_alpha=0.936,
                reg_lambda=4.533,
                min_child_weight=25,
                min_child_samples=166,
                silent=-1,
                verbose=-1,
                )

    clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100, 
                early_stopping_rounds= 100)
    
    return clf

In [17]:
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
clf = train_apps_all(apps_all_train)

  prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]


prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)
train shape: (215257, 174) valid shape: (92254, 174)




[100]	training's auc: 0.780153	training's binary_logloss: 0.242981	valid_1's auc: 0.759911	valid_1's binary_logloss: 0.24756
[200]	training's auc: 0.801589	training's binary_logloss: 0.233556	valid_1's auc: 0.769847	valid_1's binary_logloss: 0.242803
[300]	training's auc: 0.817628	training's binary_logloss: 0.227376	valid_1's auc: 0.773659	valid_1's binary_logloss: 0.241322
[400]	training's auc: 0.831406	training's binary_logloss: 0.222166	valid_1's auc: 0.776379	valid_1's binary_logloss: 0.240369
[500]	training's auc: 0.84293	training's binary_logloss: 0.217731	valid_1's auc: 0.777363	valid_1's binary_logloss: 0.240028
[600]	training's auc: 0.853247	training's binary_logloss: 0.21375	valid_1's auc: 0.777921	valid_1's binary_logloss: 0.239843
[700]	training's auc: 0.862535	training's binary_logloss: 0.210085	valid_1's auc: 0.77821	valid_1's binary_logloss: 0.239772
[800]	training's auc: 0.871324	training's binary_logloss: 0.20648	valid_1's auc: 0.778582	valid_1's binary_logloss: 0.2396

In [18]:
preds = clf.predict_proba(apps_all_test.drop('SK_ID_CURR', axis=1))[:, 1 ]
apps_all_test['TARGET'] = preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('prev_baseline_tuning_01.csv', index=False)

# Re-tuning hyper parameters by CV

In [19]:
bayesian_params = {
    'max_depth': (6, 16), 
    'num_leaves': (24, 64), 
    'min_data_in_leaf': (10, 200), # min_child_samples
    'min_child_weight':(1, 50),
    'bagging_fraction':(0.5, 1.0), # subsample
    'feature_fraction': (0.5, 1.0), # colsample_bytree
    'max_bin':(10, 500),
    'lambda_l2':(0.001, 10), # reg_lambda
    'lambda_l1': (0.01, 50) # reg_alpha
}

In [20]:
import lightgbm as lgb

train_data = lgb.Dataset(data=ftr_app, label=target_app, free_raw_data=False)
def lgb_roc_eval_cv(max_depth, num_leaves, min_data_in_leaf, min_child_weight, bagging_fraction, 
                 feature_fraction,  max_bin, lambda_l2, lambda_l1):   
    params = {
        "num_iterations":500, "learning_rate":0.02,
        'early_stopping_rounds':100, 'metric':'auc',
        'max_depth': int(round(max_depth)), #  Change double type hyperparameters to integer types because double type values are entered at call time
        'num_leaves': int(round(num_leaves)), 
        'min_data_in_leaf': int(round(min_data_in_leaf)),
        'min_child_weight': int(round(min_child_weight)),
        'bagging_fraction': max(min(bagging_fraction, 1), 0), 
        'feature_fraction': max(min(feature_fraction, 1), 0),
        'max_bin':  max(int(round(max_bin)),10),
        'lambda_l2': max(lambda_l2,0),
        'lambda_l1': max(lambda_l1, 0)
    }
    # lightgbm cv 
    cv_result = lgb.cv(params, train_data, nfold=3, seed=0,  verbose_eval =100,  early_stopping_rounds=50, metrics=['auc'])
    return max(cv_result['auc-mean'])   

In [21]:
max_dict = lgbBO.res[np.argmax(np.array(target_list))]
print(max_dict)

{'target': 0.7776743569906369, 'params': {'colsample_bytree': 0.6173018328580049, 'max_bin': 499.3972330245431, 'max_depth': 13.449824828941859, 'min_child_samples': 60.93736887309616, 'min_child_weight': 23.22575665016526, 'num_leaves': 56.32535872887304, 'reg_alpha': 4.316459771428574, 'reg_lambda': 2.0002145324686245, 'subsample': 0.6297763732797832}}


In [22]:
def train_apps_all(apps_all_train):
    ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    target_app = apps_all_train['TARGET']

    train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
    clf = LGBMClassifier(
                nthread=4,
                n_estimators=1000,
                learning_rate=0.02,
                max_depth = 10,
                num_leaves=60,
                colsample_bytree=0.511,
                subsample=0.785,
                max_bin=208,
                reg_alpha=7.009,
                reg_lambda=6.579,
                min_child_weight=40,
                min_child_samples=91,
                silent=-1,
                verbose=-1,
                )

    clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100, 
                early_stopping_rounds= 100)
    
    return clf

In [23]:
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
clf = train_apps_all(apps_all_train)

  prev_amt_agg.columns = ["PREV_"+ "_".join(x).upper() for x in prev_amt_agg.columns.ravel()]


prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)
train shape: (215257, 174) valid shape: (92254, 174)




[100]	training's auc: 0.778267	training's binary_logloss: 0.244198	valid_1's auc: 0.759489	valid_1's binary_logloss: 0.248092
[200]	training's auc: 0.796481	training's binary_logloss: 0.235631	valid_1's auc: 0.768743	valid_1's binary_logloss: 0.243337
[300]	training's auc: 0.809962	training's binary_logloss: 0.230278	valid_1's auc: 0.773131	valid_1's binary_logloss: 0.241656
[400]	training's auc: 0.821143	training's binary_logloss: 0.225997	valid_1's auc: 0.775193	valid_1's binary_logloss: 0.240905
[500]	training's auc: 0.831372	training's binary_logloss: 0.222146	valid_1's auc: 0.776484	valid_1's binary_logloss: 0.24044
[600]	training's auc: 0.840357	training's binary_logloss: 0.21869	valid_1's auc: 0.777441	valid_1's binary_logloss: 0.240083
[700]	training's auc: 0.848739	training's binary_logloss: 0.215451	valid_1's auc: 0.777972	valid_1's binary_logloss: 0.239874
[800]	training's auc: 0.856446	training's binary_logloss: 0.212414	valid_1's auc: 0.778259	valid_1's binary_logloss: 0.2

In [24]:
preds = clf.predict_proba(apps_all_test.drop('SK_ID_CURR', axis=1))[:, 1 ]
apps_all_test['TARGET'] = preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('prev_baseline_tuning_02.csv', index=False)