In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
import gc

In [11]:
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Prepping Data for algorithm

In [3]:
X = pd.read_csv('../input/final_data.csv')
Y = pd.read_csv('../input/final_target.csv', header=None)

In [4]:
Y = Y[1]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=18)

In [6]:
train_lgb = lgb.Dataset(data=X_train, label=y_train)

# Defining Search Space

In [22]:
space  = [Integer(3, 10, name='max_depth'),
          Integer(5, 50, name='num_leaves'),
          Real(0, 0.5, name='reg_alpha'),
          Real(0.6, 0.9, name='subsample'),
          Real(0.6, 0.9, name='colsample_bytree'),
          Real(0, 0.5, name='reg_lambda'),
          Real(0.01, 0.02, name='learning_rate'),
          Real(0.01, 0.03, name='min_split_gain'),
          Integer(1, 40, name='min_child_weight')
         ]

# Defining Objective function to optimize

In [23]:
def objective_func(values):
    params = {
        'task': 'train', 
        'boosting_type': 'gbdt', 
        'objective': 'binary', 
        'metric': 'auc', 
        'learning_rate': values[6], 
        'num_leaves': values[1], 
        'verbose': 0 ,
        'colsample_bytree':values[4],
        'subsample':values[3], 
        'max_depth':values[0], 
        'reg_alpha':values[2], 
        'reg_lambda':values[5], 
        'min_split_gain':values[7], 
        'min_child_weight':values[8]
    }
    print('----------------------------------------------------------------------------------------')
    print('Current Parameters : ', params)
    model = lgb.train(params, train_lgb,500)
    predictions_lgb = model.predict(X_test)
    auc = - roc_auc_score(y_test, predictions_lgb)
    print('Current Score : ', -auc)
    gc.collect()
    return auc

# Running Optimization Algorithm

In [24]:
result_gp = gp_minimize(objective_func, space, n_calls=50, random_state=0, n_random_starts=10)

----------------------------------------------------------------------------------------
Current Parameters :  {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'learning_rate': 0.012975346065444723, 'num_leaves': 43, 'verbose': 0, 'colsample_bytree': 0.7870691090357917, 'subsample': 0.8541755216352377, 'max_depth': 7, 'reg_alpha': 0.4289728088113785, 'reg_lambda': 0.19219085364634997, 'min_split_gain': 0.011134259546348864, 'min_child_weight': 12}
Current Score :  0.774952334258902
----------------------------------------------------------------------------------------
Current Parameters :  {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'learning_rate': 0.016481718720511972, 'num_leaves': 42, 'verbose': 0, 'colsample_bytree': 0.8508236290612133, 'subsample': 0.7178354388302489, 'max_depth': 6, 'reg_alpha': 0.23998858618752872, 'reg_lambda': 0.16869808020863422, 'min_split_gain': 0.01736483079681096, 'min_child_weight

Current Score :  0.7719643442636994
----------------------------------------------------------------------------------------
Current Parameters :  {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'learning_rate': 0.01969086651304538, 'num_leaves': 49, 'verbose': 0, 'colsample_bytree': 0.8923098233615788, 'subsample': 0.8775610156669194, 'max_depth': 10, 'reg_alpha': 0.4796513969074332, 'reg_lambda': 0.04558294506546424, 'min_split_gain': 0.013317761460863987, 'min_child_weight': 11}
Current Score :  0.7798377921403842
----------------------------------------------------------------------------------------
Current Parameters :  {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'learning_rate': 0.02, 'num_leaves': 50, 'verbose': 0, 'colsample_bytree': 0.9, 'subsample': 0.9, 'max_depth': 10, 'reg_alpha': 0.5, 'reg_lambda': 0.5, 'min_split_gain': 0.03, 'min_child_weight': 40}
Current Score :  0.7800881189414645
------------

Current Score :  0.7802969933682882
----------------------------------------------------------------------------------------
Current Parameters :  {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'learning_rate': 0.0197066010892396, 'num_leaves': 5, 'verbose': 0, 'colsample_bytree': 0.8744300325615251, 'subsample': 0.8918752700200847, 'max_depth': 10, 'reg_alpha': 0.3349466999185929, 'reg_lambda': 0.1653244226686212, 'min_split_gain': 0.029730977689066594, 'min_child_weight': 31}
Current Score :  0.7669278197370634
----------------------------------------------------------------------------------------
Current Parameters :  {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 'learning_rate': 0.019965607381512124, 'num_leaves': 50, 'verbose': 0, 'colsample_bytree': 0.7572106721608783, 'subsample': 0.681242141107915, 'max_depth': 7, 'reg_alpha': 0.11022776975402535, 'reg_lambda': 0.3628287891574088, 'min_split_gain': 0.0131

In [26]:
result_gp.fun

-0.7812854539998564

In [32]:
result_gp.values.

<function OptimizeResult.values>