In [3]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import PolynomialFeatures
from imblearn.over_sampling import SMOTE
import pickle
from lightgbm.sklearn import LGBMClassifier

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [4]:
data_train = pd.read_csv("cleaned_2013_14", low_memory = False);
data_test = pd.read_csv("cleaned_2015", low_memory = False);

In [5]:
data_test = data_test.dropna()
data_test.columns == data_train.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])

In [6]:
def split_data(df, cols):
    x = df.drop(cols, axis = 1)
    y = df.paid
    return x, y

cols_to_drop_training = ['loan_status', 'paid', 'amnt', 'total_pymnt', 'term_adj', 'zip_code']
x_train_initial, y_train_initial = split_data(data_train, cols_to_drop_training)
x_test, y_test = split_data(data_test, cols_to_drop_training)

In [7]:
sm = SMOTE(random_state=1, ratio = 1.0)
x_train, y_train = sm.fit_sample(x_train_initial, y_train_initial)

In [8]:
x_train.shape, x_test.shape

((578050, 44), (346843, 44))

In [39]:
params = {}

params['LGBM__learning_rate'] = [0.001, 0.01, 0.1]
params['LGBM__boosting_type'] = ['gbdt', 'dart']
params['LGBM__max_depth'] = [5, 7, 10, 12, 15]
params['LGBM__lambda_l2'] = [0, 0.001, 0.01, 0.1]

In [40]:
# Use Pipeline instead of make_pipeline
pipe = Pipeline([('POLY', PolynomialFeatures(degree=2, include_bias=False)),
            ('LGBM',LGBMClassifier(objective = 'binary', metric = 'binary_logloss',
                                   n_jobs = 5, bagging_fraction = 0.7))])


# lgbm = pipe.fit(x_train, y_train)

# Instantiate GS
gs = GridSearchCV(pipe, params, verbose=10, cv=4)

In [41]:
# pipe.get_params().keys()

In [42]:
gs.fit(x_train, y_train)

Fitting 4 folds for each of 120 candidates, totalling 480 fits
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0, score=0.559495965789, total=  56.5s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.4min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0, score=0.649094884854, total=  56.9s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  2.8min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0, score=0.65625, total=  55.1s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  4.1min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0, score=0.530786370682, total=  56.6s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  5.5min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0, score=0.571743914084, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  7.0min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0, score=0.698578664748, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  8.5min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0, score=0.663142161205, total=  59.1s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:  9.9min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0, score=0.550390279008, total=  60.0s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 11.4min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0, score=0.558499522538, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 12.8min remaining:    0.0s


[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0, score=0.703014282551, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0, score=0.675279561559, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0, score=0.534585363153, total=  59.5s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0, score=0.55685954302, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=12

[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0, score=0.812922110275, total=  52.7s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0, score=0.726361824624, total=  53.8s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0, score=0.556403620461, total=  53.5s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0, score=0.538494540321, total=  57.0s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0, s

[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0.001, score=0.685894596988, total=  54.6s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0.001, score=0.668242083702, total=  54.3s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0.001, score=0.558707927369, total=  55.0s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.001, score=0.595603194154, total=  58.7s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LG

[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=15, LGBM__lambda_l2=0.001, score=0.528592385513, total= 1.0min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=15, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=15, LGBM__lambda_l2=0.001, score=0.833231842338, total=  59.5s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=15, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=15, LGBM__lambda_l2=0.001, score=0.740194585917, total=  59.4s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=15, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=15, LGBM__lambda_l2=0.001, score=0.563053587245, total=  57.1s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, L

[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.01, score=0.529686116032, total=  59.1s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0.01, score=0.60198319886, total= 1.0min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0.01, score=0.748256200177, total= 1.0min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0.01, score=0.699388286094, total=  59.4s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGBM__max_depth=12, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=gbdt, LGB

[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1, score=0.550390279008, total=  57.9s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.1, score=0.558499522538, total=  59.8s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.1, score=0.703014282551, total=  59.6s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.1, score=0.675279561559, total=  59.8s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LGBM__max_depth=10, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=gbdt, LG

[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=5, LGBM__lambda_l2=0.1, score=0.555517880868, total=  51.2s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1, score=0.553974009439, total=  54.8s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1, score=0.810569364482, total=  53.4s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1, score=0.727282163419, total=  54.0s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=gbdt, LGBM__max_depth=7, LGBM__la

[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0, score=0.539996678477, total= 1.0min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0, score=0.580379755595, total=  57.4s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0, score=0.65405641054, total=  56.8s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0, score=0.645641884411, total=  56.4s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l

[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0, score=0.564547379493, total= 1.0min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0, score=0.816658824181, total= 1.0min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0, score=0.720680635518, total= 1.0min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0, score=0.548085972099, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lam

[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=10, LGBM__lambda_l2=0.001, score=0.531333038087, total= 1.1min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.001, score=0.614812405718, total= 1.1min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.001, score=0.742222099203, total= 1.0min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.001, score=0.689589791851, total= 1.0min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.001 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=

[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=7, LGBM__lambda_l2=0.01, score=0.658436669619, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=7, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=7, LGBM__lambda_l2=0.01, score=0.554722099203, total=  59.7s
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=10, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=10, LGBM__lambda_l2=0.01, score=0.576214069225, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=10, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=10, LGBM__lambda_l2=0.01, score=0.693347265279, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=10, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=da

[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.01, score=0.767783990257, total=  54.7s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.01, score=0.678490367582, total=  54.3s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.01, score=0.547961414969, total=  54.6s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=7, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=7, LGBM__lambda_l2=0.01, score=0.583590517182, total=  58.4s
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=7, LGBM__lambda_l2=0.01 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=7, 

[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1, score=0.700876051816, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1, score=0.681818810895, total= 1.0min
[CV] LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.001, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1, score=0.539996678477, total= 1.0min
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.1, score=0.570795909047, total=  56.6s
[CV] LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__max_depth=5, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.01, LGBM__boosting_type=dart, LGBM__ma

[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.1, score=0.817579162976, total= 1.0min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.1, score=0.717760462799, total= 1.0min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=12, LGBM__lambda_l2=0.1, score=0.546362931798, total= 1.0min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1, score=0.564561218982, total= 1.1min
[CV] LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15, LGBM__lambda_l2=0.1 
[CV]  LGBM__learning_rate=0.1, LGBM__boosting_type=dart, LGBM__max_depth=15,

[Parallel(n_jobs=1)]: Done 480 out of 480 | elapsed: 682.1min finished


GridSearchCV(cv=4, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('POLY', PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)), ('LGBM', LGBMClassifier(bagging_fraction=0.7, boosting_type='gbdt', class_weight=None,
        colsample_bytree=1.0, importance_type='split', learning_rate=0.1,
        max_depth=-1, metric='binary_logloss', m...0, reg_lambda=0.0, silent=True, subsample=1.0,
        subsample_for_bin=200000, subsample_freq=0))]),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'LGBM__learning_rate': [0.001, 0.01, 0.1], 'LGBM__boosting_type': ['gbdt', 'dart'], 'LGBM__max_depth': [5, 7, 10, 12, 15], 'LGBM__lambda_l2': [0, 0.001, 0.01, 0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=10)

In [43]:
print("Best parameters set found on training set:")
print()
print(gs.best_params_)
print()
print("Grid scores on training set:")
print()
means = gs.cv_results_['mean_test_score']
stds = gs.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, gs.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()

Best parameters set found on training set:
()
{'LGBM__learning_rate': 0.1, 'LGBM__boosting_type': 'gbdt', 'LGBM__max_depth': 15, 'LGBM__lambda_l2': 0}
()
Grid scores on training set:
()
0.599 (+/-0.110) for {'LGBM__learning_rate': 0.001, 'LGBM__boosting_type': 'gbdt', 'LGBM__max_depth': 5, 'LGBM__lambda_l2': 0}
0.621 (+/-0.123) for {'LGBM__learning_rate': 0.001, 'LGBM__boosting_type': 'gbdt', 'LGBM__max_depth': 7, 'LGBM__lambda_l2': 0}
0.618 (+/-0.145) for {'LGBM__learning_rate': 0.001, 'LGBM__boosting_type': 'gbdt', 'LGBM__max_depth': 10, 'LGBM__lambda_l2': 0}
0.626 (+/-0.145) for {'LGBM__learning_rate': 0.001, 'LGBM__boosting_type': 'gbdt', 'LGBM__max_depth': 12, 'LGBM__lambda_l2': 0}
0.628 (+/-0.139) for {'LGBM__learning_rate': 0.001, 'LGBM__boosting_type': 'gbdt', 'LGBM__max_depth': 15, 'LGBM__lambda_l2': 0}
0.630 (+/-0.101) for {'LGBM__learning_rate': 0.01, 'LGBM__boosting_type': 'gbdt', 'LGBM__max_depth': 5, 'LGBM__lambda_l2': 0}
0.633 (+/-0.157) for {'LGBM__learning_rate': 0.01,

In [44]:
train_preds_true = {'proba' : gs.best_estimator_.predict_proba(x_train)[:, 1], 'true_val' : y_train}
test_preds_true = {'proba' : gs.best_estimator_.predict_proba(x_test)[:, 1], 'true_val' : y_test}

In [45]:
AUC_train = roc_auc_score(train_preds_true['true_val'], train_preds_true['proba'])
AUC_test = roc_auc_score(test_preds_true['true_val'], test_preds_true['proba'])

In [47]:
train_score = gs.best_estimator_.score(x_train, y_train)
test_score = gs.best_estimator_.score(x_test, y_test)

In [46]:
print('AUC Train: ', AUC_train);
print('AUC Test: ', AUC_test);

('AUC Train: ', 0.9160031874431516)
('AUC Test: ', 0.6405654177058699)


In [None]:
# ## Predict
# train_predictions = lgbm.predict(x_train)
# test_predictions = lgbm.predict(x_test)

# ## Accuracy Score
# train_score = lgbm.score(x_train, y_train)
# test_score = lgbm.score(x_test, y_test)

# print("train accuracy: ", train_score)
# print("test accuracy: ", test_score)

# ## AUC
# train_predictions_class = np.where(train_predictions > 0.5, 1, 0)
# test_predictions_class = np.where(test_predictions > 0.5, 1, 0)

# print('Train ROC AUC: ', roc_auc_score(train_predictions_class, y_train))
# print('Test  ROC AUC: ', roc_auc_score(test_predictions_class, y_test))

In [None]:
# from sklearn.metrics import roc_curve
# y_pred = lgbm.predict(x_test).ravel()
# fpr, tpr, thresholds = roc_curve(y_test, y_pred)

# from sklearn.metrics import auc
# auc_lgbm = auc(fpr, tpr)

# plt.figure(1)
# plt.plot([0, 1], [0, 1], 'k--')
# plt.plot(fpr, tpr, label='LGBM (area = {:.3f})'.format(auc_lgbm))
# plt.xlabel('False positive rate')
# plt.ylabel('True positive rate')
# plt.title('ROC curve')
# plt.legend(loc='best')
# plt.show()

In [None]:
# Dump the tuned classifier with Pickle
pkl_filename = 'Tuned_LGBM.pkl'

# Open the file to save as pkl file
model_pkl = open(pkl_filename, 'wb')
pickle.dump(gs.best_estimator_, model_pkl)

# Close the pickle instances
model_pkl.close()