## Importing libraries


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from skopt import BayesSearchCV
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler

## Importing the data and preprocessing

In [2]:
new_column_names = []
for i in range(60):
    new_column_names.append(f"Feature {i}")

new_column_names.append("Class")

In [3]:
train = pd.read_csv('../input/sonaralldata/sonar.all-data.csv',header=None, names= new_column_names)
train.head()

Unnamed: 0,Feature 0,Feature 1,Feature 2,Feature 3,Feature 4,Feature 5,Feature 6,Feature 7,Feature 8,Feature 9,...,Feature 51,Feature 52,Feature 53,Feature 54,Feature 55,Feature 56,Feature 57,Feature 58,Feature 59,Class
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [4]:
train.isnull().sum()

Feature 0     0
Feature 1     0
Feature 2     0
Feature 3     0
Feature 4     0
             ..
Feature 56    0
Feature 57    0
Feature 58    0
Feature 59    0
Class         0
Length: 61, dtype: int64

In [5]:
# Replacing M with 0 and R with 1
train = train.replace({'Class': {'M': 0, 
                                'R': 1}})

In [6]:
x_train = train.drop("Class",axis=1)
y_train = train['Class']

In [7]:
y_train.value_counts()

0    111
1     97
Name: Class, dtype: int64

In [8]:
# scaling the dataset
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

In [9]:
X_train,X_val,Y_train,Y_val = train_test_split(x_train,y_train,test_size = 0.1,random_state=26,stratify=y_train)

## Bayesian Optimization

In [10]:
%%time
def bayes_parameter_opt_lgb(X, y, init_round=15, opt_round=25, n_folds=5, random_seed=6,n_estimators=1000, output_process=False):
    # prepare data
    train_data = lgb.Dataset(data=X, label=y, free_raw_data=False)
    # parameters
    def lgb_eval(learning_rate,num_leaves, feature_fraction, bagging_fraction, max_depth, max_bin, min_data_in_leaf,min_sum_hessian_in_leaf,subsample):
        params = {'application':'binary', 'metric':'auc'}
        params['learning_rate'] = max(min(learning_rate, 1), 0)
        params["num_leaves"] = int(round(num_leaves))
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['max_bin'] = int(round(max_depth))
        params['min_data_in_leaf'] = int(round(min_data_in_leaf))
        params['min_sum_hessian_in_leaf'] = min_sum_hessian_in_leaf
        params['subsample'] = max(min(subsample, 1), 0)
                
        cv_result = lgb.cv(params, train_data, nfold=n_folds, seed=random_seed, stratified=True, verbose_eval =200, metrics=['auc'])
        return max(cv_result['auc-mean'])
     
    lgbBO = BayesianOptimization(lgb_eval, {'learning_rate': (0.001, 0.2),
                                            'num_leaves': (25, 60),
                                            'feature_fraction': (0.1, 1),
                                            'bagging_fraction': (0.5, 1),
                                           'max_depth': (2, 20),
                                            'max_bin':(20,90),
                                            'min_data_in_leaf': (20, 80),
                                            'min_sum_hessian_in_leaf':(0,100),
                                           'subsample': (0.01, 1.0)}, random_state=200)


    lgbBO.maximize(init_points=init_round, n_iter=opt_round)
    
    model_auc=[]
    for model in range(len( lgbBO.res)):
        model_auc.append(lgbBO.res[model]['target'])
    
    # return best parameters
    return lgbBO.res[pd.Series(model_auc).idxmax()]['target'],lgbBO.res[pd.Series(model_auc).idxmax()]['params']

opt_params = bayes_parameter_opt_lgb(X_train, Y_train, init_round=5, opt_round=10, n_folds=5, random_seed=6,n_estimators=1000)

|   iter    |  target   | baggin... | featur... | learni... |  max_bin  | max_depth | min_da... | min_su... | num_le... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 960
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 960
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 960
[LightGBM] [Info] Number of data points in the train set: 150, number of used feat




| [0m 1       [0m | [0m 0.5     [0m | [0m 0.9738  [0m | [0m 0.3039  [0m | [0m 0.1193  [0m | [0m 49.98   [0m | [0m 15.75   [0m | [0m 20.17   [0m | [0m 35.74   [0m | [0m 56.84   [0m | [0m 0.4615  [0m |
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 420
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 420
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 420
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.
[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


| [95m 2       [0m | [95m 0.7726  [0m | [95m 0.9909  [0m | [95m 0.8806  [0m | [95m 0.1972  [0m | [95m 84.63   [0m | [95m 7.466   [0m | [95m 70.77   [0m | [95m 12.12   [0m | [95m 52.5    [0m | [95m 0.258   [0m |
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1080
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1080
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1080
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Numbe

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


| [0m 4       [0m | [0m 0.5     [0m | [0m 0.8202  [0m | [0m 0.6478  [0m | [0m 0.02198 [0m | [0m 87.62   [0m | [0m 15.66   [0m | [0m 60.78   [0m | [0m 32.93   [0m | [0m 25.93   [0m | [0m 0.8056  [0m |
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 300
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 300
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 300
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive:

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.



| [0m 5       [0m | [0m 0.5     [0m | [0m 0.9864  [0m | [0m 0.3546  [0m | [0m 0.1302  [0m | [0m 38.59   [0m | [0m 5.378   [0m | [0m 45.14   [0m | [0m 66.6    [0m | [0m 43.11   [0m | [0m 0.8559  [0m |
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 480
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 480
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 480
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 64
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 32
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 64
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 32
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 64
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 32
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 64
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 32
[LightGBM] [Info

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 660
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 660
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 660
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 660
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 118
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 59
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 118
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 59
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 118
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 59
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 118
[LightGBM] [Info] Number of data points

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 180
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 180
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 180
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 180
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data po

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGB

[LightGBM] [Fatal] Reducing `min_data_in_leaf` with `feature_pre_filter=true` may cause unexpected behaviour for features that were pre-filtered by the larger `min_data_in_leaf`.
You need to set `feature_pre_filter=false` to dynamically change the `min_data_in_leaf`.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 420
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 420
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 420
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 420
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 120
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 120
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 120
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
Yo

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 69, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 149, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGBM] [Info] Number of positive: 70, number of negative: 80
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1200
[LightGBM] [Info] Number of data points in the train set: 150, number of used features: 60
[LightGB

[LightGBM] [Fatal] Cannot change max_bin after constructed Dataset handle.


In [11]:
# optimal parameters
opt_params[1]["num_leaves"] = int(round(opt_params[1]["num_leaves"]))
opt_params[1]['max_depth'] = int(round(opt_params[1]['max_depth']))
opt_params[1]['min_data_in_leaf'] = int(round(opt_params[1]['min_data_in_leaf']))
opt_params[1]['max_bin'] = int(round(opt_params[1]['max_bin']))
opt_params[1]['objective']='binary'
opt_params[1]['metric']='auc'
opt_params[1]['is_unbalance']=True
opt_params[1]['boost_from_average']=False
opt_params[1]['verbose'] = -1
opt_params=opt_params[1]
del opt_params['feature_fraction']
opt_params

{'bagging_fraction': 1.0,
 'learning_rate': 0.2,
 'max_bin': 53,
 'max_depth': 20,
 'min_data_in_leaf': 20,
 'min_sum_hessian_in_leaf': 0.0,
 'num_leaves': 25,
 'subsample': 0.019507483536268744,
 'objective': 'binary',
 'metric': 'auc',
 'is_unbalance': True,
 'boost_from_average': False,
 'verbose': -1}

## Prediciton with best parameters and KFold Technique

In [12]:
x_train = train.drop("Class",axis=1)
y_train = train['Class']

In [13]:
x_train

Unnamed: 0,Feature 0,Feature 1,Feature 2,Feature 3,Feature 4,Feature 5,Feature 6,Feature 7,Feature 8,Feature 9,...,Feature 50,Feature 51,Feature 52,Feature 53,Feature 54,Feature 55,Feature 56,Feature 57,Feature 58,Feature 59
0,0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0232,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0125,0.0084,0.0089,0.0048,0.0094,0.0191,0.0140,0.0049,0.0052,0.0044
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.2280,0.2431,0.3771,0.5598,0.6194,...,0.0033,0.0232,0.0166,0.0095,0.0180,0.0244,0.0316,0.0164,0.0095,0.0078
3,0.0100,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0241,0.0121,0.0036,0.0150,0.0085,0.0073,0.0050,0.0044,0.0040,0.0117
4,0.0762,0.0666,0.0481,0.0394,0.0590,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0156,0.0031,0.0054,0.0105,0.0110,0.0015,0.0072,0.0048,0.0107,0.0094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,0.0187,0.0346,0.0168,0.0177,0.0393,0.1630,0.2028,0.1694,0.2328,0.2684,...,0.0203,0.0116,0.0098,0.0199,0.0033,0.0101,0.0065,0.0115,0.0193,0.0157
204,0.0323,0.0101,0.0298,0.0564,0.0760,0.0958,0.0990,0.1018,0.1030,0.2154,...,0.0051,0.0061,0.0093,0.0135,0.0063,0.0063,0.0034,0.0032,0.0062,0.0067
205,0.0522,0.0437,0.0180,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.0155,0.0160,0.0029,0.0051,0.0062,0.0089,0.0140,0.0138,0.0077,0.0031
206,0.0303,0.0353,0.0490,0.0608,0.0167,0.1354,0.1465,0.1123,0.1945,0.2354,...,0.0042,0.0086,0.0046,0.0126,0.0036,0.0035,0.0034,0.0079,0.0036,0.0048


In [14]:
%%time
folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=26)
oof = np.zeros(len(x_train))
predictions = np.zeros(len(X_val))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(x_train, y_train)):
    print("Fold {}".format(fold_))
    trn_data = lgb.Dataset(x_train.iloc[trn_idx], label=y_train.iloc[trn_idx])
    val_data = lgb.Dataset(x_train.iloc[val_idx], label=y_train.iloc[val_idx])
    num_round = 10000
    clf = lgb.train(opt_params, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=0,early_stopping_rounds = 250,)
    oof[val_idx] = clf.predict(x_train.iloc[val_idx], num_iteration=clf.best_iteration) 
    predictions += clf.predict(X_val, num_iteration=clf.best_iteration) / folds.n_splits
    print("CV score: {:<8.5f}".format(roc_auc_score(y_train, oof)))


Fold 0
CV score: 0.50701 
Fold 1




CV score: 0.52201 
Fold 2
CV score: 0.54648 
Fold 3
CV score: 0.57913 
Fold 4




CV score: 0.62524 
Fold 5
CV score: 0.67795 
Fold 6




CV score: 0.73697 
Fold 7
CV score: 0.80264 
Fold 8




CV score: 0.87308 
Fold 9
CV score: 0.94910 
CPU times: user 4 s, sys: 129 ms, total: 4.13 s
Wall time: 1.04 s




In [15]:
# putting threshold as 0.5
binary_predictions = [i>0.5 for i in predictions]

## Model Report

In [16]:
# Scoring our model
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score,f1_score, precision_score, recall_score

# Confusion Matrix
print('Confusion Matrix')
print(confusion_matrix(Y_val, binary_predictions))
print('--'*50)

# Classification Report
print('Classification Report')
print(classification_report(Y_val, binary_predictions))


# Accuracy of our model
print('--'*50)
bayesOpt_accuracy = round(accuracy_score(Y_val, binary_predictions) * 100,8)
print('Accuracy = ', bayesOpt_accuracy,'%')


Confusion Matrix
[[ 7  4]
 [ 0 10]]
----------------------------------------------------------------------------------------------------
Classification Report
              precision    recall  f1-score   support

           0       1.00      0.64      0.78        11
           1       0.71      1.00      0.83        10

    accuracy                           0.81        21
   macro avg       0.86      0.82      0.81        21
weighted avg       0.86      0.81      0.80        21

----------------------------------------------------------------------------------------------------
Accuracy =  80.95238095 %
