<a href="https://colab.research.google.com/github/fasthill/ML-DL-study-alone/blob/main/5-1%20%EA%B2%B0%EC%A0%95%20%ED%8A%B8%EB%A6%AC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Analyse from LightGBM

<table align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/rickiepark/hg-mldl/blob/master/5-1.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />구글 코랩에서 실행하기</a>
  </td>
</table>

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from functools import reduce

In [26]:
from xgboost import XGBClassifier
import xgboost
from lightgbm import LGBMClassifier
import lightgbm

In [35]:
from sklearn.linear_model import LogisticRegression, SGDClassifier, SGDRegressor
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [4]:
# confusion matrix to list 변환
def matrix_to_list(confu_matrix):
    m_list = []
    tn = confu_matrix[0,0]
    fp = confu_matrix[0,1]
    fn = confu_matrix[1,0]
    tp = confu_matrix[1,1]
    m_list.extend([tn, fp, fn, tp])
    return m_list

In [5]:
def predict_p(test_target, y_predict_list): 
    ps = precision_score(test_target, y_predict_list)
    rs = recall_score(test_target, y_predict_list)
    fs = f1_score(test_target, y_predict_list)
    roc = roc_auc_score(test_target, y_predict_list)
#     cm = matrix_to_list(confusion_matrix(test_target, y_predict_list))
    collect_list = [ps, rs, fs, roc]
#     collect_list.extend(cm)
    return collect_list

In [10]:
def modelfit(alg, dtrain, predictors,useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
            metrics='auc', early_stopping_rounds=early_stopping_rounds, show_progress=False)
        alg.set_params(n_estimators=cvresult.shape[0])
    
    #Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain['Disbursed'],eval_metric='auc')
        
    #Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:,1]
        
#     #Print model report:
#     print("\nModel Report")
#     print("Accuracy : %.4g" % metrics.accuracy_score(dtrain['Disbursed'].values, dtrain_predictions))
#     print("AUC Score (Train): %f" % metrics.roc_auc_score(dtrain['Disbursed'], dtrain_predprob))
                    
    feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')

In [None]:
# #Choose all predictors except target & IDcols
# predictors = [x for x in train.columns if x not in [target, IDcol]]
# xgb1 = XGBClassifier(
#  learning_rate =0.1,
#  n_estimators=1000,
#  max_depth=5,
#  min_child_weight=1,
#  gamma=0,
#  subsample=0.8,
#  colsample_bytree=0.8,
#  objective= 'binary:logistic',
#  nthread=4,
#  scale_pos_weight=1,
#  seed=27)
# modelfit(xgb1, train, predictors)

In [6]:
code = {'005930' : ['삼성전자', 'sec'], '373220' : ['LG에너지솔루션', 'lgenergy'], 
        '000660' : ['SK하이닉스', 'skhinix'], '207940' : ['삼성바이오로직스', 'ssbio'],
        '006400' : ['삼성SDI', 'sdi'], '051910' : ['LG화학', 'lgchemical'],
        '005935' : ['삼성전자우', 'secpre'], '005380' : ['현대차', 'hyunmotor'],
        '035420' : ['NAVER', 'naver'], '000270' : ['기아','kia'],
        '035720' : ['카카오', 'kakao'], '005490' : ['POSCO홀딩스', 'poscoholding'],
        '105560' : ['KB금융', 'kbbank'], '028260' : ['삼성물산', 'sscnt'],
        '068270' : ['셀트리온', 'celltrion'], '012330' : ['현대모비스', 'mobis'],
        '055550' : ['신한지주', 'shgroup'], '066570' : ['LG전자', 'lgelec'],
        '003670' : ['포스코케미칼', 'poscochemical'], '096770' : ['SK이노베이션', 'skinnovation'],
        '033780' : ['KT&G', 'ktng']}

code = {'005930' : ['삼성전자', 'sec']}

In [7]:
fname = f'df_sec_sel.pkl'
stock_name = 'sec'
directory_for_ml = '../data/data_for_ml/'
f_name = directory_for_ml + fname
df = pd.read_pickle(f_name) 

# train, val,: 8, test: 2
split_ratio = 0.8
split_n = int(len(df)*split_ratio)

data = df.iloc[:split_n, :-5]
target = df.iloc[:split_n, -4]
test_input = df.iloc[split_n:, :-5]
test_target = df.iloc[split_n:, -4]

train_input, val_input, train_target, val_target = \
     train_test_split(data, target, random_state=42, test_size=0.2, stratify=target)

ss = StandardScaler()
ss.fit(train_input)
train_scaled = ss.transform(train_input)
val_scaled = ss.transform(val_input)
test_scaled = ss.transform(test_input)

In [40]:
lgbm = None
lgbm = lightgbm.LGBMClassifier(random_state=42,
                               learning_rate =0.22, # default = 0.1
                               num_iterations=500, # default=100
                               max_depth=5, # <= 0 means no limit, default=-1
                               bagging_fraction=0.9, # 0.0 < bagging_fraction <= 1.0, default=1
                               feature_fraction=0.8, # 0.0 < feature_fraction <= 1.0, default=1
                               objective= 'binary', 
#                                num_threads = 0, # == nthread=4, # Gpu 수, default=0, 자동 검색후 적용
                               max_delta_step = 0.5, # best value found, default = 0
#                             scale_pos_weight=40, # class imbalance 경감, scale_pos_weight > 0.0, default=1.0
                              ) 
lgbm.fit(train_scaled, train_target, eval_metric = 'logloss')
y_pred = lgbm.predict(val_scaled)
# val_error = mean_squared_error(val_target, y_pred) # 책에 없음
# print("Validation MSE:", val_error)           # 책에 없음

train_score_lgbm = lgbm.score(train_scaled, train_target)
val_score_lgbm = lgbm.score(val_scaled, val_target)
test_score_lgbm = lgbm.score(test_scaled, test_target)
ps, rs, fs, roc = predict_p(test_target, lgbm.predict(test_scaled))
cm = confusion_matrix(test_target, lgbm.predict(test_scaled))

print("train accuracy: {:.4f}, val accuracy {:.4f}, test accuracy {:.4f}".
      format(train_score_lgbm, val_score_lgbm, test_score_lgbm))
print("precision : {:.4f}, recall : {:.4f}, f1score : {:.4f}, roc : {:.4f}".
     format (ps, rs, fs, roc))
print(cm)

train accuracy: 1.0000, val accuracy 0.7907, test accuracy 0.8519
precision : 0.7368, recall : 0.8235, f1score : 0.7778, roc : 0.8442
[[32  5]
 [ 3 14]]




In [51]:
params = {
    "boosting_type" : ['gbdt'],
    "max_depth": [2, 3, 4],
    "num_leaves": [3, 4, 5],
    "learning_rate": [0.004, 0.005, 0.006],
    "objective": ['binary'],
    "metric": ['loss'],
    "colsample_bytree": [0.41, 0.42, 0.43, 0.45],
    "subsample": [0.005, 0.01, 0.015],
    "n_estimators": [5, 10, 20, 30],
#     "num_iterations": [100, 200, 300, 400, 500],
#     "num_class": [1]
#     "metric" : "rmse",
#     "bagging_frequency" : 5,
#     "bagging_seed" : 2018,
#     "verbosity" : -1,

#     # Selected rounded-off params
#     'bagging_fraction': 0.7,
#     'feature_fraction': 0.1,
#     'lambda_l1': 1,
#     'lambda_l2': 0,
#     'max_depth': 9,
#     'min_child_weight': 5,
#     'min_split_gain': 0,
#     'num_leaves': 24
}
lgbm = None
lgbmgs = None

lgbm = lightgbm.LGBMClassifier( random_state=42, 
                               learning_rate =0.22, # default = 0.1
                               num_iterations=500, # default=100
                               max_depth=5, # <= 0 means no limit, default=-1
#                                bagging_fraction=0.9, # 0.0 < bagging_fraction <= 1.0, default=1
#                                feature_fraction=0.8, # 0.0 < feature_fraction <= 1.0, default=1
                               objective= 'binary', 
#                                num_threads = 0, # == nthread=4, # Gpu 수, default=0, 자동 검색후 적용
                               max_delta_step = 0.5, # best value found, default = 0
#                                scale_pos_weight=40, # class imbalance 경감, scale_pos_weight > 0.0, default=1.0
                               eval_metric = 'logloss'
                                )

lgbmgs = GridSearchCV(estimator = lgbm,
                      param_grid = params,
                      cv = StratifiedKFold(n_splits=5),
                      scoring = 'precision', 
                      verbose = 1,
                      n_jobs=4,
                      )

lgbmgs.fit(train_scaled, train_target)
    
print("Best Estimator: {}".format(lgbmgs.best_estimator_)) 
print("Best Parameters: {}".format(lgbmgs.best_params_))  # 최적 파라미터.
print('Best Score: {}, Best Index: {}'.format(lgbmgs.best_score_ , lgbmgs.best_index_))  # 교차검증된 점수를 보여줌.

model = lgbmgs.best_estimator_  # 최적의 파라미터로 모델 생성
y_pred = model.predict(test_scaled)

train_score = model.score(train_scaled, train_target)
val_score = model.score(val_scaled, val_target)
test_score = model.score(test_scaled, test_target)
ps, rs, fs, roc = predict_p(test_target, model.predict(test_scaled))
cm = confusion_matrix(test_target, model.predict(test_scaled))

print("train accuracy: {:.4f}, val accuracy {:.4f}, test accuracy {:.4f}".
      format(train_score, val_score, test_score))
print("precision : {:.4f}, recall : {:.4f}, f1score : {:.4f}, roc : {:.4f}".
     format (ps, rs, fs, roc))
print(cm)

Fitting 5 folds for each of 1296 candidates, totalling 6480 fits
Best Estimator: LGBMClassifier(colsample_bytree=0.41, eval_metric='logloss',
               learning_rate=0.004, max_delta_step=0.5, max_depth=2,
               metric='loss', n_estimators=5, num_iterations=500, num_leaves=3,
               objective='binary', random_state=42, subsample=0.005)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.41, 'learning_rate': 0.004, 'max_depth': 2, 'metric': 'loss', 'n_estimators': 5, 'num_leaves': 3, 'objective': 'binary', 'subsample': 0.005}
Best Score: 0.9333333333333332, Best Index: 0
train accuracy: 0.8081, val accuracy 0.7442, test accuracy 0.7593
precision : 1.0000, recall : 0.2353, f1score : 0.3810, roc : 0.6176
[[37  0]
 [13  4]]




In [53]:
for i in ['mean_test_score', 'std_test_score']:
        print(i," : ", gsearch1.cv_results_[i])

mean_test_score  :  [0.5        0.91806667        nan 0.5        0.90966667        nan
 0.5        0.91546667        nan 0.5        0.91293333        nan
 0.5        0.90963333        nan 0.5        0.9145            nan
 0.5        0.90633333        nan 0.5        0.9128            nan
 0.5        0.9112            nan 0.5        0.91456667        nan
 0.5        0.9088            nan 0.5        0.91283333        nan
 0.5        0.90803333        nan 0.5        0.91466667        nan
 0.5        0.90866667        nan 0.5        0.90796667        nan
 0.5        0.91706667        nan 0.5        0.92043333        nan
 0.5        0.90883333        nan 0.5        0.90966667        nan
 0.5        0.91113333        nan 0.5        0.89726667        nan
 0.5        0.89386667        nan 0.5        0.9096            nan
 0.5        0.90566667        nan 0.5        0.90386667        nan
 0.5        0.90383333        nan 0.5        0.91293333        nan
 0.5        0.90873333        nan 0.5     

In [36]:
gsearch1.cv_results_.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_max_depth', 'param_min_child_weight', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])

In [52]:
# use best fit model using .best_estimator

best_xgb = gsearch1.best_estimator_
y_pred = best_xgb.predict(val_scaled)
# val_error = mean_squared_error(val_target, y_pred) # 책에 없음
# print("Validation MSE:", val_error)           # 책에 없음

train_score_xgb = best_xgb.score(train_scaled, train_target)
val_score_xgb = best_xgb.score(val_scaled, val_target)
test_score_xgb = best_xgb.score(test_scaled, test_target)
ps, rs, fs, roc = predict_p(test_target, best_xgb.predict(test_scaled))
cm = confusion_matrix(test_target, best_xgb.predict(test_scaled))

print("train accuracy: {:.4f}, val accuracy {:.4f}, test accuracy {:.4f}".
      format(train_score_xgb, val_score_xgb, test_score_xgb))
print("precision : {:.4f}, recall : {:.4f}, f1score : {:.4f}, roc : {:.4f}".
     format (ps, rs, fs, roc))
print(cm)

train accuracy: 0.9884, val accuracy 0.8140, test accuracy 0.8148
precision : 0.6667, recall : 0.8235, f1score : 0.7368, roc : 0.8172
[[30  7]
 [ 3 14]]


In [None]:
Fitting 5 folds for each of 18000 candidates, totalling 90000 fits
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=0.5 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=0.5 will be ignored. Current value: bagging_fraction=0.9
Best Estimator: LGBMClassifier(bagging_fraction=0.9, colsample_bytree=0.5, feature_fraction=0.8,
               learning_rate=0.05, max_delta_step=0.5, max_depth=2,
               metric='auc', num_iterations=500, num_leaves=4,
               objective='binary', random_state=42, subsample=0.5)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.5, 'learning_rate': 0.05, 'max_depth': 2, 'metric': 'auc', 'n_estimators': 100, 'num_leaves': 4, 'objective': 'binary', 'subsample': 0.5}
Best Score: 0.8375180375180376, Best Index: 726
train accuracy: 1.0000, val accuracy 0.7907, test accuracy 0.8148
precision : 0.7059, recall : 0.7059, f1score : 0.7059, roc : 0.7854
[[32  5]
 [ 5 12]]

In [None]:
Fitting 5 folds for each of 7200 candidates, totalling 36000 fits
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=0.3 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=0.3 will be ignored. Current value: bagging_fraction=0.9
Best Estimator: LGBMClassifier(bagging_fraction=0.9, colsample_bytree=0.3, feature_fraction=0.8,
               max_delta_step=0.5, max_depth=4, metric='rmse', n_estimators=80,
               num_iterations=500, num_leaves=6, objective='binary',
               random_state=42, subsample=0.3)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.3, 'learning_rate': 0.1, 'max_depth': 4, 'metric': 'rmse', 'n_estimators': 80, 'num_leaves': 6, 'objective': 'binary', 'subsample': 0.3}
Best Score: 0.8377777777777778, Best Index: 915
train accuracy: 1.0000, val accuracy 0.7907, test accuracy 0.8519
precision : 0.7368, recall : 0.8235, f1score : 0.7778, roc : 0.8442
[[32  5]
 [ 3 14]]

In [None]:
Fitting 5 folds for each of 12500 candidates, totalling 62500 fits
[LightGBM] [Warning] feature_fraction is set=0.8, colsample_bytree=0.1 will be ignored. Current value: feature_fraction=0.8
[LightGBM] [Warning] bagging_fraction is set=0.9, subsample=0.1 will be ignored. Current value: bagging_fraction=0.9
Best Estimator: LGBMClassifier(bagging_fraction=0.9, colsample_bytree=0.1, feature_fraction=0.8,
               learning_rate=0.07, max_delta_step=0.5, max_depth=5,
               metric='rmse', n_estimators=50, num_iterations=500, num_leaves=6,
               objective='binary', random_state=42, subsample=0.1)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.1, 'learning_rate': 0.07, 'max_depth': 5, 'metric': 'rmse', 'n_estimators': 50, 'num_leaves': 6, 'objective': 'binary', 'subsample': 0.1}
Best Score: 0.8377777777777778, Best Index: 315
train accuracy: 1.0000, val accuracy 0.8140, test accuracy 0.8519
precision : 0.7368, recall : 0.8235, f1score : 0.7778, roc : 0.8442
[[32  5]
 [ 3 14]]

Fitting 5 folds for each of 12500 candidates, totalling 62500 fits
Best Estimator: LGBMClassifier(colsample_bytree=0.4, learning_rate=0.03, max_delta_step=0.5,
               max_depth=3, metric='loss', n_estimators=30, num_iterations=500,
               num_leaves=4, objective='binary', random_state=42,
               subsample=0.1)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.4, 'learning_rate': 0.03, 'max_depth': 3, 'metric': 'loss', 'n_estimators': 30, 'num_leaves': 4, 'objective': 'binary', 'subsample': 0.1}
Best Score: 0.8342857142857143, Best Index: 7605
train accuracy: 1.0000, val accuracy 0.7907, test accuracy 0.8704
precision : 0.7500, recall : 0.8824, f1score : 0.8108, roc : 0.8736
[[32  5]
 [ 2 15]]

In [None]:
Fitting 5 folds for each of 13500 candidates, totalling 67500 fits
[LightGBM] [Warning] Unknown parameter: eval_metric
Best Estimator: LGBMClassifier(colsample_bytree=0.35, eval_metric='logloss', learning_rate=0.02,
               max_delta_step=0.5, max_depth=3, metric='loss', n_estimators=10,
               num_iterations=100, num_leaves=4, objective='binary',
               random_state=42, subsample=0.05)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.35, 'learning_rate': 0.02, 'max_depth': 3, 'metric': 'loss', 'n_estimators': 10, 'num_iterations': 100, 'num_leaves': 4, 'objective': 'binary', 'subsample': 0.05}
Best Score: 1.0, Best Index: 1204
train accuracy: 0.7849, val accuracy 0.7442, test accuracy 0.7037
precision : 1.0000, recall : 0.0588, f1score : 0.1111, roc : 0.5294
[[37  0]
 [16  1]]

In [None]:
Fitting 5 folds for each of 2700 candidates, totalling 13500 fits
[LightGBM] [Warning] Unknown parameter: eval_metric
Best Estimator: LGBMClassifier(colsample_bytree=0.45, eval_metric='logloss', learning_rate=0.01,
               max_delta_step=0.5, max_depth=2, metric='loss', n_estimators=10,
               num_iterations=300, num_leaves=3, objective='binary',
               random_state=42, subsample=0.05)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.45, 'learning_rate': 0.01, 'max_depth': 2, 'metric': 'loss', 'n_estimators': 10, 'num_leaves': 3, 'objective': 'binary', 'subsample': 0.05}
Best Score: 0.8638888888888889, Best Index: 1800
train accuracy: 0.9128, val accuracy 0.8140, test accuracy 0.8519
precision : 0.8000, recall : 0.7059, f1score : 0.7500, roc : 0.8124
[[34  3]
 [ 5 12]]

In [None]:
Fitting 5 folds for each of 2880 candidates, totalling 14400 fits
[LightGBM] [Warning] Unknown parameter: eval_metric
Best Estimator: LGBMClassifier(colsample_bytree=0.4, eval_metric='logloss', learning_rate=0.01,
               max_delta_step=0.5, max_depth=2, metric='loss', n_estimators=5,
               num_iterations=200, num_leaves=3, objective='binary',
               random_state=42, subsample=0.03)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.4, 'learning_rate': 0.01, 'max_depth': 2, 'metric': 'loss', 'n_estimators': 5, 'num_leaves': 3, 'objective': 'binary', 'subsample': 0.03}
Best Score: 0.9333333333333332, Best Index: 144
train accuracy: 0.8023, val accuracy 0.7442, test accuracy 0.7593
precision : 1.0000, recall : 0.2353, f1score : 0.3810, roc : 0.6176
[[37  0]
 [13  4]]

In [None]:
Fitting 5 folds for each of 2880 candidates, totalling 14400 fits
[LightGBM] [Warning] Unknown parameter: eval_metric
Best Estimator: LGBMClassifier(colsample_bytree=0.42, eval_metric='logloss', learning_rate=0.01,
               max_delta_step=0.5, max_depth=2, metric='loss', n_estimators=5,
               num_iterations=300, num_leaves=3, objective='binary',
               random_state=42, subsample=0.03)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.42, 'learning_rate': 0.01, 'max_depth': 2, 'metric': 'loss', 'n_estimators': 5, 'num_leaves': 3, 'objective': 'binary', 'subsample': 0.03}
Best Score: 0.8638888888888889, Best Index: 720
train accuracy: 0.9070, val accuracy 0.8140, test accuracy 0.8519
precision : 0.8000, recall : 0.7059, f1score : 0.7500, roc : 0.8124
[[34  3]
 [ 5 12]]

In [None]:
Fitting 5 folds for each of 1296 candidates, totalling 6480 fits
[LightGBM] [Warning] Unknown parameter: eval_metric
Best Estimator: LGBMClassifier(colsample_bytree=0.42, eval_metric='logloss',
               learning_rate=0.005, max_delta_step=0.5, max_depth=2,
               metric='loss', n_estimators=5, num_iterations=500, num_leaves=3,
               objective='binary', random_state=42, subsample=0.01)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.42, 'learning_rate': 0.005, 'max_depth': 2, 'metric': 'loss', 'n_estimators': 5, 'num_leaves': 3, 'objective': 'binary', 'subsample': 0.01}
Best Score: 0.8355555555555556, Best Index: 432
train accuracy: 0.8895, val accuracy 0.8372, test accuracy 0.8519
precision : 0.8462, recall : 0.6471, f1score : 0.7333, roc : 0.7965
[[35  2]
 [ 6 11]]

In [None]:
Fitting 5 folds for each of 1296 candidates, totalling 6480 fits
[LightGBM] [Warning] Unknown parameter: eval_metric
Best Estimator: LGBMClassifier(colsample_bytree=0.41, eval_metric='logloss',
               learning_rate=0.004, max_delta_step=0.5, max_depth=2,
               metric='loss', n_estimators=5, num_iterations=500, num_leaves=3,
               objective='binary', random_state=42, subsample=0.005)
Best Parameters: {'boosting_type': 'gbdt', 'colsample_bytree': 0.41, 'learning_rate': 0.004, 'max_depth': 2, 'metric': 'loss', 'n_estimators': 5, 'num_leaves': 3, 'objective': 'binary', 'subsample': 0.005}
Best Score: 0.9333333333333332, Best Index: 0
train accuracy: 0.8081, val accuracy 0.7442, test accuracy 0.7593
precision : 1.0000, recall : 0.2353, f1score : 0.3810, roc : 0.6176
[[37  0]
 [13  4]]