In [None]:
import pickle

import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import roc_auc_score, roc_curve, auc
from bayes_opt import BayesianOptimization

from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier
import lightgbm as lgb
import xgboost as xgb

# Config
# When False, every tuning cell below runs its Bayesian search; when True the
# hard-coded best_params_* dictionaries are kept untouched.
is_pretuned = False
# Selects the GPU variants of the booster settings in the tuning cells.
is_GPU_accelerated = False

In [None]:
def pickle_load(path):
    """Load and return the object stored in the pickle file at ``path``.

    The file is opened in binary mode inside a ``with`` block so the handle
    is closed as soon as loading finishes (or fails).

    NOTE(security): unpickling can execute arbitrary code; only use this on
    files you produced yourself or otherwise trust.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

# Load the feature table and the labels, in that order, from the working directory.
X, y = (pickle_load(source) for source in ('./X.pickle', './y.pickle'))
# Not loaded in this notebook (kept for reference):
# P = pickle_load('../input/P.pickle')
# P_id = pickle_load('../input/P_id.pickle')

In [None]:
def bays_kfold_optimizer(X, y,
                         model_constructor, fold_constructor,
                         fold_params, bounds, defaults, parms_int, params_fit,
                         splits=5, init_points=10, iterations=15, random_state=1):
    """Tune model hyper-parameters with Bayesian optimisation over K-fold ROC AUC.

    For every candidate parameter set the model is fitted on each training
    fold and scored on the matching validation fold; the mean validation
    ROC AUC is the value the optimiser maximises.

    Parameters
    ----------
    X, y :
        Features and labels. Both are indexed positionally with ``.iloc``,
        so pandas objects are expected.
    model_constructor :
        Callable invoked as ``model_constructor(**params, **defaults)``; the
        result must provide ``fit`` and ``predict_proba``.
    fold_constructor :
        Callable invoked as ``fold_constructor(n_splits=splits, **fold_params)``;
        the result must provide ``split(X, y)``.
    fold_params : dict
        Extra keyword arguments for ``fold_constructor``. If it enables
        ``shuffle`` without giving ``random_state``, ``random_state`` is
        supplied so the splits are reproducible.
    bounds : dict
        Search bounds handed to ``BayesianOptimization``.
    defaults : dict
        Fixed keyword arguments added to every model.
    parms_int : iterable of str
        Names of tuned parameters that are truncated to ``int``. Names that
        are not among the tuned parameters are ignored.
    params_fit : dict
        Extra keyword arguments passed to every ``model.fit`` call.
    splits, init_points, iterations, random_state :
        Number of folds, number of initial random probes, number of
        optimisation steps, and the seed for the optimiser (and for the fold
        shuffling, see ``fold_params``).

    Returns
    -------
    dict
        The best parameter set found, with the ``parms_int`` entries cast to int.
    """
    int_names = set(parms_int)

    def cast_int_params(params):
        # Cast only names that are really present: a stray entry in
        # parms_int must not abort the search with a KeyError.
        return {name: int(value) if name in int_names else value
                for name, value in params.items()}

    def build_model(**params):
        # Objective for the optimiser: mean validation ROC AUC across folds.
        model = model_constructor(**cast_int_params(params), **defaults)

        fold_scores = []
        for train_idx, valid_idx in fold_indices:
            model.fit(X.iloc[train_idx, :], y.iloc[train_idx], **params_fit)
            valid_proba = model.predict_proba(X.iloc[valid_idx])[:, 1]
            fpr, tpr, _ = roc_curve(y.iloc[valid_idx], valid_proba)
            fold_scores.append(auc(fpr, tpr))

        return np.mean(fold_scores)

    # Work on a copy so the caller's dictionary is never modified.
    fold_kwargs = dict(fold_params)
    if fold_kwargs.get('shuffle') and 'random_state' not in fold_kwargs:
        # Unseeded shuffling would make the objective differ between runs.
        fold_kwargs['random_state'] = random_state

    folds = fold_constructor(n_splits=splits, **fold_kwargs)
    # Materialise the splits once so every candidate sees identical folds.
    fold_indices = list(folds.split(X, y))

    model_optimizer = BayesianOptimization(build_model, bounds, random_state=random_state)
    print(model_optimizer.space.keys)

    # NOTE(review): acq/xi/alpha are passed straight to maximize(); confirm the
    # installed bayes_opt release still accepts these keyword arguments.
    model_optimizer.maximize(init_points=init_points, n_iter=iterations, acq='ucb', xi=0.0, alpha=1e-6)

    print(model_optimizer.max['target'])
    params = cast_int_params(model_optimizer.max['params'])

    print(params)

    return params

In [4]:
# Optimize LightGBM model parameters

# Hard-coded parameter set; it is kept as-is when is_pretuned is True and
# replaced by the search result otherwise.
best_params_lgb = {
    'num_leaves': 568,
    'min_data_in_leaf': 50,
    'n_estimators': 1795,
    'min_child_weight': 0.027327584490198106,
    'bagging_fraction': 0.4583494036456541,
    'feature_fraction': 0.4492446997014985,
    'learning_rate': 0.009019314773844423,
    'reg_alpha': 0.23150921255961304,
    'reg_lambda': 0.6834953421726443
}

if not is_pretuned:
    # Keyword arguments for the fold splitter (defined once; the original
    # cell repeated this dictionary further down).
    params_fold = {
        'shuffle': True
    }

    # (low, high) search interval for every tuned hyper-parameter.
    # NOTE(review): 'bagging_fraction' is tuned but 'bagging_freq' is never
    # set; check the LightGBM docs whether bagging is active without it.
    bounds_LGB = {
        'num_leaves': (200, 800),
        'min_data_in_leaf': (50, 250),
        'n_estimators': (750, 1800),
        'min_child_weight': (0.01, 0.05),
        'bagging_fraction': (0.2, 0.6),
        'feature_fraction': (0.15, 0.6),
        'learning_rate': (0.005, 0.01),
        'reg_alpha': (0.2, 0.6),
        'reg_lambda': (0.25, 1.0),
    }

    # Fixed settings added to every candidate model.
    params_default_LGB = {
        'objective': 'binary',
        'max_depth': -1,
        'boosting_type': 'gbdt',
        'bagging_seed': 11,
        'metric': 'auc',
        'verbosity': -1,
        'random_state': 47
    }

    if is_GPU_accelerated:
        # Plain scalars: a trailing comma after the value would store a
        # 1-tuple such as ('gpu',) instead of 'gpu'.
        params_default_LGB['device'] = 'gpu'
        params_default_LGB['gpu_platform_id'] = 0
        params_default_LGB['gpu_device_id'] = 0

    # Tuned parameters that are truncated to int before building the model.
    params_int = ['num_leaves', 'min_data_in_leaf', 'n_estimators']

    # Extra keyword arguments for every fit() call.
    params_fit = {
        'eval_metric': 'AUC'
    }

    best_params_lgb = bays_kfold_optimizer(X, y,
                                           LGBMClassifier, StratifiedKFold,
                                           params_fold, bounds_LGB, params_default_LGB, params_int,
                                           params_fit, splits=5, init_points=10, iterations=15,
                                           random_state=1)

['bagging_fraction', 'feature_fraction', 'learning_rate', 'min_child_weight', 'min_data_in_leaf', 'n_estimators', 'num_leaves', 'reg_alpha', 'reg_lambda']
|   iter    |  target   | baggin... | featur... | learni... | min_ch... | min_da... | n_esti... | num_le... | reg_alpha | reg_la... |
-------------------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.951   [0m | [0m 0.3668  [0m | [0m 0.4741  [0m | [0m 0.005001[0m | [0m 0.02209 [0m | [0m 79.35   [0m | [0m 847.0   [0m | [0m 311.8   [0m | [0m 0.3382  [0m | [0m 0.5476  [0m |
| [95m 2       [0m | [95m 0.9621  [0m | [95m 0.4155  [0m | [95m 0.3386  [0m | [95m 0.008426[0m | [95m 0.01818 [0m | [95m 225.6   [0m | [95m 778.8   [0m | [95m 602.3   [0m | [95m 0.3669  [0m | [95m 0.669   [0m |
| [95m 3       [0m | [95m 0.9706  [0m | [95m 0.2562  [0m | [95m 0.2391  [0m | [95m 0.009004[0m | [95m 0.04873 

In [None]:
# Optimize XGBoost model parameters

# No hard-coded parameter set exists for XGBoost; this stays empty unless the
# search below runs.
best_params_xgb = dict()

if not is_pretuned:
    # Keyword arguments for the fold splitter.
    params_fold = dict(shuffle=True)

    # (low, high) search interval for every tuned hyper-parameter.
    bounds_XGB = dict(
        n_estimators=(750, 1800),
        max_depth=(4, 10),
        gamma=(0, 5),
        colsample_bytree=(0.3, 0.9),
        learning_rate=(0.005, 0.01),
        reg_alpha=(0.2, 0.6),
        reg_lambda=(0.25, 1.0),
        subsample=(0.6, 1.0),
        min_child_weight=(1, 5),
    )

    # Fixed settings added to every candidate model; the tree method follows
    # the GPU switch from the config cell.
    params_default_XGB = dict(objective='binary:logistic', grow_policy='lossguide')
    if is_GPU_accelerated:
        params_default_XGB['tree_method'] = 'gpu_hist'
    else:
        params_default_XGB['tree_method'] = 'hist'

    # Tuned parameters that are truncated to int before building the model.
    params_int = ['max_depth', 'n_estimators']

    # Extra keyword arguments for every fit() call.
    params_fit = dict(eval_metric='auc')

    best_params_xgb = bays_kfold_optimizer(
        X, y,
        XGBClassifier, StratifiedKFold,
        params_fold, bounds_XGB, params_default_XGB, params_int, params_fit,
        splits=5, init_points=10, iterations=15, random_state=1,
    )

In [None]:
# Optimize CatBoost model parameters

# Hard-coded parameter set; it is kept as-is when is_pretuned is True and
# replaced by the search result otherwise.
best_params_cb = {
    'depth': 9,
    'n_estimators': 1797,
    'bagging_temperature': 0.20751057109061607,
    'l2_leaf_reg': 2.8624956025382984,
    'learning_rate': 0.036288040656411516,
    'random_strength': 0.5485053727871042
}

if not is_pretuned:
    # Keyword arguments for the fold splitter.
    params_fold = {
        'shuffle': True
    }

    # (low, high) search interval for every tuned hyper-parameter.
    bounds_CB = {
        'depth': (4, 10),
        'n_estimators': (750, 1800),
        'l2_leaf_reg': (2, 30),
        'learning_rate': (0.01, 0.04),
        'bagging_temperature': (0.1, 2.0),
        'random_strength': (0.5, 2.0)
    }

    # Fixed settings added to every candidate model.
    params_default_CB = {
        'loss_function': 'Logloss',
        'custom_loss': ['AUC'],
        'eval_metric': 'AUC',
        'task_type': 'GPU' if is_GPU_accelerated else 'CPU'
    }

    # Tuned parameters that are truncated to int. Every name here must also
    # be a key of bounds_CB; 'iterations' was removed because it is not
    # tuned, so there is no sampled value to cast.
    params_int = ['depth', 'n_estimators']

    # No extra keyword arguments for fit().
    params_fit = {}

    best_params_cb = bays_kfold_optimizer(X, y,
                                          CatBoostClassifier, StratifiedKFold,
                                          params_fold, bounds_CB, params_default_CB, params_int,
                                          params_fit, splits=5, init_points=10, iterations=15,
                                          random_state=1)