In [1]:
import numpy as np
import pandas as pd
from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification, make_regression

from hyperopt import hp
from hyperopt import Trials

from xgboost import *

from shaphypetune import BoostSearch, BoostBoruta, BoostRFE, BoostRFA

import warnings
warnings.simplefilter('ignore')

In [2]:
X_clf, y_clf = make_classification(n_samples=8000, n_features=20, n_classes=2, 
                                   n_informative=4, n_redundant=6, random_state=0)

X_clf_train, X_clf_valid, y_clf_train, y_clf_valid = train_test_split(
    X_clf, y_clf, test_size=0.3, shuffle=False)

X_regr, y_regr = make_classification(n_samples=8000, n_features=20,
                                     n_informative=7, random_state=0)

X_regr_train, X_regr_valid, y_regr_train, y_regr_valid = train_test_split(
    X_regr, y_regr, test_size=0.3, shuffle=False)

In [3]:
param_grid = {
    'learning_rate': [0.2, 0.1],
    'num_leaves': [25, 30, 35],
    'max_depth': [10, 12]
}

param_dist = {
    'learning_rate': stats.uniform(0.09, 0.25),
    'num_leaves': stats.randint(20,40),
    'max_depth': [10, 12]
}

param_dist_hyperopt = {
    'booster': hp.choice('booster', ['gbtree', 'dart']),
    'max_depth': 15 + hp.randint('num_leaves', 5), 
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0)
}


regr_xgb = XGBRegressor(n_estimators=150, random_state=0, verbosity=0, n_jobs=-1)
clf_xgb = XGBClassifier(n_estimators=150, random_state=0, verbosity=0, n_jobs=-1)

# Hyperparameters Tuning

In [4]:
### HYPERPARAM TUNING WITH GRID-SEARCH ###

model = BoostSearch(clf_xgb, param_grid=param_grid)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)


12 trials detected for ('learning_rate', 'num_leaves', 'max_depth')

trial: 0001 ### iterations: 00035 ### eval_score: 0.24809
trial: 0002 ### iterations: 00045 ### eval_score: 0.24361
trial: 0003 ### iterations: 00035 ### eval_score: 0.24809
trial: 0004 ### iterations: 00045 ### eval_score: 0.24361
trial: 0005 ### iterations: 00035 ### eval_score: 0.24809
trial: 0006 ### iterations: 00045 ### eval_score: 0.24361
trial: 0007 ### iterations: 00102 ### eval_score: 0.24251
trial: 0008 ### iterations: 00057 ### eval_score: 0.24229
trial: 0009 ### iterations: 00102 ### eval_score: 0.24251
trial: 0010 ### iterations: 00057 ### eval_score: 0.24229
trial: 0011 ### iterations: 00102 ### eval_score: 0.24251
trial: 0012 ### iterations: 00057 ### eval_score: 0.24229


BoostSearch(estimator=XGBClassifier(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False, gamma=None,
                                    gpu_id=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=150, n_jobs=-1,
                                    num_parallel_tree=None, predictor=None,
                                    random_state=0, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                               

In [5]:
model.estimator_, model.best_params_, model.best_score_

(XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.1, max_delta_step=0,
               max_depth=12, min_child_weight=1, missing=nan,
               monotone_constraints='()', n_estimators=150, n_jobs=-1,
               num_leaves=25, num_parallel_tree=1, predictor='auto',
               random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
               subsample=1, tree_method='exact', validate_parameters=1,
               verbosity=0),
 {'learning_rate': 0.1, 'num_leaves': 25, 'max_depth': 12},
 0.24229)

In [6]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.predict_proba(X_clf_valid).shape)

(0.9083333333333333, (2400,), (2400, 2))

In [7]:
### HYPERPARAM TUNING WITH RANDOM-SEARCH ###

model = BoostSearch(
    regr_xgb, param_grid=param_dist,
    n_iter=10, sampling_seed=0
)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)


10 trials detected for ('learning_rate', 'num_leaves', 'max_depth')

trial: 0001 ### iterations: 00023 ### eval_score: 0.24836
trial: 0002 ### iterations: 00023 ### eval_score: 0.24699
trial: 0003 ### iterations: 00066 ### eval_score: 0.25107
trial: 0004 ### iterations: 00016 ### eval_score: 0.24739
trial: 0005 ### iterations: 00038 ### eval_score: 0.25396
trial: 0006 ### iterations: 00062 ### eval_score: 0.25336
trial: 0007 ### iterations: 00031 ### eval_score: 0.25981
trial: 0008 ### iterations: 00036 ### eval_score: 0.2589
trial: 0009 ### iterations: 00020 ### eval_score: 0.25039
trial: 0010 ### iterations: 00053 ### eval_score: 0.24274


BoostSearch(estimator=XGBRegressor(base_score=None, booster=None,
                                   colsample_bylevel=None,
                                   colsample_bynode=None, colsample_bytree=None,
                                   enable_categorical=False, gamma=None,
                                   gpu_id=None, importance_type=None,
                                   interaction_constraints=None,
                                   learning_rate=None, max_delta_step=None,
                                   max_depth=None, min_child_weight=None,
                                   missing=nan, monotone_constraints=None,
                                   n_estim...
                                   random_state=0, reg_alpha=None,
                                   reg_lambda=None, scale_pos_weight=None,
                                   subsample=None, tree_method=None,
                                   validate_parameters=None, verbosity=0),
            n_iter=10,
      

In [8]:
model.estimator_, model.best_params_, model.best_score_

(XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.16076048691663583,
              max_delta_step=0, max_depth=10, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=150, n_jobs=-1,
              num_leaves=20, num_parallel_tree=1, predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=0),
 {'learning_rate': 0.16076048691663583, 'num_leaves': 20, 'max_depth': 10},
 0.242736)

In [9]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape)

(0.7642148151799497, (2400,))

In [10]:
### HYPERPARAM TUNING WITH HYPEROPT ###

model = BoostSearch(
    regr_xgb, param_grid=param_dist_hyperopt,
    n_iter=10, sampling_seed=0
)
model.fit(
    X_regr_train, y_regr_train, trials=Trials(), 
    eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0
)


10 trials detected for ('booster', 'max_depth', 'learning_rate', 'colsample_bytree')

trial: 0001 ### iterations: 00149 ### eval_score: 0.24655
trial: 0002 ### iterations: 00074 ### eval_score: 0.24721
trial: 0003 ### iterations: 00055 ### eval_score: 0.25364
trial: 0004 ### iterations: 00149 ### eval_score: 0.26149
trial: 0005 ### iterations: 00149 ### eval_score: 0.24925
trial: 0006 ### iterations: 00148 ### eval_score: 0.2412
trial: 0007 ### iterations: 00050 ### eval_score: 0.25363
trial: 0008 ### iterations: 00136 ### eval_score: 0.26054
trial: 0009 ### iterations: 00149 ### eval_score: 0.23867
trial: 0010 ### iterations: 00149 ### eval_score: 0.23923


BoostSearch(estimator=XGBRegressor(base_score=None, booster=None,
                                   colsample_bylevel=None,
                                   colsample_bynode=None, colsample_bytree=None,
                                   enable_categorical=False, gamma=None,
                                   gpu_id=None, importance_type=None,
                                   interaction_constraints=None,
                                   learning_rate=None, max_delta_step=None,
                                   max_depth=None, min_child_weight=None,
                                   missing=nan, monotone_constraints=None,
                                   n_estim...
                                   subsample=None, tree_method=None,
                                   validate_parameters=None, verbosity=0),
            n_iter=10,
            param_grid={'booster': <hyperopt.pyll.base.Apply object at 0x0000020FB18490C8>,
                        'colsample_bytree': <hyperopt.py

In [11]:
model.estimator_, model.best_params_, model.best_score_

(XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.8887109431944138,
              enable_categorical=False, gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.04848590730025312,
              max_delta_step=0, max_depth=15, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=150, n_jobs=-1,
              num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
              validate_parameters=1, verbosity=0),
 {'booster': 'gbtree',
  'colsample_bytree': 0.8887109431944138,
  'learning_rate': 0.04848590730025312,
  'max_depth': 15},
 0.238674)

In [12]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape)

(0.7720398147776196, (2400,))

# Features Selection

In [13]:
### BORUTA ###

model = BoostBoruta(clf_xgb, max_iter=200, perc=100)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)

BoostBoruta(estimator=XGBClassifier(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False, gamma=None,
                                    gpu_id=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=150, n_jobs=-1,
                                    num_parallel_tree=None, predictor=None,
                                    random_state=0, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                               

In [14]:
model.estimator_, model.n_features_

(XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.300000012,
               max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
               monotone_constraints='()', n_estimators=150, n_jobs=-1,
               num_parallel_tree=1, predictor='auto', random_state=0,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', validate_parameters=1, verbosity=0),
 10)

In [15]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.905, (2400,), (2400, 10), (2400, 2))

In [16]:
### RECURSIVE FEATURE ELIMINATION (RFE) ###

model = BoostRFE(regr_xgb, min_features_to_select=1, step=1)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)

BoostRFE(estimator=XGBRegressor(base_score=None, booster=None,
                                colsample_bylevel=None, colsample_bynode=None,
                                colsample_bytree=None, enable_categorical=False,
                                gamma=None, gpu_id=None, importance_type=None,
                                interaction_constraints=None,
                                learning_rate=None, max_delta_step=None,
                                max_depth=None, min_child_weight=None,
                                missing=nan, monotone_constraints=None,
                                n_estimators=150, n_jobs=-1,
                                num_parallel_tree=None, predictor=None,
                                random_state=0, reg_alpha=None, reg_lambda=None,
                                scale_pos_weight=None, subsample=None,
                                tree_method=None, validate_parameters=None,
                                verbosity=0),
         min_

In [17]:
model.estimator_, model.n_features_

(XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=150, n_jobs=-1,
              num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
              validate_parameters=1, verbosity=0),
 11)

In [18]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

(0.7690769632066736, (2400,), (2400, 11))

In [19]:
### RECURSIVE FEATURE ADDITION (RFA) ###

model = BoostRFA(regr_xgb, min_features_to_select=1, step=1)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)

BoostRFA(estimator=XGBRegressor(base_score=None, booster=None,
                                colsample_bylevel=None, colsample_bynode=None,
                                colsample_bytree=None, enable_categorical=False,
                                gamma=None, gpu_id=None, importance_type=None,
                                interaction_constraints=None,
                                learning_rate=None, max_delta_step=None,
                                max_depth=None, min_child_weight=None,
                                missing=nan, monotone_constraints=None,
                                n_estimators=150, n_jobs=-1,
                                num_parallel_tree=None, predictor=None,
                                random_state=0, reg_alpha=None, reg_lambda=None,
                                scale_pos_weight=None, subsample=None,
                                tree_method=None, validate_parameters=None,
                                verbosity=0),
         min_

In [20]:
model.estimator_, model.n_features_

(XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=150, n_jobs=-1,
              num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
              validate_parameters=1, verbosity=0),
 9)

In [21]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

(0.7655725942667062, (2400,), (2400, 9))

# Features Selection with SHAP

In [22]:
### BORUTA SHAP ###

model = BoostBoruta(
    clf_xgb, max_iter=200, perc=100,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)

BoostBoruta(estimator=XGBClassifier(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False, gamma=None,
                                    gpu_id=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=150, n_jobs=-1,
                                    num_parallel_tree=None, predictor=None,
                                    random_state=0, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                               

In [23]:
model.estimator_, model.n_features_

(XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.300000012,
               max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
               monotone_constraints='()', n_estimators=150, n_jobs=-1,
               num_parallel_tree=1, predictor='auto', random_state=0,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', validate_parameters=1, verbosity=0),
 11)

In [24]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.905, (2400,), (2400, 11), (2400, 2))

In [25]:
### RECURSIVE FEATURE ELIMINATION (RFE) SHAP ###

model = BoostRFE(
    regr_xgb, min_features_to_select=1, step=1,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)

BoostRFE(estimator=XGBRegressor(base_score=None, booster=None,
                                colsample_bylevel=None, colsample_bynode=None,
                                colsample_bytree=None, enable_categorical=False,
                                gamma=None, gpu_id=None, importance_type=None,
                                interaction_constraints=None,
                                learning_rate=None, max_delta_step=None,
                                max_depth=None, min_child_weight=None,
                                missing=nan, monotone_constraints=None,
                                n_estimators=150, n_jobs=-1,
                                num_parallel_tree=None, predictor=None,
                                random_state=0, reg_alpha=None, reg_lambda=None,
                                scale_pos_weight=None, subsample=None,
                                tree_method=None, validate_parameters=None,
                                verbosity=0),
         impo

In [26]:
model.estimator_, model.n_features_

(XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=150, n_jobs=-1,
              num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
              validate_parameters=1, verbosity=0),
 8)

In [27]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

(0.7689351426525531, (2400,), (2400, 8))

In [28]:
### RECURSIVE FEATURE ADDITION (RFA) SHAP ###

model = BoostRFA(
    regr_xgb, min_features_to_select=1, step=1,
    importance_type='shap_importances', train_importance=False
)
model.fit(
    X_regr_train, y_regr_train, trials=Trials(), 
    eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0
)

BoostRFA(estimator=XGBRegressor(base_score=None, booster=None,
                                colsample_bylevel=None, colsample_bynode=None,
                                colsample_bytree=None, enable_categorical=False,
                                gamma=None, gpu_id=None, importance_type=None,
                                interaction_constraints=None,
                                learning_rate=None, max_delta_step=None,
                                max_depth=None, min_child_weight=None,
                                missing=nan, monotone_constraints=None,
                                n_estimators=150, n_jobs=-1,
                                num_parallel_tree=None, predictor=None,
                                random_state=0, reg_alpha=None, reg_lambda=None,
                                scale_pos_weight=None, subsample=None,
                                tree_method=None, validate_parameters=None,
                                verbosity=0),
         impo

In [29]:
model.estimator_, model.n_features_

(XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=150, n_jobs=-1,
              num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
              validate_parameters=1, verbosity=0),
 9)

In [30]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

(0.7655725942667062, (2400,), (2400, 9))

# Hyperparameters Tuning + Features Selection

In [31]:
### HYPERPARAM TUNING WITH GRID-SEARCH + BORUTA ###

model = BoostBoruta(clf_xgb, param_grid=param_grid, max_iter=200, perc=100)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)


12 trials detected for ('learning_rate', 'num_leaves', 'max_depth')

trial: 0001 ### iterations: 00029 ### eval_score: 0.23693
trial: 0002 ### iterations: 00044 ### eval_score: 0.23354
trial: 0003 ### iterations: 00029 ### eval_score: 0.23693
trial: 0004 ### iterations: 00044 ### eval_score: 0.23354
trial: 0005 ### iterations: 00029 ### eval_score: 0.23693
trial: 0006 ### iterations: 00044 ### eval_score: 0.23354
trial: 0007 ### iterations: 00097 ### eval_score: 0.23423
trial: 0008 ### iterations: 00053 ### eval_score: 0.24083
trial: 0009 ### iterations: 00097 ### eval_score: 0.23423
trial: 0010 ### iterations: 00053 ### eval_score: 0.24083
trial: 0011 ### iterations: 00097 ### eval_score: 0.23423
trial: 0012 ### iterations: 00053 ### eval_score: 0.24083


BoostBoruta(estimator=XGBClassifier(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False, gamma=None,
                                    gpu_id=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=150, n_jobs=-1,
                                    num_parallel_tree=None, predictor=None,
                                    random_state=0, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                               

In [32]:
model.estimator_, model.best_params_, model.best_score_, model.n_features_

(XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.2, max_delta_step=0,
               max_depth=12, min_child_weight=1, missing=nan,
               monotone_constraints='()', n_estimators=150, n_jobs=-1,
               num_leaves=25, num_parallel_tree=1, predictor='auto',
               random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
               subsample=1, tree_method='exact', validate_parameters=1,
               verbosity=0),
 {'learning_rate': 0.2, 'num_leaves': 25, 'max_depth': 12},
 0.233541,
 10)

In [33]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.9108333333333334, (2400,), (2400, 10), (2400, 2))

In [34]:
### HYPERPARAM TUNING WITH RANDOM-SEARCH + RECURSIVE FEATURE ELIMINATION (RFE) ###

model = BoostRFE(
    regr_xgb, param_grid=param_dist, min_features_to_select=1, step=1,
    n_iter=10, sampling_seed=0
)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)


10 trials detected for ('learning_rate', 'num_leaves', 'max_depth')

trial: 0001 ### iterations: 00031 ### eval_score: 0.23639
trial: 0002 ### iterations: 00030 ### eval_score: 0.23515
trial: 0003 ### iterations: 00137 ### eval_score: 0.23894
trial: 0004 ### iterations: 00028 ### eval_score: 0.23804
trial: 0005 ### iterations: 00032 ### eval_score: 0.23966
trial: 0006 ### iterations: 00081 ### eval_score: 0.24102
trial: 0007 ### iterations: 00034 ### eval_score: 0.24126
trial: 0008 ### iterations: 00027 ### eval_score: 0.23949
trial: 0009 ### iterations: 00017 ### eval_score: 0.23766
trial: 0010 ### iterations: 00064 ### eval_score: 0.23398


BoostRFE(estimator=XGBRegressor(base_score=None, booster=None,
                                colsample_bylevel=None, colsample_bynode=None,
                                colsample_bytree=None, enable_categorical=False,
                                gamma=None, gpu_id=None, importance_type=None,
                                interaction_constraints=None,
                                learning_rate=None, max_delta_step=None,
                                max_depth=None, min_child_weight=None,
                                missing=nan, monotone_constraints=None,
                                n_estimato...
                                random_state=0, reg_alpha=None, reg_lambda=None,
                                scale_pos_weight=None, subsample=None,
                                tree_method=None, validate_parameters=None,
                                verbosity=0),
         min_features_to_select=1, n_iter=10,
         param_grid={'learning_rate': <scipy.stats._di

In [35]:
model.estimator_, model.best_params_, model.best_score_, model.n_features_

(XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.16076048691663583,
              max_delta_step=0, max_depth=10, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=150, n_jobs=-1,
              num_leaves=20, num_parallel_tree=1, predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=0),
 {'learning_rate': 0.16076048691663583, 'num_leaves': 20, 'max_depth': 10},
 0.233984,
 7)

In [36]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

(0.780910689335803, (2400,), (2400, 7))

In [None]:
### HYPERPARAM TUNING WITH HYPEROPT + RECURSIVE FEATURE ADDITION (RFA) ###

model = BoostRFA(
    regr_xgb, param_grid=param_dist_hyperopt, min_features_to_select=1, step=1,
    n_iter=10, sampling_seed=0
)
model.fit(
    X_regr_train, y_regr_train, trials=Trials(), 
    eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0
)


10 trials detected for ('booster', 'max_depth', 'learning_rate', 'colsample_bytree')

trial: 0001 ### iterations: 00149 ### eval_score: 0.22941
trial: 0002 ### iterations: 00081 ### eval_score: 0.23149
trial: 0003 ### iterations: 00058 ### eval_score: 0.22928


In [None]:
model.estimator_, model.best_params_, model.best_score_, model.n_features_

In [None]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

# Hyperparameters Tuning + Features Selection with SHAP

In [None]:
### HYPERPARAM TUNING WITH GRID-SEARCH + BORUTA SHAP ###

model = BoostBoruta(
    clf_xgb, param_grid=param_grid, max_iter=200, perc=100,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)

In [None]:
model.estimator_, model.best_params_, model.best_score_, model.n_features_

In [None]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

In [None]:
### HYPERPARAM TUNING WITH RANDOM-SEARCH + RECURSIVE FEATURE ELIMINATION (RFE) SHAP ###

model = BoostRFE(
    regr_xgb, param_grid=param_dist, min_features_to_select=1, step=1,
    n_iter=10, sampling_seed=0,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)

In [None]:
model.estimator_, model.best_params_, model.best_score_, model.n_features_

In [None]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

In [None]:
### HYPERPARAM TUNING WITH HYPEROPT + RECURSIVE FEATURE ADDITION (RFA) SHAP ###

model = BoostRFA(
    regr_xgb, param_grid=param_dist_hyperopt, min_features_to_select=1, step=1,
    n_iter=10, sampling_seed=0,
    importance_type='shap_importances', train_importance=False
)
model.fit(
    X_regr_train, y_regr_train, trials=Trials(), 
    eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0
)

In [None]:
model.estimator_, model.best_params_, model.best_score_, model.n_features_

In [None]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

# CUSTOM EVAL METRIC SUPPORT

In [None]:
from sklearn.metrics import roc_auc_score

def AUC(y_hat, dtrain):
    y_true = dtrain.get_label()
    return 'auc', roc_auc_score(y_true, y_hat)

In [None]:
model = BoostRFE(
    clf_xgb, 
    param_grid=param_grid, min_features_to_select=1, step=1,
    greater_is_better=True
)
model.fit(
    X_clf_train, y_clf_train, 
    eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0,
    eval_metric=AUC
)