In [1]:
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, train_test_split

from lightgbm import *

from shaphypetune import BoostBoruta, BoostRFE

import warnings
warnings.simplefilter('ignore')

In [2]:
X, y = make_classification(n_samples=8000, n_features=20, n_classes=2, 
                           n_informative=4, n_redundant=6, random_state=0)

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.3, shuffle=False)

In [3]:
from sklearn.base import BaseEstimator
from sklearn.utils.metaestimators import if_delegate_has_method


class BoostRFEWrap(BaseEstimator, BoostRFE):

    @if_delegate_has_method(delegate='estimator')
    def predict_proba(self, X):
        return self.predict(X, method='predict_proba')
    
    
class BoostBorutaWrap(BaseEstimator, BoostBoruta):

    @if_delegate_has_method(delegate='estimator')
    def predict_proba(self, X):
        return self.predict(X, method='predict_proba')

In [4]:
parameters = {
            'learning_rate': [0.2, 0.1],
            'num_leaves': [25, 30, 35],
            }

model = BoostBorutaWrap(LGBMClassifier(n_estimators= 100),
                        param_grid = parameters, 
                        max_iter=200, perc=100)

pipe = make_pipeline(StandardScaler(), model)
pipe.fit(X_train, y_train, 
         boostborutawrap__eval_set=[(X_valid, y_valid)], 
         boostborutawrap__early_stopping_rounds=6, boostborutawrap__verbose=0)


6 trials detected for ('learning_rate', 'num_leaves')

trial: 0001 ### iterations: 00012 ### eval_score: 0.45358
trial: 0002 ### iterations: 00011 ### eval_score: 0.43826
trial: 0003 ### iterations: 00012 ### eval_score: 0.41958
trial: 0004 ### iterations: 00022 ### eval_score: 0.43875
trial: 0005 ### iterations: 00022 ### eval_score: 0.42605
trial: 0006 ### iterations: 00026 ### eval_score: 0.42618


Pipeline(steps=[('standardscaler', StandardScaler()),
                ('boostborutawrap',
                 BoostBorutaWrap(estimator=LGBMClassifier(), max_iter=200,
                                 param_grid={'learning_rate': [0.2, 0.1],
                                             'num_leaves': [25, 30, 35]}))])

In [5]:
pipe.predict(X_valid).shape, pipe.predict_proba(X_valid).shape, pipe.transform(X_valid).shape

((2400,), (2400, 2), (2400, 10))

In [6]:
model = BoostRFEWrap(LGBMClassifier(n_estimators= 50),
                     min_features_to_select=10, step=3)

parameters = {
            'estimator__learning_rate': [0.2, 0.1],
            'estimator__num_leaves': [25, 30, 35],
            }

search = GridSearchCV(
    model, parameters, n_jobs=-1)

search.fit(X_train, y_train)

GridSearchCV(estimator=BoostRFEWrap(estimator=LGBMClassifier(n_estimators=50),
                                    min_features_to_select=10, step=3),
             n_jobs=-1,
             param_grid={'estimator__learning_rate': [0.2, 0.1],
                         'estimator__num_leaves': [25, 30, 35]})

In [7]:
search.predict(X_valid).shape, search.predict_proba(X_valid).shape, search.transform(X_valid).shape

((2400,), (2400, 2), (2400, 10))