In [1]:
import numpy as np
import pandas as pd
import feather
from lightgbm import LGBMClassifier
from skopt import BayesSearchCV
from sklearn.model_selection import KFold

In [2]:
def constrain_to_range(x,upper_lim,lower_lim):
    return min(max(x, lower_lim),upper_lim)

In [3]:
classifier = LGBMClassifier(verbose = 0, learning_rate = 0.02, metric = "auc", n_estimators = 3000, device = "gpu")
lgbm_parameter_ranges = {"num_leaves":(20,40),
                         "colsample_by_tree":(0.6,0.95),
                         "subsample":(0.7,0.95),
                         "max_depth":(4,10),
                         "reg_alpha":(0.01,0.2),
                         "reg_lambda":(0.01,0.2),
                         "min_split_gain":(0.01,0.1),
                         "min_child_weight":(1,10)}

In [4]:
bayes_cv = BayesSearchCV(estimator = classifier,
                         search_spaces = lgbm_parameter_ranges,
                         scoring = "roc_auc",
                         n_iter = 10,
                         cv = 5,
                         verbose = 1)

In [5]:
train = pd.read_feather("./../Solution attempts/v09 train data.feather")
target = pd.read_feather("./../Solution attempts/v09 target.feather")["TARGET"]

In [6]:
result = bayes_cv.fit(train, target)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  6.0min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.8min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.8min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  5.3min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.9min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  5.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.4min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  6.5min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  3.7min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.9min finished


In [8]:
vars(result)

{'best_estimator_': LGBMClassifier(boosting_type='gbdt', class_weight=None,
         colsample_by_tree=0.890363707448339, colsample_bytree=1.0,
         device='gpu', importance_type='split', learning_rate=0.02,
         max_depth=4, metric='auc', min_child_samples=20,
         min_child_weight=9, min_split_gain=0.09271570164344667,
         n_estimators=3000, n_jobs=-1, num_leaves=24, objective=None,
         random_state=None, reg_alpha=0.08389077206147642,
         reg_lambda=0.09134552272436029, silent=True,
         subsample=0.7707792041437583, subsample_for_bin=200000,
         subsample_freq=0, verbose=0),
 'best_index_': 8,
 'cv': 5,
 'cv_results_': defaultdict(list,
             {'mean_fit_time': [67.01760053634644,
               53.821556901931764,
               54.52278985977173,
               59.80710654258728,
               54.64923405647278,
               58.3823842048645,
               49.58451762199402,
               73.2758309841156,
               41.261590480