In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules are
# re-imported before each cell runs and edits to local source files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
# Add the parent of the current working directory to the import search path.
# Presumably this is what makes the `treesbo` package imported in the next cell
# resolvable — confirm against the repository layout.
# NOTE(review): the path is relative, so it depends on where the kernel was
# started, and every re-run of this cell appends another ".." entry.
sys.path.append("..")

In [2]:
# --- Imports: standard library, third-party, then project-local -------------
import os
import random

import numpy as np
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.model_selection import train_test_split

from treesbo.tuning import main_tuning_with_bo

# --- Reproducibility ---------------------------------------------------------
SEED = 66666
random.seed(SEED)
np.random.seed(SEED)
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
# assignment cannot change hashing inside the already-running kernel; it is
# presumably intended for child processes — confirm.
os.environ["PYTHONHASHSEED"] = str(SEED)

# --- Data --------------------------------------------------------------------
# Breast-cancer data set is the active choice; the iris loader stays imported
# as a drop-in alternative:
#   data = load_iris()
data = load_breast_cancer()
X = data.data
y = data.target

# Gaussian noise and integer scale factors with the same shape as X. They are
# only consumed by the disabled noise-injection line below, but drawing them
# still advances the global NumPy random state.
rdm = np.random.randn(*X.shape)
rdm_int = np.random.randint(1, 20, size=X.shape)
# X = X + rdm * rdm_int

# Hold out 33% of the rows for validation; the split has its own fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.33, random_state=42
)

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [10]:
import lightgbm as lgb

# Baseline LightGBM model on the hold-out split, used as a reference point for
# the tuned models produced further down.

# Wrap the arrays in LightGBM datasets; the validation set references the
# training set so both share the same feature binning.
lgb_train = lgb.Dataset(X_train, y_train)
lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

params = {
    "boosting_type": "gbdt",
    "learning_rate": 0.05,
    "objective": "binary",
    "metric": 'auc',
    'n_jobs': -1,
    'seed': 12
}

# `lgb.train` no longer accepts `early_stopping_rounds=` / `verbose_eval=` in
# LightGBM >= 4.0; the same behaviour is expressed through callbacks.
# `log_evaluation` only exists from 3.3 onwards, so fall back to its
# predecessor `print_evaluation` when running on an older installation.
_log_evaluation = getattr(lgb, "log_evaluation", None) or lgb.print_evaluation

model_1 = lgb.train(
    params,
    lgb_train,
    num_boost_round=1000,
    valid_sets=[lgb_train, lgb_val],
    callbacks=[
        # Stop once the validation metric has not improved for 50 rounds.
        lgb.early_stopping(stopping_rounds=50),
        # Print the evaluation metrics every 20 boosting rounds.
        _log_evaluation(period=20),
    ],
)

Training until validation scores don't improve for 50 rounds.
[20]	training's auc: 0.990678	valid_1's auc: 0.993216
[40]	training's auc: 0.992051	valid_1's auc: 0.993339
[60]	training's auc: 0.99322	valid_1's auc: 0.993092
[80]	training's auc: 0.993746	valid_1's auc: 0.992722
Early stopping, best iteration is:
[41]	training's auc: 0.992256	valid_1's auc: 0.993709


In [11]:
# Hyper-parameter search for the 'LGB' model against the explicit
# train/validation split created above, scored by AUC with max_evals=10.
# task='c' presumably selects classification — confirm in treesbo.tuning.
# The call is the last expression of the cell, so its return value (shown in
# the recorded output as a (params, score) tuple) is displayed.
main_tuning_with_bo(
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    model_nm='LGB', task='c',
    eval_metric='auc', max_evals=10,
)

Training until validation scores don't improve for 200 rounds.
[50]	training's auc: 0.985023	valid_1's auc: 0.993339
[100]	training's auc: 0.98476	valid_1's auc: 0.992846
[150]	training's auc: 0.985067	valid_1's auc: 0.992846
[200]	training's auc: 0.986967	valid_1's auc: 0.992352
Early stopping, best iteration is:                  
[13]	training's auc: 0.980114	valid_1's auc: 0.995128
Trial file exists, will be renamed...               
Training until validation scores don't improve for 200 rounds.                
[50]	training's auc: 0.984395	valid_1's auc: 0.987048                         
[100]	training's auc: 0.98647	valid_1's auc: 0.990255                         
[150]	training's auc: 0.987113	valid_1's auc: 0.990872                        
[200]	training's auc: 0.987639	valid_1's auc: 0.991612                        
[250]	training's auc: 0.987697	valid_1's auc: 0.991365                        
[300]	training's auc: 0.987697	valid_1's auc: 0.991365                        
[350]	





Early stopping, best iteration is:
[208]	training's auc: 0.9891	valid_1's auc: 0.994696


({'bagging_fraction': 0.2334537065821213,
  'bagging_freq': 1,
  'boosting_type': 'gbdt',
  'feature_fraction': 0.17016679527037032,
  'learning_rate': 0.05,
  'max_depth': 11,
  'metric': 'auc',
  'min_data_in_leaf': 10,
  'n_jobs': -1,
  'num_leaves': 60,
  'objective': 'binary',
  'reg_alpha': 1.3100171316698228,
  'reg_lambda': 8.80282123179531,
  'seed': 666,
  'n_estimators': 673},
 -0.9974096459849513)

In [16]:
# Cross-validated variant of the search: the full X / y are passed positionally
# and no validation split is supplied. With folds=None and nfold=3 the helper
# presumably builds three folds itself — confirm in treesbo.tuning.
# As above, the returned value is displayed as the cell output.
main_tuning_with_bo(
    X, y,
    model_nm='LGB', task='c',
    eval_metric='auc', max_evals=10,
    folds=None, nfold=3,
)

[30]	cv_agg's auc: 0.5 + 0                          
[60]	cv_agg's auc: 0.5 + 0                          
[90]	cv_agg's auc: 0.5 + 0                          
[120]	cv_agg's auc: 0.5 + 0                         
[150]	cv_agg's auc: 0.5 + 0                         
[180]	cv_agg's auc: 0.5 + 0                         
Trial file exists, will be renamed...               
[30]	cv_agg's auc: 0.5 + 0                                     
[60]	cv_agg's auc: 0.5 + 0                                     
[90]	cv_agg's auc: 0.5 + 0                                     
[120]	cv_agg's auc: 0.5 + 0                                    
[150]	cv_agg's auc: 0.5 + 0                                    
[180]	cv_agg's auc: 0.5 + 0                                    
[30]	cv_agg's auc: 0.961192 + 0.0159027                        
[60]	cv_agg's auc: 0.969821 + 0.00919435                       
[90]	cv_agg's auc: 0.969916 + 0.00936251                       
[120]	cv_agg's auc: 0.971815 + 0.0087026             





({'bagging_fraction': 0.2334537065821213,
  'bagging_freq': 4,
  'boosting_type': 'gbdt',
  'feature_fraction': 0.17016679527037032,
  'learning_rate': 0.05,
  'max_depth': 11,
  'metric': 'auc',
  'min_data_in_leaf': 20,
  'n_jobs': -1,
  'num_leaves': 160,
  'objective': 'binary',
  'reg_alpha': 1.3100171316698228,
  'reg_lambda': 8.80282123179531,
  'seed': 666,
  'n_estimators': 341},
 -0.9753449412286624)