In [1]:
%load_ext autoreload
%autoreload 2
import sys

# Put the parent directory on the import path (presumably so the local
# `treesbo` package imported below resolves without being installed).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate ".." entry to sys.path each time.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
from sklearn.datasets import load_diabetes
from treesbo.tuning import main_tuning_with_bo
from sklearn.model_selection import train_test_split

# Load the scikit-learn diabetes dataset and pull out features and target.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Hold out 33% of the rows for validation; the fixed random_state keeps the
# split identical across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


# Baseline (LightGBM without hyperparameter tuning)

In [20]:
import lightgbm as lgb

# Baseline run evaluated with the "l1" metric.
# Wrap both splits as LightGBM datasets; the validation set references the
# training set.
lgb_train = lgb.Dataset(data=X_train, label=y_train)
lgb_val = lgb.Dataset(data=X_val, label=y_val, reference=lgb_train)

# Fixed, untuned hyperparameters.
params = dict(
    boosting_type="gbdt",
    learning_rate=0.05,
    objective="regression",
    metric="l1",
    n_jobs=-1,
    seed=12,
)

# Train for at most 1000 rounds with a 50-round early-stopping patience;
# metrics for both the training and validation sets are logged every 20 rounds.
model_1 = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=1000,
    valid_sets=[lgb_train, lgb_val],
    early_stopping_rounds=50,
    verbose_eval=20,
)

Training until validation scores don't improve for 50 rounds.
[20]	training's l1: 45.4785	valid_1's l1: 48.8642
[40]	training's l1: 36.6612	valid_1's l1: 44.9735
[60]	training's l1: 31.6867	valid_1's l1: 44.5436
[80]	training's l1: 28.395	valid_1's l1: 44.6208
[100]	training's l1: 25.9022	valid_1's l1: 44.6808
Early stopping, best iteration is:
[58]	training's l1: 31.9954	valid_1's l1: 44.4924


In [29]:
import lightgbm as lgb

# Baseline run evaluated with the "rmse" metric.
# NOTE: this rebinds lgb_train, lgb_val, params and model_1 from the previous
# baseline cell.
lgb_train = lgb.Dataset(data=X_train, label=y_train)
lgb_val = lgb.Dataset(data=X_val, label=y_val, reference=lgb_train)

# Same fixed hyperparameters as the l1 baseline; only the metric differs.
params = dict(
    boosting_type="gbdt",
    learning_rate=0.05,
    objective="regression",
    metric="rmse",
    n_jobs=-1,
    seed=12,
)

# Train for at most 1000 rounds with a 50-round early-stopping patience;
# metrics for both the training and validation sets are logged every 20 rounds.
model_1 = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=1000,
    valid_sets=[lgb_train, lgb_val],
    early_stopping_rounds=50,
    verbose_eval=20,
)

Training until validation scores don't improve for 50 rounds.
[20]	training's rmse: 53.2919	valid_1's rmse: 58.6774
[40]	training's rmse: 44.0757	valid_1's rmse: 55.9887
[60]	training's rmse: 39.041	valid_1's rmse: 55.85
[80]	training's rmse: 35.5588	valid_1's rmse: 56.4404
[100]	training's rmse: 32.6186	valid_1's rmse: 57.0249
Early stopping, best iteration is:
[58]	training's rmse: 39.4357	valid_1's rmse: 55.7303


# With Bayesian optimization (BO)

### train-test-split

In [21]:
# Tune LightGBM against the fixed train/validation split, scored with "l1".
# The cell displays the call's return value (in the recorded run: a tuple of
# the best parameter dict and its score).
main_tuning_with_bo(
    # data: explicit hold-out split
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    # model / task / metric
    model_nm="LGB",
    task="regression",
    eval_metric="l1",
    # search budget
    max_evals=3,
    # fold settings (presumably unused when a validation split is supplied;
    # TODO confirm against treesbo.tuning)
    folds=None,
    nfold=3,
)

So it will be used L2 istead here!


Training until validation scores don't improve for 200 rounds.
[50]	training's l1: 48.6445	valid_1's l1: 44.9063  
[100]	training's l1: 44.7173	valid_1's l1: 43.1733 
[150]	training's l1: 42.3356	valid_1's l1: 42.3666 
[200]	training's l1: 40.9264	valid_1's l1: 42.4427 
[250]	training's l1: 39.7991	valid_1's l1: 42.1374 
[300]	training's l1: 38.7752	valid_1's l1: 42.198  
[350]	training's l1: 38.338	valid_1's l1: 42.3516  
Early stopping, best iteration is:                 
[160]	training's l1: 41.8494	valid_1's l1: 42.0866
Trial file exists, will be renamed...              
Training until validation scores don't improve for 200 rounds.             
[50]	training's l1: 50.1913	valid_1's l1: 46.2893                          
[100]	training's l1: 47.4148	valid_1's l1: 44.6575                         
[150]	training's l1: 45.8125	valid_1's l1: 43.7571                         
[200]	training's l1: 44.9836	valid_1's l1: 44.0903                         
[250]	training's l1: 44.348	valid_1's 

({'bagging_fraction': 0.7868403860665779,
  'bagging_freq': 10,
  'boosting_type': 'gbdt',
  'feature_fraction': 0.21002941429365568,
  'learning_rate': 0.01,
  'max_depth': 18,
  'metric': 'l1',
  'min_data_in_leaf': 50,
  'min_split_gain': 1.7851929194051657,
  'n_jobs': -1,
  'num_leaves': 270,
  'reg_alpha': 6.817180284240959,
  'reg_lambda': 0.7052394266901507,
  'seed': 2019,
  'n_estimators': 1380},
 41.28148521189629)

In [35]:
# Same hold-out tuning run for LightGBM, but with the metric passed as "mae".
# In the recorded output the resulting parameter dict reports metric "l1".
main_tuning_with_bo(
    # data: explicit hold-out split
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    # model / task / metric
    model_nm="LGB",
    task="regression",
    eval_metric="mae",
    # search budget
    max_evals=3,
    # fold settings (presumably unused when a validation split is supplied;
    # TODO confirm against treesbo.tuning)
    folds=None,
    nfold=3,
)

So it will be used L2 istead here!


{'seed': 2019, 'metric': 'l1', 'n_jobs': -1, 'learning_rate': 0.05, 'boosting_type': 'gbdt', 'bagging_fraction': <hyperopt.pyll.base.Apply object at 0x11976af28>, 'feature_fraction': <hyperopt.pyll.base.Apply object at 0x119c12908>, 'bagging_freq': <hyperopt.pyll.base.Apply object at 0x119c12b70>, 'num_leaves': <hyperopt.pyll.base.Apply object at 0x119a85cc0>, 'max_depth': <hyperopt.pyll.base.Apply object at 0x119a85f98>, 'min_data_in_leaf': <hyperopt.pyll.base.Apply object at 0x119a85e80>, 'min_split_gain': <hyperopt.pyll.base.Apply object at 0x119a854e0>, 'reg_alpha': <hyperopt.pyll.base.Apply object at 0x119a85860>, 'reg_lambda': <hyperopt.pyll.base.Apply object at 0x119a85128>}
Training until validation scores don't improve for 200 rounds.
[50]	training's l1: 48.6445	valid_1's l1: 44.9063  
[100]	training's l1: 44.7173	valid_1's l1: 43.1733 
[150]	training's l1: 42.3356	valid_1's l1: 42.3666 
[200]	training's l1: 40.9264	valid_1's l1: 42.4427 
[250]	training's l1: 39.7991	valid_1's

({'bagging_fraction': 0.7868403860665779,
  'bagging_freq': 10,
  'boosting_type': 'gbdt',
  'feature_fraction': 0.21002941429365568,
  'learning_rate': 0.01,
  'max_depth': 18,
  'metric': 'l1',
  'min_data_in_leaf': 50,
  'min_split_gain': 1.7851929194051657,
  'n_jobs': -1,
  'num_leaves': 270,
  'reg_alpha': 6.817180284240959,
  'reg_lambda': 0.7052394266901507,
  'seed': 2019,
  'n_estimators': 1380},
 41.28148521189629)

### 3-fold cross-validation

In [11]:
# Tune LightGBM on the full dataset with no explicit validation split; with
# nfold=3 the logged "cv_agg" lines indicate cross-validated scoring.
# Metric: "l1".
main_tuning_with_bo(
    X,
    y,
    # model / task / metric
    model_nm="LGB",
    task="regression",
    eval_metric="l1",
    # search budget
    max_evals=10,
    # fold settings: no predefined folds, three folds requested
    folds=None,
    nfold=3,
)

So it will be used L2 istead here!


[30]	cv_agg's l1: 52.5042 + 2.2142                  
[60]	cv_agg's l1: 48.5177 + 1.34953                 
[90]	cv_agg's l1: 47.2167 + 1.27261                 
[120]	cv_agg's l1: 46.4778 + 1.36312                
[150]	cv_agg's l1: 45.9 + 1.38557                   
[180]	cv_agg's l1: 45.5415 + 1.27126                
[210]	cv_agg's l1: 45.2723 + 1.24737                
[240]	cv_agg's l1: 45.3608 + 1.28178                
[270]	cv_agg's l1: 45.3642 + 1.34952                
[300]	cv_agg's l1: 45.3672 + 1.38487                
[330]	cv_agg's l1: 45.5225 + 1.53531                
[360]	cv_agg's l1: 45.3303 + 1.55716                
[390]	cv_agg's l1: 45.1662 + 1.48254                
[420]	cv_agg's l1: 45.0418 + 1.47019                
[450]	cv_agg's l1: 45.0244 + 1.44823                
[480]	cv_agg's l1: 45.1083 + 1.51138                
[510]	cv_agg's l1: 45.012 + 1.44321                 
[540]	cv_agg's l1: 45.1654 + 1.48193                
[570]	cv_agg's l1: 45.3014 + 1.55279          

[600]	cv_agg's l1: 45.9047 + 1.55715
[630]	cv_agg's l1: 45.7774 + 1.56861
[660]	cv_agg's l1: 45.6949 + 1.58217
[690]	cv_agg's l1: 45.6074 + 1.64533
[720]	cv_agg's l1: 45.5531 + 1.678
[750]	cv_agg's l1: 45.4614 + 1.74326
[780]	cv_agg's l1: 45.4283 + 1.78697
[810]	cv_agg's l1: 45.361 + 1.83833
[840]	cv_agg's l1: 45.2869 + 1.82937
[870]	cv_agg's l1: 45.2128 + 1.79534
[900]	cv_agg's l1: 45.1818 + 1.80775
[930]	cv_agg's l1: 45.1306 + 1.83322
[960]	cv_agg's l1: 45.0649 + 1.76424
[990]	cv_agg's l1: 45.0349 + 1.76244
[1020]	cv_agg's l1: 45.0009 + 1.78714
[1050]	cv_agg's l1: 44.9428 + 1.81168
[1080]	cv_agg's l1: 44.9186 + 1.77957
[1110]	cv_agg's l1: 44.8658 + 1.79708
[1140]	cv_agg's l1: 44.829 + 1.75537
[1170]	cv_agg's l1: 44.8134 + 1.7324
[1200]	cv_agg's l1: 44.8089 + 1.70196
[1230]	cv_agg's l1: 44.7776 + 1.68343
[1260]	cv_agg's l1: 44.7701 + 1.64033
[1290]	cv_agg's l1: 44.7895 + 1.68121
[1320]	cv_agg's l1: 44.8325 + 1.70528
[1350]	cv_agg's l1: 44.8377 + 1.71533


({'bagging_fraction': 0.6248086320962142,
  'bagging_freq': 10,
  'boosting_type': 'gbdt',
  'feature_fraction': 0.37452063115501655,
  'learning_rate': 0.01,
  'max_depth': 9,
  'metric': 'l1',
  'min_data_in_leaf': 40,
  'min_split_gain': 4.552696720611812,
  'n_jobs': -1,
  'num_leaves': 78,
  'reg_alpha': 2.959817404685545,
  'reg_lambda': 7.381045569562488,
  'seed': 2019,
  'n_estimators': 1261},
 44.77007286835909)

In [5]:
# Tune LightGBM on the full dataset with no explicit validation split; with
# nfold=3 the logged "cv_agg" lines indicate cross-validated scoring.
# Metric: "l2" (the recorded output reports it as "rmse").
main_tuning_with_bo(
    X,
    y,
    # model / task / metric
    model_nm="LGB",
    task="regression",
    eval_metric="l2",
    # search budget
    max_evals=10,
    # fold settings: no predefined folds, three folds requested
    folds=None,
    nfold=3,
)

{'seed': 2019, 'metric': 'rmse', 'n_jobs': -1, 'learning_rate': 0.05, 'boosting_type': 'gbdt', 'bagging_fraction': <hyperopt.pyll.base.Apply object at 0x10fe93ef0>, 'feature_fraction': <hyperopt.pyll.base.Apply object at 0x11449d080>, 'bagging_freq': <hyperopt.pyll.base.Apply object at 0x11449d208>, 'num_leaves': <hyperopt.pyll.base.Apply object at 0x104461438>, 'max_depth': <hyperopt.pyll.base.Apply object at 0x11449d320>, 'min_data_in_leaf': <hyperopt.pyll.base.Apply object at 0x11449d4e0>, 'min_split_gain': <hyperopt.pyll.base.Apply object at 0x11449d630>, 'reg_alpha': <hyperopt.pyll.base.Apply object at 0x11449d780>, 'reg_lambda': <hyperopt.pyll.base.Apply object at 0x11449d8d0>}
[30]	cv_agg's rmse: 61.9772 + 1.264                 
[60]	cv_agg's rmse: 57.9157 + 0.783656              
[90]	cv_agg's rmse: 56.8414 + 0.802361              
[120]	cv_agg's rmse: 56.2703 + 0.868574             
[150]	cv_agg's rmse: 56.1106 + 0.789522             
[180]	cv_agg's rmse: 55.9321 + 0.754512   

[1260]	cv_agg's rmse: 55.774 + 1.26051
[1290]	cv_agg's rmse: 55.7926 + 1.28096
[1320]	cv_agg's rmse: 55.8299 + 1.30771
[1350]	cv_agg's rmse: 55.8186 + 1.29348


({'bagging_fraction': 0.7868403860665779,
  'bagging_freq': 10,
  'boosting_type': 'gbdt',
  'feature_fraction': 0.21002941429365568,
  'learning_rate': 0.01,
  'max_depth': 18,
  'metric': 'rmse',
  'min_data_in_leaf': 50,
  'min_split_gain': 1.7851929194051657,
  'n_jobs': -1,
  'num_leaves': 270,
  'reg_alpha': 6.817180284240959,
  'reg_lambda': 0.7052394266901507,
  'seed': 2019,
  'n_estimators': 1262},
 55.77026806512952)

# XGBoost (XGB)

In [6]:
# Tune the "XGB" model on the full dataset with nfold=3, scored with "l1"
# (the recorded log reports it as "mae").
main_tuning_with_bo(
    X,
    y,
    # model / task / metric
    model_nm="XGB",
    task="regression",
    eval_metric="l1",
    # search budget
    max_evals=10,
    # fold settings: no predefined folds, three folds requested
    folds=None,
    nfold=3,
)

So it will be used L2 istead here!


[0]	train-mae:144.79+1.58688	test-mae:144.674+3.15531

[30]	train-mae:55.4819+0.270798	test-mae:58.0097+0.910342

[60]	train-mae:44.1476+0.680028	test-mae:50.6916+1.92338

[90]	train-mae:39.2235+0.453847	test-mae:48.0825+2.05511

[120]	train-mae:35.7704+0.469341	test-mae:47.3195+1.95916

[150]	train-mae:32.8561+0.457133	test-mae:46.5768+1.27573

[180]	train-mae:30.6021+0.703221	test-mae:46.7267+1.12028

[210]	train-mae:28.9085+0.521156	test-mae:46.6344+1.09196

[240]	train-mae:27.2824+0.506861	test-mae:46.3007+0.849531

[270]	train-mae:26.1082+0.414329	test-mae:46.3963+1.07527

[300]	train-mae:24.6628+0.384299	test-mae:46.9173+0.965338

[330]	train-mae:23.5075+0.639405	test-mae:47.175+1.3638

[360]	train-mae:22.4638+0.567961	test-mae:47.1429+0.989869

[390]	train-mae:21.3535+0.473038	test-mae:47.4023+1.02934

[420]	train-mae:20.4363+0.346682	test-mae:47.5127+1.28892

[450]	train-mae:19.5735+0.282217	test-mae:47.6711+1.13087

Trial file exists, will be renamed...               
[0]	trai

[60]	train-mae:17.934+0.189418	test-mae:47.4127+0.705436                     

[90]	train-mae:9.37086+0.186607	test-mae:47.0102+1.02637                     

[120]	train-mae:5.34861+0.0834193	test-mae:47.1019+1.07714                   

[150]	train-mae:3.19467+0.048612	test-mae:47.1962+1.1133                     

[180]	train-mae:2.00962+0.0607989	test-mae:47.2055+1.11754                   

[210]	train-mae:1.34714+0.0394916	test-mae:47.2657+1.12836                   

[240]	train-mae:0.966572+0.0373195	test-mae:47.2812+1.14127                  

[270]	train-mae:0.751766+0.015349	test-mae:47.2881+1.14822                   

100%|██████████| 10/10 [00:08<00:00,  1.14it/s, best loss: 45.468147666666674]
[0]	train-mae:150.231+1.61484	test-mae:150.252+3.24751
[30]	train-mae:112.603+1.00474	test-mae:112.529+2.49397
[60]	train-mae:85.8402+0.745805	test-mae:86.5522+1.66947
[90]	train-mae:67.6953+0.67329	test-mae:70.0712+0.936423
[120]	train-mae:55.873+0.774448	test-mae:60.3757+0.510231
[150]	

({'booster': 'gbtree',
  'colsample_bytree': 0.4903958566626445,
  'eval_metric': 'mae',
  'gamma': 1.457443887822124,
  'learning_rate': 0.01,
  'max_depth': 17,
  'min_child_weight': 4.985713606757903,
  'n_jobs': -1,
  'random_state': 2019,
  'reg_alpha': 2.1322455501602184,
  'reg_lambda': 1.5550268749052552,
  'subsample': 0.5431576685342818,
  'n_estimators': 478},
 45.398359666666664)

In [7]:
# Tune the "XGB" model on the full dataset with nfold=3, scored with "l2"
# (the recorded log reports it as "rmse").
main_tuning_with_bo(
    X,
    y,
    # model / task / metric
    model_nm="XGB",
    task="regression",
    eval_metric="l2",
    # search budget
    max_evals=10,
    # fold settings: no predefined folds, three folds requested
    folds=None,
    nfold=3,
)

[0]	train-rmse:163.746+1.54248	test-rmse:163.665+2.88092

[30]	train-rmse:72.3158+0.980664	test-rmse:74.6131+0.450408

[60]	train-rmse:54.6472+1.23188	test-rmse:61.4102+1.8725

[90]	train-rmse:48.3928+0.628459	test-rmse:58.3522+2.09893

[120]	train-rmse:44.4903+0.712086	test-rmse:57.8987+1.40934

[150]	train-rmse:41.3333+0.797742	test-rmse:57.6665+0.836105

[180]	train-rmse:38.8943+0.932248	test-rmse:58.0802+0.885228

[210]	train-rmse:36.9543+0.733497	test-rmse:58.0908+0.642937

[240]	train-rmse:35.1982+0.836268	test-rmse:58.1828+0.730164

[270]	train-rmse:33.7667+0.767799	test-rmse:58.2918+1.08404

[300]	train-rmse:31.9674+0.691884	test-rmse:58.7128+0.867273

Trial file exists, will be renamed...               
[0]	train-rmse:163.066+1.46943	test-rmse:163.122+2.82722                     

[30]	train-rmse:59.7489+0.469252	test-rmse:70.5514+1.33155                   

[60]	train-rmse:34.7562+0.368004	test-rmse:59.3084+1.40246                   

[90]	train-rmse:24.9971+0.480013	test-rms

[450]	train-rmse:30.2696+0.631415	test-rmse:57.059+1.21776
[480]	train-rmse:29.2679+0.618664	test-rmse:57.0743+1.22355
[510]	train-rmse:28.3482+0.641787	test-rmse:57.0779+1.20412
[540]	train-rmse:27.4192+0.637902	test-rmse:57.1613+1.17144




({'booster': 'gbtree',
  'colsample_bytree': 0.37452063115501655,
  'eval_metric': 'rmse',
  'gamma': 0.28154286771683146,
  'learning_rate': 0.05,
  'max_depth': 5,
  'min_child_weight': 4.552696720611812,
  'n_jobs': -1,
  'random_state': 2019,
  'reg_alpha': 0.20918722125243616,
  'reg_lambda': 0.8879452214056636,
  'subsample': 0.7642941012606239,
  'n_estimators': 98},
 56.731444)

# ExtraTrees

In [8]:
# Tune the "ET" (extra-trees) model on the full dataset with nfold=3,
# requesting the "l1" metric.
main_tuning_with_bo(
    X,
    y,
    # model / task / metric
    model_nm="ET",
    task="regression",
    eval_metric="l1",
    # search budget
    max_evals=10,
    # fold settings: no predefined folds, three folds requested
    folds=None,
    nfold=3,
)

So it will be used L2 istead here!


Trial file exists, will be renamed...               
100%|██████████| 10/10 [00:08<00:00,  1.16it/s, best loss: 66.41108133577004]


({'criterion': 'mse',
  'max_depth': 17,
  'max_features': 0.9974284492164226,
  'min_impurity_decrease': 3.5537425836003638,
  'min_samples_leaf': 53,
  'min_samples_split': 256,
  'n_estimators': 1000,
  'n_jobs': -1,
  'random_state': 2019},
 66.14895408194896)

# Random forest (RF)

In [9]:
# Tune the "RF" model on the full dataset with nfold=3, requesting the "l2"
# metric.
main_tuning_with_bo(
    X,
    y,
    # model / task / metric
    model_nm="RF",
    task="regression",
    eval_metric="l2",
    # search budget
    max_evals=10,
    # fold settings: no predefined folds, three folds requested
    folds=None,
    nfold=3,
)

Trial file exists, will be renamed...               
100%|██████████| 10/10 [00:04<00:00,  2.30it/s, best loss: 61.532565121235535]


({'criterion': 'mse',
  'max_depth': 27,
  'max_features': 0.2690201611806585,
  'min_impurity_decrease': 1.627536375807589,
  'min_samples_leaf': 41,
  'min_samples_split': 46,
  'n_estimators': 1000,
  'n_jobs': -1,
  'random_state': 2019},
 61.42167918913546)