In [1]:
import pandas as pd
import os
import numpy as np
from bayes_opt import BayesianOptimization
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor, Pool
import lightgbm as lgb
import xgboost as xgb

import gc
from numba import jit

random_seed = 33

train_file = 'train_0102_filter.csv'    
write_path = './BayesOpt_result/'

# model_type = 'catboost'
model_type = 'lgb'
# model_type = 'xgb'

if model_type == 'catboost':
    record_file = 'catboost_result.txt'
elif model_type == 'lgb':
    record_file = 'lgb_result.txt'
elif model_type == 'xgb':
    record_file = 'xgb_result.txt'
else:
    pass

iteration = 150
init_it = 50
category_list = []


In [2]:
@jit
def qwk(a1, a2):
    """
    Source: https://www.kaggle.com/c/data-science-bowl-2019/discussion/114133#latest-660168

    :param a1:
    :param a2:
    :param max_rat:
    :return:
    """
    max_rat = 3
    a1 = np.asarray(a1, dtype=int)
    a2 = np.asarray(a2, dtype=int)

    hist1 = np.zeros((max_rat + 1, ))
    hist2 = np.zeros((max_rat + 1, ))

    o = 0
    for k in range(a1.shape[0]):
        i, j = a1[k], a2[k]
        hist1[i] += 1
        hist2[j] += 1
        o +=  (i - j) * (i - j)

    e = 0
    for i in range(max_rat + 1):
        for j in range(max_rat + 1):
            e += hist1[i] * hist2[j] * (i - j) * (i - j)

    e = e / a1.shape[0]

    return 1 - o / e

def eval_qwk_lgb_regr(y_true, y_pred):
    """
    Fast cappa eval function for lgb.
    """
    y_pred[y_pred <= 1.12232214] = 0
    y_pred[np.where(np.logical_and(y_pred > 1.12232214, y_pred <= 1.73925866))] = 1
    y_pred[np.where(np.logical_and(y_pred > 1.73925866, y_pred <= 2.22506454))] = 2
    y_pred[y_pred > 2.22506454] = 3

    # y_pred = y_pred.reshape(len(np.unique(y_true)), -1).argmax(axis=0)

    return 'cappa', qwk(y_true, y_pred), True


In [None]:
train = pd.read_csv('../data/preprocess/'+train_file)
cols_to_drop = ['game_session', 'installation_id', 'timestamp', 'accuracy_group', 'timestampDate']
y = train['accuracy_group'].copy()
for c in cols_to_drop:
    if c in train.columns:
        train = train.drop(columns = c)
X = train

## 驗證集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=random_seed, stratify=y)
     
categorical_features_indices = np.where(X_train.columns.isin(category_list))[0]
print(X_train.dtypes[categorical_features_indices])

## catboost 調參
## https://catboost.ai/docs/concepts/loss-functions-regression.html
param_cat={
    'loss_function':'RMSE', ##MAE
    'eval_metric':'RMSE',
    
    'iterations':14000,
    'random_seed':random_seed,
    'thread_count':5,
    'task_type':"GPU",
    'devices':'0:1',
#     'boosting_type':'Ordered',
    'learning_rate':0.03,
    'l2_leaf_reg':20,#20
    'depth':7,
    'bagging_temperature':0.3,
    'random_strength':10,
    # 'rsm':0.8,

    # 'fold_permutation_block':1,
    # 'feature_border_type':'MinEntropy',
    # 'boosting_type':'Ordered',
    # 'leaf_estimation_backtracking':'Armijo',
    
    'one_hot_max_size':200,
#     'grow_policy':'Lossguide',
#     'grow_policy':'Lossguide',
}
param_range_cat={
#     'depth':(5,16.9),
#     'max_leaves':(20,45),
    'depth':(5,12.9),
    'max_leaves':(31,31.5),
    'l2_leaf_reg':(1,100),
    'bagging_temperature':(0.01,5)  
}

## LGB調參
## https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html#lightgbm.LGBMRegressor
## https://lightgbm.readthedocs.io/en/latest/Parameters-Tuning.html

param_lgb = {'n_estimators':10000,
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'metric': 'rmse',
            'subsample': 0.75,
            'subsample_freq': 1,
            'learning_rate': 0.04,
            'feature_fraction': 0.9,
            'max_depth': 15,
            'lambda_l1': 1,  
            'lambda_l2': 1,
            'verbose': 100,
            'early_stopping_rounds': 400, 'eval_metric': 'cappa'
            }

param_range_lgb={
    'max_depth': (5,16),
    'num_leaves': (15,100),
    'feature_fraction': (0.3,1),
    
    'subsample':(0.2,1),
    'subsample_freq':(1,3),
    'lambda_l1':(0.1,50),
    'lambda_l2':(0.1,50),
}


param_clf = {}
param_range = {}

if model_type == 'catboost':
    param_clf = param_cat
    param_range = param_range_cat
elif model_type == 'lgb':
    param_clf = param_lgb
    param_range = param_range_lgb
elif model_type == 'xgb':
    param_clf = param_lgb
    param_range = param_range_lgb
else:
    pass

def bys_train_catboost(depth,max_leaves,l2_leaf_reg,bagging_temperature):
    param_clf['depth']=int(depth)
    param_clf['max_leaves']=int(max_leaves)
    param_clf['l2_leaf_reg']=l2_leaf_reg
    param_clf['bagging_temperature']=bagging_temperature
    
    model = CatBoostRegressor(**param_clf)
    model.fit(X_train, y_train,
    cat_features=categorical_features_indices,    
    eval_set=(X_test, y_test),
    early_stopping_rounds=200,
    verbose=50) 
     
    score_max = model.get_best_score()['validation']['RMSE']
    
    print(int(depth),int(max_leaves),l2_leaf_reg,bagging_temperature)
    print(score_max)
    
    with open(write_path+record_file,'a') as f:
        print('depth',int(depth),file=f)
        print('max_leaves',int(max_leaves),file=f)
        print('l2_leaf_reg',l2_leaf_reg,file=f)
        print('bagging_temperature',bagging_temperature,file=f)
        print(score_max,file=f)
        print('',file=f)
    
    return -score_max

def bys_train_lgb(max_depth,num_leaves,feature_fraction,subsample,subsample_freq,lambda_l1,lambda_l2):
    param_clf['max_depth']=int(max_depth)
    param_clf['num_leaves']=int(num_leaves)
    param_clf['feature_fraction']=feature_fraction
    param_clf['subsample']=subsample
    param_clf['subsample_freq']=int(subsample_freq)
    param_clf['lambda_l1']=lambda_l1
    param_clf['lambda_l2']=lambda_l2
        
    model = lgb.LGBMRegressor(**param_clf)
    model.fit(X=X_train, y=y_train,
              eval_set=[(X_train, y_train),(X_test, y_test)], eval_metric=eval_qwk_lgb_regr,
              verbose=param_clf['verbose'], early_stopping_rounds=param_clf['early_stopping_rounds'],
              categorical_feature=categorical_features_indices)

    score_max = model.best_score_['valid_1']['rmse']
    
    print("parameter",int(max_depth),int(num_leaves),feature_fraction,subsample,int(subsample_freq),lambda_l1,lambda_l2)
    print("score",score_max)
    
    with open(write_path+record_file,'a') as f:
        print('max_depth',int(max_depth),file=f)
        print('num_leaves',int(num_leaves),file=f)
        print('feature_fraction',feature_fraction,file=f)
        print('subsample',subsample,file=f)
        print('subsample_freq',int(subsample_freq),file=f)
        print('lambda_l1',lambda_l1,file=f)
        print('lambda_l2',lambda_l2,file=f)
        print(score_max,file=f)
        print('',file=f)
    
    return -score_max

def main(write_path='./BayesOpt_result/',record_file='Bayes_result.txt',iteration=5,init_it=5):
    print(iteration)
    with open(write_path+record_file,'a') as f:
        print('\n{}'.format(train_file),file=f)
        
    if model_type == 'catboost':
        Bys_opt = BayesianOptimization(bys_train_catboost,param_range)#, acq="ucb", kappa=1) 
    elif model_type == 'lgb':
        Bys_opt = BayesianOptimization(bys_train_lgb,param_range)#, acq="ucb", kappa=1) 
    elif model_type == 'xgb':
        Bys_opt = BayesianOptimization(bys_train_lgb,param_range)#, acq="ucb", kappaand=1) 
    else:
        pass
    
    Bys_opt.maximize(n_iter=iteration, init_points=init_it)
    print(Bys_opt.max)
    
    with open(write_path+record_file,'a') as f:
        print('Max para',Bys_opt.max,file=f)

main(write_path=write_path, record_file=record_file, iteration=iteration,init_it=init_it)


Series([], dtype: object)
150
|   iter    |  target   | featur... | lambda_l1 | lambda_l2 | max_depth | num_le... | subsample | subsam... |
-------------------------------------------------------------------------------------------------------------


  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame
Compilation is falling back to object mode WITH looplifting enabled because Function "qwk" failed type inference due to: Invalid use of Function(<function asarray at 0x7f2e6c1b82f0>) with argument(s) of type(s): (array(float32, 1d, C), dtype=Function(<class 'int'>))
 * parameterized
In definition 0:
    AttributeError: 'Function' object has no attribute 'dtype'
    raised from /home/jupyter-shliu/.local/lib/python3.6/site-packages/numba/targets/arraymath.py:3845
In definition 1:
    AttributeError: 'Function' object has no attribute 'dtype'
    raised from /home/jupyter-shliu/.local/lib/python3.6/site-packages/numba/targets/arraymath.py:3845
This error is usually caused by passing an argument of a type that is unsupported by

Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.0608	training's cappa: 0.49734	valid_1's rmse: 1.07554	valid_1's cappa: 0.457939
[200]	training's rmse: 0.98402	training's cappa: 0.630443	valid_1's rmse: 1.01335	valid_1's cappa: 0.587912
[300]	training's rmse: 0.948048	training's cappa: 0.653073	valid_1's rmse: 0.991045	valid_1's cappa: 0.60336
[400]	training's rmse: 0.925435	training's cappa: 0.663295	valid_1's rmse: 0.98164	valid_1's cappa: 0.606216
[500]	training's rmse: 0.908679	training's cappa: 0.675403	valid_1's rmse: 0.977534	valid_1's cappa: 0.608347
[600]	training's rmse: 0.894682	training's cappa: 0.685188	valid_1's rmse: 0.97531	valid_1's cappa: 0.60836
[700]	training's rmse: 0.882178	training's cappa: 0.696636	valid_1's rmse: 0.974243	valid_1's cappa: 0.609029
[800]	training's rmse: 0.87085	training's cappa: 0.70642	valid_1's rmse: 0.973839	valid_1's cappa: 0.608283
[900]	training's rmse: 0.860395	training's cappa: 0.714186	valid_1's r

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.07077	training's cappa: 0.473561	valid_1's rmse: 1.07137	valid_1's cappa: 0.469593
[200]	training's rmse: 1.01108	training's cappa: 0.595916	valid_1's rmse: 1.01626	valid_1's cappa: 0.588814
[300]	training's rmse: 0.98598	training's cappa: 0.612682	valid_1's rmse: 0.996006	valid_1's cappa: 0.597713
[400]	training's rmse: 0.971835	training's cappa: 0.620152	valid_1's rmse: 0.987378	valid_1's cappa: 0.598408
[500]	training's rmse: 0.962033	training's cappa: 0.627046	valid_1's rmse: 0.983347	valid_1's cappa: 0.5991
[600]	training's rmse: 0.954034	training's cappa: 0.632271	valid_1's rmse: 0.980161	valid_1's cappa: 0.603207
[700]	training's rmse: 0.947511	training's cappa: 0.637398	valid_1's rmse: 0.979043	valid_1's cappa: 0.601843
[800]	training's rmse: 0.941569	training's cappa: 0.64381	valid_1's rmse: 0.978069	valid_1's cappa: 0.60535
[900]	training's rmse: 0.936225	training's cappa: 0.647265	valid_1'

[500]	training's rmse: 0.94934	training's cappa: 0.636507	valid_1's rmse: 0.984618	valid_1's cappa: 0.599064
[600]	training's rmse: 0.940296	training's cappa: 0.644699	valid_1's rmse: 0.982531	valid_1's cappa: 0.602443
[700]	training's rmse: 0.932736	training's cappa: 0.650874	valid_1's rmse: 0.98136	valid_1's cappa: 0.602917
[800]	training's rmse: 0.926123	training's cappa: 0.656597	valid_1's rmse: 0.980447	valid_1's cappa: 0.604726
[900]	training's rmse: 0.920069	training's cappa: 0.661655	valid_1's rmse: 0.979994	valid_1's cappa: 0.603888
[1000]	training's rmse: 0.914446	training's cappa: 0.667228	valid_1's rmse: 0.979563	valid_1's cappa: 0.602929
[1100]	training's rmse: 0.909293	training's cappa: 0.671801	valid_1's rmse: 0.979231	valid_1's cappa: 0.602551
[1200]	training's rmse: 0.904401	training's cappa: 0.675323	valid_1's rmse: 0.979037	valid_1's cappa: 0.603986
Early stopping, best iteration is:
[802]	training's rmse: 0.926003	training's cappa: 0.65658	valid_1's rmse: 0.980402	v

[1200]	training's rmse: 0.884797	training's cappa: 0.692639	valid_1's rmse: 0.974312	valid_1's cappa: 0.606343
[1300]	training's rmse: 0.879475	training's cappa: 0.697685	valid_1's rmse: 0.974426	valid_1's cappa: 0.606112
Early stopping, best iteration is:
[966]	training's rmse: 0.899095	training's cappa: 0.680695	valid_1's rmse: 0.974523	valid_1's cappa: 0.609567
parameter 6 98 0.4824597171865502 0.9282635990090762 1 36.438679781746764 7.781564101687841
score 0.9745232599562138
| [0m 10      [0m | [0m-0.9745  [0m | [0m 0.4825  [0m | [0m 36.44   [0m | [0m 7.782   [0m | [0m 6.104   [0m | [0m 98.67   [0m | [0m 0.9283  [0m | [0m 1.178   [0m |
Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.05132	training's cappa: 0.538534	valid_1's rmse: 1.06033	valid_1's cappa: 0.51488
[200]	training's rmse: 0.981434	training's cappa: 0.627199	valid_1's rmse: 1.003	valid_1's cappa: 0.595867
[300]	training's rmse: 0.948943	training's cappa: 0.645324

[1300]	training's rmse: 0.89067	training's cappa: 0.68743	valid_1's rmse: 0.978744	valid_1's cappa: 0.604445
[1400]	training's rmse: 0.886116	training's cappa: 0.691143	valid_1's rmse: 0.978479	valid_1's cappa: 0.605252
[1500]	training's rmse: 0.881986	training's cappa: 0.694766	valid_1's rmse: 0.978264	valid_1's cappa: 0.604709
[1600]	training's rmse: 0.8779	training's cappa: 0.698587	valid_1's rmse: 0.978214	valid_1's cappa: 0.604513
[1700]	training's rmse: 0.874313	training's cappa: 0.702016	valid_1's rmse: 0.978408	valid_1's cappa: 0.604668
[1800]	training's rmse: 0.870792	training's cappa: 0.704327	valid_1's rmse: 0.97838	valid_1's cappa: 0.602967
[1900]	training's rmse: 0.867426	training's cappa: 0.707438	valid_1's rmse: 0.978384	valid_1's cappa: 0.603408
[2000]	training's rmse: 0.864255	training's cappa: 0.710373	valid_1's rmse: 0.97843	valid_1's cappa: 0.604631
Early stopping, best iteration is:
[1627]	training's rmse: 0.877024	training's cappa: 0.699157	valid_1's rmse: 0.97811

[800]	training's rmse: 0.874213	training's cappa: 0.70348	valid_1's rmse: 0.975918	valid_1's cappa: 0.6053
[900]	training's rmse: 0.86431	training's cappa: 0.711221	valid_1's rmse: 0.975692	valid_1's cappa: 0.607269
[1000]	training's rmse: 0.855287	training's cappa: 0.719226	valid_1's rmse: 0.975596	valid_1's cappa: 0.607154
[1100]	training's rmse: 0.846851	training's cappa: 0.726518	valid_1's rmse: 0.975652	valid_1's cappa: 0.606726
[1200]	training's rmse: 0.83876	training's cappa: 0.733115	valid_1's rmse: 0.975608	valid_1's cappa: 0.608163
[1300]	training's rmse: 0.831501	training's cappa: 0.740408	valid_1's rmse: 0.975744	valid_1's cappa: 0.607919
Early stopping, best iteration is:
[983]	training's rmse: 0.856768	training's cappa: 0.717684	valid_1's rmse: 0.97555	valid_1's cappa: 0.607907
parameter 11 52 0.7506112752856493 0.9616714914399644 1 35.28394279333552 47.20058704849561
score 0.9755495415579283
| [0m 18      [0m | [0m-0.9755  [0m | [0m 0.7506  [0m | [0m 35.28   [0m 

[1300]	training's rmse: 0.888534	training's cappa: 0.691191	valid_1's rmse: 0.97728	valid_1's cappa: 0.607375
[1400]	training's rmse: 0.883168	training's cappa: 0.694954	valid_1's rmse: 0.977367	valid_1's cappa: 0.609382
[1500]	training's rmse: 0.878107	training's cappa: 0.700286	valid_1's rmse: 0.977427	valid_1's cappa: 0.609476
Early stopping, best iteration is:
[1136]	training's rmse: 0.897678	training's cappa: 0.682341	valid_1's rmse: 0.977371	valid_1's cappa: 0.610599
parameter 9 77 0.5976327664580293 0.31494347754118374 2 26.688726082129893 36.07552083327886
score 0.9773714551778184
| [0m 23      [0m | [0m-0.9774  [0m | [0m 0.5976  [0m | [0m 26.69   [0m | [0m 36.08   [0m | [0m 9.265   [0m | [0m 77.21   [0m | [0m 0.3149  [0m | [0m 2.248   [0m |
Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.03771	training's cappa: 0.571929	valid_1's rmse: 1.05072	valid_1's cappa: 0.541559
[200]	training's rmse: 0.967945	training's cappa: 0.

[1200]	training's rmse: 0.849983	training's cappa: 0.723715	valid_1's rmse: 0.975635	valid_1's cappa: 0.611465
[1300]	training's rmse: 0.843094	training's cappa: 0.730803	valid_1's rmse: 0.975775	valid_1's cappa: 0.611595
[1400]	training's rmse: 0.836382	training's cappa: 0.736329	valid_1's rmse: 0.975665	valid_1's cappa: 0.610929
Early stopping, best iteration is:
[1038]	training's rmse: 0.862244	training's cappa: 0.711537	valid_1's rmse: 0.975894	valid_1's cappa: 0.613385
parameter 14 92 0.45574642625495776 0.6812434811560089 1 33.88074614829802 32.42914791482408
score 0.9758936418002243
| [0m 28      [0m | [0m-0.9759  [0m | [0m 0.4557  [0m | [0m 33.88   [0m | [0m 32.43   [0m | [0m 14.4    [0m | [0m 92.44   [0m | [0m 0.6812  [0m | [0m 1.837   [0m |
Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.05808	training's cappa: 0.536363	valid_1's rmse: 1.06303	valid_1's cappa: 0.522325
[200]	training's rmse: 0.996412	training's cappa: 0

[200]	training's rmse: 1.02126	training's cappa: 0.590158	valid_1's rmse: 1.03064	valid_1's cappa: 0.570265
[300]	training's rmse: 0.991945	training's cappa: 0.609184	valid_1's rmse: 1.0061	valid_1's cappa: 0.58933
[400]	training's rmse: 0.975442	training's cappa: 0.61896	valid_1's rmse: 0.994263	valid_1's cappa: 0.595294
[500]	training's rmse: 0.965244	training's cappa: 0.624712	valid_1's rmse: 0.988942	valid_1's cappa: 0.59956
[600]	training's rmse: 0.957314	training's cappa: 0.631062	valid_1's rmse: 0.985478	valid_1's cappa: 0.599297
[700]	training's rmse: 0.951111	training's cappa: 0.63541	valid_1's rmse: 0.983452	valid_1's cappa: 0.601987
[800]	training's rmse: 0.945902	training's cappa: 0.640652	valid_1's rmse: 0.982318	valid_1's cappa: 0.601323
[900]	training's rmse: 0.941443	training's cappa: 0.644308	valid_1's rmse: 0.981281	valid_1's cappa: 0.603858
[1000]	training's rmse: 0.937209	training's cappa: 0.647482	valid_1's rmse: 0.980557	valid_1's cappa: 0.604234
[1100]	training's

[1100]	training's rmse: 0.876407	training's cappa: 0.698931	valid_1's rmse: 0.975399	valid_1's cappa: 0.608054
[1200]	training's rmse: 0.870617	training's cappa: 0.703172	valid_1's rmse: 0.975177	valid_1's cappa: 0.607362
[1300]	training's rmse: 0.865309	training's cappa: 0.708844	valid_1's rmse: 0.975189	valid_1's cappa: 0.606284
[1400]	training's rmse: 0.860371	training's cappa: 0.712522	valid_1's rmse: 0.975299	valid_1's cappa: 0.608796
Early stopping, best iteration is:
[1007]	training's rmse: 0.882264	training's cappa: 0.695671	valid_1's rmse: 0.975817	valid_1's cappa: 0.610855
parameter 12 52 0.42640608413773773 0.8662333171742365 2 45.55645037799242 17.842270331916616
score 0.9758168402619135
| [0m 36      [0m | [0m-0.9758  [0m | [0m 0.4264  [0m | [0m 45.56   [0m | [0m 17.84   [0m | [0m 12.37   [0m | [0m 52.48   [0m | [0m 0.8662  [0m | [0m 2.615   [0m |
Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.04994	training's cappa

Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.0522	training's cappa: 0.526943	valid_1's rmse: 1.06128	valid_1's cappa: 0.499492
[200]	training's rmse: 0.982065	training's cappa: 0.627349	valid_1's rmse: 1.00474	valid_1's cappa: 0.593226
[300]	training's rmse: 0.947898	training's cappa: 0.646405	valid_1's rmse: 0.984914	valid_1's cappa: 0.599296
[400]	training's rmse: 0.925837	training's cappa: 0.661458	valid_1's rmse: 0.977214	valid_1's cappa: 0.60333
[500]	training's rmse: 0.908726	training's cappa: 0.674996	valid_1's rmse: 0.974365	valid_1's cappa: 0.607397
[600]	training's rmse: 0.89409	training's cappa: 0.687603	valid_1's rmse: 0.973016	valid_1's cappa: 0.608428
[700]	training's rmse: 0.881034	training's cappa: 0.696846	valid_1's rmse: 0.97249	valid_1's cappa: 0.612504
[800]	training's rmse: 0.869355	training's cappa: 0.706773	valid_1's rmse: 0.972463	valid_1's cappa: 0.611839
[900]	training's rmse: 0.857878	training's cappa: 0.716168	valid_

[800]	training's rmse: 0.786209	training's cappa: 0.776049	valid_1's rmse: 0.973662	valid_1's cappa: 0.615704
[900]	training's rmse: 0.769413	training's cappa: 0.789012	valid_1's rmse: 0.974627	valid_1's cappa: 0.614564
[1000]	training's rmse: 0.75429	training's cappa: 0.80086	valid_1's rmse: 0.975247	valid_1's cappa: 0.611472
Early stopping, best iteration is:
[648]	training's rmse: 0.813551	training's cappa: 0.754625	valid_1's rmse: 0.972829	valid_1's cappa: 0.614037
parameter 12 81 0.42508576504492623 0.6537767994170935 2 9.737853629338442 12.718638992702983
score 0.9728294389062776
| [0m 45      [0m | [0m-0.9728  [0m | [0m 0.4251  [0m | [0m 9.738   [0m | [0m 12.72   [0m | [0m 12.84   [0m | [0m 81.48   [0m | [0m 0.6538  [0m | [0m 2.079   [0m |
Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.01357	training's cappa: 0.610436	valid_1's rmse: 1.04279	valid_1's cappa: 0.548992
[200]	training's rmse: 0.926218	training's cappa: 0.686

Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.01988	training's cappa: 0.601329	valid_1's rmse: 1.04007	valid_1's cappa: 0.55286
[200]	training's rmse: 0.942385	training's cappa: 0.666589	valid_1's rmse: 0.989176	valid_1's cappa: 0.598353
[300]	training's rmse: 0.902716	training's cappa: 0.688192	valid_1's rmse: 0.975667	valid_1's cappa: 0.607537
[400]	training's rmse: 0.874356	training's cappa: 0.707298	valid_1's rmse: 0.972057	valid_1's cappa: 0.609048
[500]	training's rmse: 0.852412	training's cappa: 0.722739	valid_1's rmse: 0.971152	valid_1's cappa: 0.611521
[600]	training's rmse: 0.832879	training's cappa: 0.7377	valid_1's rmse: 0.971396	valid_1's cappa: 0.610563
[700]	training's rmse: 0.814574	training's cappa: 0.752552	valid_1's rmse: 0.971591	valid_1's cappa: 0.610122
[800]	training's rmse: 0.798216	training's cappa: 0.765746	valid_1's rmse: 0.972031	valid_1's cappa: 0.610741
[900]	training's rmse: 0.782792	training's cappa: 0.777366	vali

  if categorical_feature == 'auto':  # use cat cols from DataFrame
  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.07988	training's cappa: 0.449689	valid_1's rmse: 1.08327	valid_1's cappa: 0.430233
[200]	training's rmse: 1.01005	training's cappa: 0.598495	valid_1's rmse: 1.01861	valid_1's cappa: 0.579013
[300]	training's rmse: 0.978341	training's cappa: 0.618628	valid_1's rmse: 0.992997	valid_1's cappa: 0.601705
[400]	training's rmse: 0.960574	training's cappa: 0.626683	valid_1's rmse: 0.98175	valid_1's cappa: 0.607387
[500]	training's rmse: 0.948545	training's cappa: 0.638404	valid_1's rmse: 0.977034	valid_1's cappa: 0.611357
[600]	training's rmse: 0.938851	training's cappa: 0.646304	valid_1's rmse: 0.974508	valid_1's cappa: 0.611536
[700]	training's rmse: 0.930445	training's cappa: 0.652666	valid_1's rmse: 0.973202	valid_1's cappa: 0.612118
[800]	training's rmse: 0.922822	training's cappa: 0.658869	valid_1's rmse: 0.972707	valid_1's cappa: 0.611353
[900]	training's rmse: 0.915723	training's cappa: 0.66543	valid

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.07389	training's cappa: 0.465539	valid_1's rmse: 1.08052	valid_1's cappa: 0.448932
[200]	training's rmse: 1.00041	training's cappa: 0.60851	valid_1's rmse: 1.0161	valid_1's cappa: 0.584032
[300]	training's rmse: 0.965843	training's cappa: 0.634815	valid_1's rmse: 0.991875	valid_1's cappa: 0.598618
[400]	training's rmse: 0.944606	training's cappa: 0.64759	valid_1's rmse: 0.981677	valid_1's cappa: 0.605127
[500]	training's rmse: 0.929574	training's cappa: 0.659177	valid_1's rmse: 0.977193	valid_1's cappa: 0.606312
[600]	training's rmse: 0.917377	training's cappa: 0.666768	valid_1's rmse: 0.975104	valid_1's cappa: 0.60852
[700]	training's rmse: 0.90694	training's cappa: 0.675293	valid_1's rmse: 0.973924	valid_1's cappa: 0.611253
[800]	training's rmse: 0.897432	training's cappa: 0.684129	valid_1's rmse: 0.973856	valid_1's cappa: 0.612648
[900]	training's rmse: 0.888505	training's cappa: 0.691179	valid_1'

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.02388	training's cappa: 0.592711	valid_1's rmse: 1.0523	valid_1's cappa: 0.530906
[200]	training's rmse: 0.936755	training's cappa: 0.677899	valid_1's rmse: 0.996944	valid_1's cappa: 0.598345
[300]	training's rmse: 0.888646	training's cappa: 0.705858	valid_1's rmse: 0.980455	valid_1's cappa: 0.608239
[400]	training's rmse: 0.854125	training's cappa: 0.729204	valid_1's rmse: 0.97509	valid_1's cappa: 0.608428
[500]	training's rmse: 0.825832	training's cappa: 0.747254	valid_1's rmse: 0.973951	valid_1's cappa: 0.609497
[600]	training's rmse: 0.800509	training's cappa: 0.764905	valid_1's rmse: 0.973924	valid_1's cappa: 0.61104
[700]	training's rmse: 0.777375	training's cappa: 0.783104	valid_1's rmse: 0.974308	valid_1's cappa: 0.612732
[800]	training's rmse: 0.756604	training's cappa: 0.79791	valid_1's rmse: 0.97521	valid_1's cappa: 0.610518
[900]	training's rmse: 0.737548	training's cappa: 0.811353	valid_

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.06776	training's cappa: 0.489306	valid_1's rmse: 1.06769	valid_1's cappa: 0.484821
[200]	training's rmse: 1.01012	training's cappa: 0.593704	valid_1's rmse: 1.01426	valid_1's cappa: 0.585639
[300]	training's rmse: 0.986416	training's cappa: 0.608674	valid_1's rmse: 0.994736	valid_1's cappa: 0.597404
[400]	training's rmse: 0.972859	training's cappa: 0.61638	valid_1's rmse: 0.986348	valid_1's cappa: 0.600079
[500]	training's rmse: 0.963106	training's cappa: 0.622718	valid_1's rmse: 0.982195	valid_1's cappa: 0.601341
[600]	training's rmse: 0.95542	training's cappa: 0.630695	valid_1's rmse: 0.979942	valid_1's cappa: 0.602049
[700]	training's rmse: 0.94904	training's cappa: 0.635538	valid_1's rmse: 0.978652	valid_1's cappa: 0.601405
[800]	training's rmse: 0.943502	training's cappa: 0.640693	valid_1's rmse: 0.977795	valid_1's cappa: 0.602838
[900]	training's rmse: 0.938482	training's cappa: 0.644846	valid_

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.02155	training's cappa: 0.577506	valid_1's rmse: 1.06398	valid_1's cappa: 0.493252
[200]	training's rmse: 0.919601	training's cappa: 0.702763	valid_1's rmse: 1.00391	valid_1's cappa: 0.592715
[300]	training's rmse: 0.861158	training's cappa: 0.735311	valid_1's rmse: 0.983782	valid_1's cappa: 0.607462
[400]	training's rmse: 0.820441	training's cappa: 0.760246	valid_1's rmse: 0.976788	valid_1's cappa: 0.608333
[500]	training's rmse: 0.788421	training's cappa: 0.781571	valid_1's rmse: 0.974582	valid_1's cappa: 0.608507
[600]	training's rmse: 0.760305	training's cappa: 0.79921	valid_1's rmse: 0.973991	valid_1's cappa: 0.610322
[700]	training's rmse: 0.73525	training's cappa: 0.815451	valid_1's rmse: 0.973726	valid_1's cappa: 0.611496
[800]	training's rmse: 0.712004	training's cappa: 0.830729	valid_1's rmse: 0.974162	valid_1's cappa: 0.610321
[900]	training's rmse: 0.690627	training's cappa: 0.843968	vali

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.08695	training's cappa: 0.432296	valid_1's rmse: 1.09104	valid_1's cappa: 0.414045
[200]	training's rmse: 1.01768	training's cappa: 0.590794	valid_1's rmse: 1.02646	valid_1's cappa: 0.578241
[300]	training's rmse: 0.986251	training's cappa: 0.614016	valid_1's rmse: 1.00017	valid_1's cappa: 0.594002
[400]	training's rmse: 0.968643	training's cappa: 0.623002	valid_1's rmse: 0.987896	valid_1's cappa: 0.605085
[500]	training's rmse: 0.957216	training's cappa: 0.631961	valid_1's rmse: 0.982039	valid_1's cappa: 0.607337
[600]	training's rmse: 0.948413	training's cappa: 0.638532	valid_1's rmse: 0.978908	valid_1's cappa: 0.608604
[700]	training's rmse: 0.941129	training's cappa: 0.643456	valid_1's rmse: 0.977025	valid_1's cappa: 0.608407
[800]	training's rmse: 0.934761	training's cappa: 0.649005	valid_1's rmse: 0.975855	valid_1's cappa: 0.609131
[900]	training's rmse: 0.929012	training's cappa: 0.653687	vali

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.05511	training's cappa: 0.532968	valid_1's rmse: 1.05559	valid_1's cappa: 0.517745
[200]	training's rmse: 0.994457	training's cappa: 0.610421	valid_1's rmse: 1.00209	valid_1's cappa: 0.595446
[300]	training's rmse: 0.967333	training's cappa: 0.627219	valid_1's rmse: 0.983568	valid_1's cappa: 0.606907
[400]	training's rmse: 0.950429	training's cappa: 0.638079	valid_1's rmse: 0.976332	valid_1's cappa: 0.609906
[500]	training's rmse: 0.937086	training's cappa: 0.649854	valid_1's rmse: 0.973832	valid_1's cappa: 0.611285
[600]	training's rmse: 0.925821	training's cappa: 0.659029	valid_1's rmse: 0.973034	valid_1's cappa: 0.610572
[700]	training's rmse: 0.915912	training's cappa: 0.666864	valid_1's rmse: 0.972474	valid_1's cappa: 0.609841
[800]	training's rmse: 0.90661	training's cappa: 0.675765	valid_1's rmse: 0.972362	valid_1's cappa: 0.610104
Early stopping, best iteration is:
[447]	training's rmse: 0.94

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.08272	training's cappa: 0.445987	valid_1's rmse: 1.09138	valid_1's cappa: 0.4176
[200]	training's rmse: 1.00878	training's cappa: 0.602749	valid_1's rmse: 1.02702	valid_1's cappa: 0.573168
[300]	training's rmse: 0.970855	training's cappa: 0.632831	valid_1's rmse: 0.999344	valid_1's cappa: 0.596511
[400]	training's rmse: 0.947175	training's cappa: 0.64745	valid_1's rmse: 0.986349	valid_1's cappa: 0.602786
[500]	training's rmse: 0.930538	training's cappa: 0.658822	valid_1's rmse: 0.980696	valid_1's cappa: 0.607417
[600]	training's rmse: 0.917611	training's cappa: 0.668188	valid_1's rmse: 0.977818	valid_1's cappa: 0.605233
[700]	training's rmse: 0.90626	training's cappa: 0.676304	valid_1's rmse: 0.976061	valid_1's cappa: 0.608116
[800]	training's rmse: 0.895953	training's cappa: 0.685027	valid_1's rmse: 0.975014	valid_1's cappa: 0.609742
[900]	training's rmse: 0.886703	training's cappa: 0.691951	valid_1

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.04503	training's cappa: 0.561944	valid_1's rmse: 1.05558	valid_1's cappa: 0.536399
[200]	training's rmse: 0.979207	training's cappa: 0.629087	valid_1's rmse: 1.0048	valid_1's cappa: 0.593633
[300]	training's rmse: 0.948133	training's cappa: 0.645367	valid_1's rmse: 0.988555	valid_1's cappa: 0.599075
[400]	training's rmse: 0.928255	training's cappa: 0.658278	valid_1's rmse: 0.982438	valid_1's cappa: 0.601171
[500]	training's rmse: 0.913383	training's cappa: 0.669166	valid_1's rmse: 0.979745	valid_1's cappa: 0.600863
[600]	training's rmse: 0.901273	training's cappa: 0.679692	valid_1's rmse: 0.97843	valid_1's cappa: 0.602378
[700]	training's rmse: 0.891102	training's cappa: 0.688999	valid_1's rmse: 0.977633	valid_1's cappa: 0.602822
[800]	training's rmse: 0.882826	training's cappa: 0.696014	valid_1's rmse: 0.977456	valid_1's cappa: 0.603233
[900]	training's rmse: 0.875546	training's cappa: 0.700957	vali

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.07126	training's cappa: 0.473212	valid_1's rmse: 1.08197	valid_1's cappa: 0.439191
[200]	training's rmse: 0.994839	training's cappa: 0.617249	valid_1's rmse: 1.0181	valid_1's cappa: 0.582095
[300]	training's rmse: 0.95637	training's cappa: 0.64594	valid_1's rmse: 0.992846	valid_1's cappa: 0.596997
[400]	training's rmse: 0.932544	training's cappa: 0.659944	valid_1's rmse: 0.982042	valid_1's cappa: 0.603565
[500]	training's rmse: 0.915569	training's cappa: 0.672221	valid_1's rmse: 0.977607	valid_1's cappa: 0.60662
[600]	training's rmse: 0.901811	training's cappa: 0.682871	valid_1's rmse: 0.975946	valid_1's cappa: 0.608533
[700]	training's rmse: 0.89012	training's cappa: 0.691909	valid_1's rmse: 0.974435	valid_1's cappa: 0.609899
[800]	training's rmse: 0.879641	training's cappa: 0.700987	valid_1's rmse: 0.973784	valid_1's cappa: 0.610082
[900]	training's rmse: 0.86981	training's cappa: 0.70905	valid_1's

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.03863	training's cappa: 0.551876	valid_1's rmse: 1.06506	valid_1's cappa: 0.489084
[200]	training's rmse: 0.948039	training's cappa: 0.673246	valid_1's rmse: 1.00237	valid_1's cappa: 0.595826
[300]	training's rmse: 0.90065	training's cappa: 0.697065	valid_1's rmse: 0.982185	valid_1's cappa: 0.608424
[400]	training's rmse: 0.867908	training's cappa: 0.716114	valid_1's rmse: 0.974315	valid_1's cappa: 0.611402
[500]	training's rmse: 0.842295	training's cappa: 0.734103	valid_1's rmse: 0.97264	valid_1's cappa: 0.611804
[600]	training's rmse: 0.819868	training's cappa: 0.74957	valid_1's rmse: 0.972174	valid_1's cappa: 0.612046
[700]	training's rmse: 0.799457	training's cappa: 0.764281	valid_1's rmse: 0.972561	valid_1's cappa: 0.612506
[800]	training's rmse: 0.781143	training's cappa: 0.778695	valid_1's rmse: 0.973347	valid_1's cappa: 0.6134
[900]	training's rmse: 0.763946	training's cappa: 0.792559	valid_1

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.07391	training's cappa: 0.468023	valid_1's rmse: 1.08423	valid_1's cappa: 0.437515
[200]	training's rmse: 0.997701	training's cappa: 0.613911	valid_1's rmse: 1.02006	valid_1's cappa: 0.576275
[300]	training's rmse: 0.959281	training's cappa: 0.644134	valid_1's rmse: 0.994198	valid_1's cappa: 0.593411
[400]	training's rmse: 0.93512	training's cappa: 0.658609	valid_1's rmse: 0.983485	valid_1's cappa: 0.60258
[500]	training's rmse: 0.918014	training's cappa: 0.670529	valid_1's rmse: 0.978505	valid_1's cappa: 0.605645
[600]	training's rmse: 0.904304	training's cappa: 0.679799	valid_1's rmse: 0.976255	valid_1's cappa: 0.608202
[700]	training's rmse: 0.893585	training's cappa: 0.687033	valid_1's rmse: 0.974968	valid_1's cappa: 0.608502
[800]	training's rmse: 0.883189	training's cappa: 0.695139	valid_1's rmse: 0.97436	valid_1's cappa: 0.609997
[900]	training's rmse: 0.8733	training's cappa: 0.704699	valid_1

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.02844	training's cappa: 0.571332	valid_1's rmse: 1.0656	valid_1's cappa: 0.488982
[200]	training's rmse: 0.927876	training's cappa: 0.696778	valid_1's rmse: 1.00399	valid_1's cappa: 0.595374
[300]	training's rmse: 0.87084	training's cappa: 0.72691	valid_1's rmse: 0.984086	valid_1's cappa: 0.606298
[400]	training's rmse: 0.830242	training's cappa: 0.7511	valid_1's rmse: 0.976513	valid_1's cappa: 0.610219
[500]	training's rmse: 0.797333	training's cappa: 0.77123	valid_1's rmse: 0.974277	valid_1's cappa: 0.611231
[600]	training's rmse: 0.768647	training's cappa: 0.791064	valid_1's rmse: 0.974317	valid_1's cappa: 0.614249
[700]	training's rmse: 0.743147	training's cappa: 0.809524	valid_1's rmse: 0.974698	valid_1's cappa: 0.612122
[800]	training's rmse: 0.720059	training's cappa: 0.823089	valid_1's rmse: 0.975561	valid_1's cappa: 0.607208
[900]	training's rmse: 0.699449	training's cappa: 0.835703	valid_1'

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.07845	training's cappa: 0.455833	valid_1's rmse: 1.08584	valid_1's cappa: 0.433385
[200]	training's rmse: 1.00506	training's cappa: 0.604055	valid_1's rmse: 1.02116	valid_1's cappa: 0.579722
[300]	training's rmse: 0.970209	training's cappa: 0.632149	valid_1's rmse: 0.995553	valid_1's cappa: 0.596633
[400]	training's rmse: 0.948781	training's cappa: 0.643877	valid_1's rmse: 0.984166	valid_1's cappa: 0.603522
[500]	training's rmse: 0.933887	training's cappa: 0.653166	valid_1's rmse: 0.979427	valid_1's cappa: 0.605608
[600]	training's rmse: 0.921928	training's cappa: 0.663038	valid_1's rmse: 0.976846	valid_1's cappa: 0.609163
[700]	training's rmse: 0.911739	training's cappa: 0.671594	valid_1's rmse: 0.975733	valid_1's cappa: 0.608902
[800]	training's rmse: 0.902271	training's cappa: 0.678601	valid_1's rmse: 0.974904	valid_1's cappa: 0.606849
[900]	training's rmse: 0.893532	training's cappa: 0.686116	val

  if self.categorical_feature == categorical_feature:
  elif categorical_feature == 'auto':
New categorical_feature is []
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
  if categorical_feature == 'auto':  # use cat cols from DataFrame


Training until validation scores don't improve for 400 rounds
[100]	training's rmse: 1.03586	training's cappa: 0.556119	valid_1's rmse: 1.06996	valid_1's cappa: 0.47844
[200]	training's rmse: 0.93976	training's cappa: 0.684592	valid_1's rmse: 1.00823	valid_1's cappa: 0.592036


## 原理理解
## https://github.com/fmfn/BayesianOptimization/blob/master/examples/visualization.ipynb


#### init step越多，越不容易卡在local mini
#### Kappa決定confidence interval範圍
#### 不了解kappa, GP的過程 唉