In [15]:
## model = catboost

import pandas as pd
import os
import numpy as np
from bayes_opt import BayesianOptimization
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor, Pool

# ---- Run configuration ------------------------------------------------------
random_seed = 33                      # seed used for the data split and for CatBoost

train_file = 'train895.csv'           # file name read from ../data/preprocess/
write_path = './BayesOpt_result/'     # folder that receives the optimisation log
record_file = 'catboost_result.txt'   # log file name inside write_path
iteration = 10                        # forwarded to main() as `iteration`
init_it = 10                          # forwarded to main() as `init_it`
category_list = []                    # names of columns to treat as categorical (none here)

# ---- Load data, separate the target from the features ------------------------
train = pd.read_csv('../data/preprocess/'+train_file)
cols_to_drop = ['game_session', 'installation_id', 'timestamp', 'accuracy_group', 'timestampDate']
y = train['accuracy_group'].copy()
# errors='ignore' skips any listed column that is absent from this file.
train = train.drop(columns=cols_to_drop, errors='ignore')
X = train

# ---- Hold-out split ----------------------------------------------------------
# One stratified split: 25% of the rows become the validation set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=random_seed, stratify=y)

# Positional indices of the columns named in category_list.
categorical_features_indices = np.where(X_train.columns.isin(category_list))[0]
# Use .iloc for the positional lookup: plain [] on a label-indexed Series with
# integer keys is deprecated in pandas.
print(X_train.dtypes.iloc[categorical_features_indices])

# CatBoost parameter tuning: baseline settings for the regressor.
# Loss-function reference:
# https://catboost.ai/docs/concepts/loss-functions-regression.html
param_clf = dict(
    # Objective and validation metric (MAE was noted as an alternative loss).
    loss_function='RMSE',
    eval_metric='RMSE',

    # Training budget, seed and hardware.
    iterations=14000,
    random_seed=random_seed,
    thread_count=1,
    task_type="GPU",
    devices='0:1',

    # Learning / regularisation settings. The search in bys_train_model
    # supplies its own depth, l2_leaf_reg and bagging_temperature per trial.
    learning_rate=0.03,
    l2_leaf_reg=20,
    depth=7,
    bagging_temperature=0.3,
    random_strength=10,

    # Commented-out options kept for reference:
    #   rsm=0.8, fold_permutation_block=1, feature_border_type='MinEntropy',
    #   boosting_type='Ordered', leaf_estimation_backtracking='Armijo'

    one_hot_max_size=200,
    grow_policy='Lossguide',
)

# Search bounds as (lower, upper) pairs, one entry per argument of
# bys_train_model. depth and max_leaves are truncated with int() there.
param_range = dict(
    depth=(5, 16.9),
    max_leaves=(20, 45),
    l2_leaf_reg=(1, 100),
    bagging_temperature=(0.01, 5),
)

def bys_train_model(depth,max_leaves,l2_leaf_reg,bagging_temperature):
    """Objective for the Bayesian search: train one CatBoost model and score it.

    The model is fitted on the module-level ``X_train``/``y_train`` with
    ``X_test``/``y_test`` as the evaluation set. The trial's settings and its
    best validation RMSE are printed and appended to the module-level record
    file (``write_path + record_file``).

    Parameters
    ----------
    depth, max_leaves : float
        Proposed values; truncated to integers with ``int()`` before use.
    l2_leaf_reg, bagging_temperature : float
        Proposed values; used as given.

    Returns
    -------
    float
        The negated best validation RMSE. The sign is flipped because the
        caller maximises the returned value.
    """
    depth = int(depth)
    max_leaves = int(max_leaves)

    # Build the trial's settings on a copy so the shared module-level
    # ``param_clf`` baseline is not altered by each trial.
    trial_params = dict(param_clf)
    trial_params.update(
        depth=depth,
        max_leaves=max_leaves,
        l2_leaf_reg=l2_leaf_reg,
        bagging_temperature=bagging_temperature,
    )

    model = CatBoostRegressor(**trial_params)
    model.fit(
        X_train, y_train,
        cat_features=categorical_features_indices,
        eval_set=(X_test, y_test),
        early_stopping_rounds=1000,
        verbose=50,
    )

    best_rmse = model.get_best_score()['validation']['RMSE']

    print(depth,max_leaves,l2_leaf_reg,bagging_temperature)
    print(best_rmse)

    # Make sure the output folder exists so a finished training run is not
    # lost to a failing open().
    record_path = write_path+record_file
    record_dir = os.path.dirname(record_path)
    if record_dir:
        os.makedirs(record_dir, exist_ok=True)

    with open(record_path,'a') as f:
        print('depth',depth,file=f)
        print('max_leaves',max_leaves,file=f)
        print('l2_leaf_reg',l2_leaf_reg,file=f)
        print('bagging_temperature',bagging_temperature,file=f)
        print(best_rmse,file=f)
        print('',file=f)

    return -best_rmse


def main(write_path='./BayesOpt_result/',record_file='Bayes_result.txt',iteration=5,init_it=5):
    """Run the Bayesian hyper-parameter search and log its best result.

    Appends a header line containing the module-level ``train_file`` name to
    ``write_path + record_file``, maximises ``bys_train_model`` over
    ``param_range``, then prints the best result and appends it to the same
    file.

    Parameters
    ----------
    write_path : str
        Folder prefix of the record file; concatenated directly with
        ``record_file``.
    record_file : str
        Name of the record file.
    iteration : int
        Passed to ``maximize`` as ``n_iter``.
    init_it : int
        Passed to ``maximize`` as ``init_points``.

    Notes
    -----
    ``bys_train_model`` writes its per-trial records to the module-level
    ``write_path``/``record_file``, not to the arguments given here. If the
    two differ, the header/summary and the trial records go to different
    files.
    """
    print(iteration)

    # Create the output folder up front so the first open() cannot fail on a
    # missing directory.
    record_path = write_path+record_file
    record_dir = os.path.dirname(record_path)
    if record_dir:
        os.makedirs(record_dir, exist_ok=True)

    with open(record_path,'a') as f:
        print('\n{}'.format(train_file),file=f)

    # Seed the optimiser so the sequence of probed points is reproducible.
    Bys_opt = BayesianOptimization(bys_train_model,param_range,random_state=random_seed)
    Bys_opt.maximize(n_iter=iteration, init_points=init_it)
    print(Bys_opt.max)

    with open(record_path,'a') as f:
        print('Max para',Bys_opt.max,file=f)

# Start the search with the settings configured at the top of this cell
# (arguments are in main()'s declared order).
main(write_path, record_file, iteration, init_it)


Series([], dtype: object)
10
|   iter    |  target   | baggin... |   depth   | l2_lea... | max_le... |
-------------------------------------------------------------------------
0:	learn: 1.2453430	test: 1.2450854	best: 1.2450854 (0)	total: 33.1ms	remaining: 7m 43s
50:	learn: 1.0269330	test: 1.0329938	best: 1.0329938 (50)	total: 1.5s	remaining: 6m 50s
100:	learn: 0.9835729	test: 1.0023664	best: 1.0023664 (100)	total: 2.95s	remaining: 6m 45s
150:	learn: 0.9630354	test: 0.9931922	best: 0.9931922 (150)	total: 4.37s	remaining: 6m 40s
200:	learn: 0.9487160	test: 0.9898308	best: 0.9898308 (200)	total: 5.8s	remaining: 6m 38s
250:	learn: 0.9374791	test: 0.9878376	best: 0.9878253 (249)	total: 7.27s	remaining: 6m 38s
300:	learn: 0.9276564	test: 0.9869380	best: 0.9869380 (300)	total: 8.71s	remaining: 6m 36s
350:	learn: 0.9187001	test: 0.9858856	best: 0.9858856 (350)	total: 10.2s	remaining: 6m 34s
400:	learn: 0.9104987	test: 0.9853403	best: 0.9853384 (399)	total: 11.6s	remaining: 6m 32s
450:	learn:

1250:	learn: 0.7860678	test: 0.9861464	best: 0.9817441 (464)	total: 17.1s	remaining: 2m 54s
1300:	learn: 0.7814058	test: 0.9861782	best: 0.9817441 (464)	total: 17.8s	remaining: 2m 54s
1350:	learn: 0.7769189	test: 0.9860753	best: 0.9817441 (464)	total: 18.5s	remaining: 2m 53s
1400:	learn: 0.7721946	test: 0.9866288	best: 0.9817441 (464)	total: 19.2s	remaining: 2m 52s
1450:	learn: 0.7676032	test: 0.9871089	best: 0.9817441 (464)	total: 19.9s	remaining: 2m 52s
bestTest = 0.9817441029
bestIteration = 464
Shrink model to first 465 iterations.
5 28 64.92245593634513 1.2484009195095667
0.9817441029006231
| [95m 3       [0m | [95m-0.9817  [0m | [95m 1.248   [0m | [95m 5.732   [0m | [95m 64.92   [0m | [95m 28.45   [0m |
0:	learn: 1.2441627	test: 1.2442126	best: 1.2442126 (0)	total: 24.8ms	remaining: 5m 47s
50:	learn: 0.9991493	test: 1.0170894	best: 1.0170894 (50)	total: 1s	remaining: 4m 34s
100:	learn: 0.9467065	test: 0.9892075	best: 0.9892075 (100)	total: 1.98s	remaining: 4m 32s
150:

1150:	learn: 0.8778585	test: 0.9870696	best: 0.9869350 (1045)	total: 26.8s	remaining: 4m 59s
1200:	learn: 0.8744007	test: 0.9872503	best: 0.9869350 (1045)	total: 28s	remaining: 4m 58s
1250:	learn: 0.8708934	test: 0.9872555	best: 0.9869350 (1045)	total: 29.2s	remaining: 4m 57s
1300:	learn: 0.8676053	test: 0.9875964	best: 0.9869350 (1045)	total: 30.4s	remaining: 4m 56s
1350:	learn: 0.8644351	test: 0.9874000	best: 0.9869350 (1045)	total: 31.5s	remaining: 4m 54s
1400:	learn: 0.8613533	test: 0.9874723	best: 0.9869350 (1045)	total: 32.5s	remaining: 4m 52s
1450:	learn: 0.8583908	test: 0.9874314	best: 0.9869350 (1045)	total: 33.7s	remaining: 4m 51s
1500:	learn: 0.8553713	test: 0.9875198	best: 0.9869350 (1045)	total: 34.8s	remaining: 4m 50s
1550:	learn: 0.8523634	test: 0.9876652	best: 0.9869350 (1045)	total: 35.9s	remaining: 4m 48s
1600:	learn: 0.8489646	test: 0.9880163	best: 0.9869350 (1045)	total: 37s	remaining: 4m 46s
1650:	learn: 0.8460292	test: 0.9878774	best: 0.9869350 (1045)	total: 38.1s

1550:	learn: 0.9096340	test: 0.9896108	best: 0.9892625 (1346)	total: 23.6s	remaining: 3m 9s
1600:	learn: 0.9075587	test: 0.9897816	best: 0.9892625 (1346)	total: 24.4s	remaining: 3m 9s
1650:	learn: 0.9056075	test: 0.9895669	best: 0.9892625 (1346)	total: 25.2s	remaining: 3m 8s
1700:	learn: 0.9037228	test: 0.9894189	best: 0.9892625 (1346)	total: 26s	remaining: 3m 8s
1750:	learn: 0.9018829	test: 0.9893686	best: 0.9892625 (1346)	total: 26.8s	remaining: 3m 7s
1800:	learn: 0.8999085	test: 0.9894971	best: 0.9892625 (1346)	total: 27.6s	remaining: 3m 7s
1850:	learn: 0.8979590	test: 0.9896817	best: 0.9892625 (1346)	total: 28.4s	remaining: 3m 6s
1900:	learn: 0.8961867	test: 0.9893091	best: 0.9892518 (1888)	total: 29.2s	remaining: 3m 5s
1950:	learn: 0.8943054	test: 0.9894226	best: 0.9892468 (1934)	total: 30s	remaining: 3m 5s
2000:	learn: 0.8925403	test: 0.9895378	best: 0.9892468 (1934)	total: 30.8s	remaining: 3m 4s
2050:	learn: 0.8910037	test: 0.9896220	best: 0.9892468 (1934)	total: 31.6s	remaining

0:	learn: 1.2445331	test: 1.2443720	best: 1.2443720 (0)	total: 18ms	remaining: 4m 12s
50:	learn: 1.0137252	test: 1.0212903	best: 1.0212903 (50)	total: 599ms	remaining: 2m 43s
100:	learn: 0.9662791	test: 0.9948029	best: 0.9948029 (100)	total: 1.18s	remaining: 2m 41s
150:	learn: 0.9377961	test: 0.9878534	best: 0.9878534 (150)	total: 1.75s	remaining: 2m 40s
200:	learn: 0.9175488	test: 0.9858940	best: 0.9858940 (200)	total: 2.32s	remaining: 2m 39s
250:	learn: 0.9008989	test: 0.9856668	best: 0.9854112 (239)	total: 2.87s	remaining: 2m 37s
300:	learn: 0.8860655	test: 0.9856053	best: 0.9854112 (239)	total: 3.44s	remaining: 2m 36s
350:	learn: 0.8727155	test: 0.9862793	best: 0.9854112 (239)	total: 4.01s	remaining: 2m 36s
400:	learn: 0.8596705	test: 0.9865127	best: 0.9854112 (239)	total: 4.58s	remaining: 2m 35s
450:	learn: 0.8475900	test: 0.9871829	best: 0.9854112 (239)	total: 5.15s	remaining: 2m 34s
500:	learn: 0.8359790	test: 0.9878968	best: 0.9854112 (239)	total: 5.72s	remaining: 2m 34s
550:	l

450:	learn: 0.8073002	test: 0.9842889	best: 0.9817389 (244)	total: 6.44s	remaining: 3m 13s
500:	learn: 0.7923514	test: 0.9858546	best: 0.9817389 (244)	total: 7.14s	remaining: 3m 12s
550:	learn: 0.7783256	test: 0.9863752	best: 0.9817389 (244)	total: 7.86s	remaining: 3m 11s
600:	learn: 0.7655157	test: 0.9871261	best: 0.9817389 (244)	total: 8.54s	remaining: 3m 10s
650:	learn: 0.7528067	test: 0.9879964	best: 0.9817389 (244)	total: 9.2s	remaining: 3m 8s
700:	learn: 0.7402714	test: 0.9889229	best: 0.9817389 (244)	total: 9.88s	remaining: 3m 7s
750:	learn: 0.7277220	test: 0.9892696	best: 0.9817389 (244)	total: 10.6s	remaining: 3m 6s
800:	learn: 0.7158901	test: 0.9899015	best: 0.9817389 (244)	total: 11.3s	remaining: 3m 6s
850:	learn: 0.7040639	test: 0.9903952	best: 0.9817389 (244)	total: 12s	remaining: 3m 5s
900:	learn: 0.6928445	test: 0.9916979	best: 0.9817389 (244)	total: 12.8s	remaining: 3m 5s
950:	learn: 0.6822439	test: 0.9928196	best: 0.9817389 (244)	total: 13.5s	remaining: 3m 4s
1000:	lea

850:	learn: 0.7883652	test: 0.9869502	best: 0.9821874 (315)	total: 9.5s	remaining: 2m 26s
900:	learn: 0.7805464	test: 0.9879248	best: 0.9821874 (315)	total: 10.1s	remaining: 2m 26s
950:	learn: 0.7729535	test: 0.9881926	best: 0.9821874 (315)	total: 10.6s	remaining: 2m 25s
1000:	learn: 0.7654264	test: 0.9890145	best: 0.9821874 (315)	total: 11.2s	remaining: 2m 25s
1050:	learn: 0.7578524	test: 0.9892431	best: 0.9821874 (315)	total: 11.8s	remaining: 2m 24s
1100:	learn: 0.7510834	test: 0.9895333	best: 0.9821874 (315)	total: 12.3s	remaining: 2m 24s
1150:	learn: 0.7442457	test: 0.9900040	best: 0.9821874 (315)	total: 12.9s	remaining: 2m 23s
1200:	learn: 0.7376504	test: 0.9904999	best: 0.9821874 (315)	total: 13.4s	remaining: 2m 23s
1250:	learn: 0.7310385	test: 0.9910171	best: 0.9821874 (315)	total: 14s	remaining: 2m 22s
1300:	learn: 0.7243081	test: 0.9916172	best: 0.9821874 (315)	total: 14.6s	remaining: 2m 22s
bestTest = 0.9821873884
bestIteration = 315
Shrink model to first 316 iterations.
5 20

1100:	learn: 0.7268387	test: 0.9893664	best: 0.9803221 (240)	total: 17s	remaining: 3m 19s
1150:	learn: 0.7191617	test: 0.9902733	best: 0.9803221 (240)	total: 17.8s	remaining: 3m 18s
1200:	learn: 0.7115743	test: 0.9909384	best: 0.9803221 (240)	total: 18.5s	remaining: 3m 17s
bestTest = 0.9803220576
bestIteration = 240
Shrink model to first 241 iterations.
16 20 93.16462598937005 0.03968048384847722
0.980322057565991
| [0m 20      [0m | [0m-0.9803  [0m | [0m 0.03968 [0m | [0m 16.78   [0m | [0m 93.16   [0m | [0m 20.62   [0m |
{'target': -0.9799688993025314, 'params': {'bagging_temperature': 0.057477425143432195, 'depth': 14.885048679549936, 'l2_leaf_reg': 96.96301937199281, 'max_leaves': 20.49876963537232}}


In [None]:
## Understanding the underlying principle of Bayesian optimization
## https://github.com/fmfn/BayesianOptimization/blob/master/examples/visualization.ipynb
