In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import types
import copy

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_colwidth', -1)

In [4]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [5]:
df_spec_train = pd.read_pickle('../feats/spec_features.pkl')
df_spec_test = pd.read_pickle('../feats/spec_features_test.pkl')

In [6]:
df_train = pd.merge(df_train, df_spec_train, on='index')
df_test = pd.merge(df_test, df_spec_test, on='index')

In [7]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [14]:
db = DFDB('../trial/gradientboosting.pkl', auto_commit=False)

In [9]:
lgbm_top200 = ['spkt_welch_densitycoeff_2',
 'spkt_welch_density__coeff_3',
 'q25_roll_std_100',
 '3th_peak_freq',
 'min_roll_std_100',
 'q05_roll_std_100',
 'iqr_6',
 'abs_max_8',
 'mfcc_5_abs_mean',
 "number_peaks{'n': 10}",
 'ave10_7',
 'mfcc_13_mean',
 "autocorrelation{'lag': 5}",
 'mfcc_accelerate_8_variance',
 'abs_max_7',
 'mfcc_accelerate_1_kurtosis',
 'q05_roll_std_1000',
 'spkt_welch_density__coeff_42',
 'max_to_min_diff_5',
 'mfcc_13_quantile25',
 'fft_coefficientcoeff_80__attr_"imag"',
 'abs_q25_5',
 'mfcc_5_mean',
 'median__roll_std',
 '5000skewness_max_',
 'fft_coefficientcoeff_6__attr_"abs"',
 'partial_autocorrelationlag_5',
 'abs_min_8',
 'spkt_welch_density__coeff_28',
 'ar_coefficientk_10__coeff_3',
 'abs_q75_7',
 'mfcc_accelerate_15_min',
 'abs_max_4',
 'mfcc_10_quantile25',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_density__coeff_79',
 "value_count{'value': 1}",
 '5000peak_peak_amp_max_',
 'min__roll_std',
 'mfcc_4_rolling_std_mean',
 'q01_roll_std_1000',
 'mfcc_3_abs_q75',
 'mfcc_5_quantile95',
 'fft_coefficientcoeff_16__attr_"imag"',
 'abs_q01_4',
 'mfcc_6_quantile05',
 'agg_autocorrelationf_agg_"mean"__maxlag_40',
 '5000quantile25skewness_',
 'mfcc_0_abs_q25',
 'mfcc_delta_3_quantile01',
 'spkt_welch_density__coeff_99',
 'mfcc_9_mean',
 'mfcc_12_mean',
 'q75_roll_mean_10',
 'max_to_min',
 'mfcc_3_abs_mean',
 'kurt_7',
 'mfcc_14_quantile01',
 'mfcc_10_mean',
 'mfcc_delta_13_min',
 'mfcc_delta_13_abs_max',
 '5000no_zero_crossing_mean_',
 'abs_q75_6',
 'mfcc_12_quantile99',
 'mfcc_accelerate_6_skewness',
 'mfcc_5_max',
 '5000smoothness_entropy_',
 '5000median_skewness_',
 'mfcc_accelerate_8_min',
 'abs_min_3',
 '5000quantile75mean_',
 "number_crossing_m{'m': 1}",
 '5000smoothness_std_',
 'mfcc_9_abs_q25',
 'flac3_1_quantile99',
 'mfcc_5_skewness',
 'fft_coefficientcoeff_56__attr_"angle"',
 'fft_coefficientcoeff_70__attr_"abs"',
 'mfcc_10_abs_q75',
 'fft_coefficientcoeff_24__attr_"angle"',
 'med_7',
 'spkt_welch_density__coeff_73',
 'abs_q99_8',
 'ave10_6',
 'spkt_welch_density__coeff_38',
 'skew_1',
 'mfcc_delta_3_abs_q95',
 "change_quantiles{'ql': 0.6, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}",
 'max_to_min_5',
 'mfcc_delta_4_quantile75',
 'abs_max_1',
 '5000crest_factor_quantile75',
 'partial_autocorrelationlag_1',
 'mfcc_delta_11_quantile01',
 'mfcc_accelerate_7_quantile01',
 'mfcc_8_rolling_std_mean',
 'q95_roll_mean_100',
 'mfcc_delta_6_rolling_std_mean',
 'flac3_0_min',
 'fft_coefficientcoeff_36__attr_"abs"',
 'iqr_8',
 'max_9',
 '5000smoothness_quantile05',
 'mfcc_1_kurtosis',
 'mfcc_7_abs_q95',
 'spkt_welch_density__coeff_66',
 'spkt_welch_density__coeff_64',
 'mfcc_3_abs_q95',
 '5000skewness_mean_',
 'mfcc_10_abs_mean',
 '5000quantile99quantile01',
 'mean_change_rate',
 'mfcc_accelerate_5_rolling_std_mean',
 'mfcc_accelerate_5_quantile05',
 '5000rms_median_',
 'flac3_1_abs_mean',
 '5000rms_quantile05',
 '5000quantile75quantile25',
 'mfcc_accelerate_12_min',
 "value_count{'value': -1}",
 'mfcc_12_abs_max',
 'abs_min_5',
 'mfcc_5_abs_q99',
 'mfcc_delta_5_quantile01',
 'mfcc_12_quantile75',
 'fft_coefficientcoeff_56__attr_"imag"',
 'spkt_welch_densitycoeff_5',
 'mfcc_0_abs_q05',
 'mfcc_13_quantile75',
 'mfcc_delta_5_min',
 'mfcc_5_quantile99',
 'fft_coefficientcoeff_8__attr_"angle"',
 'spkt_welch_density__coeff_30',
 'mfcc_accelerate_4_max',
 'mfcc_14_median',
 "change_quantiles{'ql': 0.2, 'qh': 0.4, 'isabs': False, 'f_agg': 'var'}",
 'mfcc_5_median',
 'mfcc_accelerate_5_abs_q75',
 'spkt_welch_density__coeff_59',
 'mfcc_accelerate_5_kurtosis',
 'mfcc_delta_9_quantile99',
 'mfcc_5_quantile25',
 'spkt_welch_density__coeff_113',
 'mfcc_2_quantile05',
 'spkt_welch_density__coeff_58',
 'mfcc_5_abs_q75',
 'spkt_welch_density__coeff_22',
 'spkt_welch_density__coeff_115',
 'spkt_welch_density__coeff_4',
 'mfcc_5_abs_q05',
 'spkt_welch_density__coeff_25',
 'mfcc_5_abs_max',
 'spkt_welch_density__coeff_27',
 'mfcc_4_quantile75',
 'mfcc_3_rolling_std_mean',
 'mfcc_4_median',
 'mfcc_0_quantile01',
 '5000quantile75rssq_',
 'fft_coefficientcoeff_24__attr_"imag"',
 'mfcc_delta_12_kurtosis',
 '5000rms_quantile25',
 'mfcc_13_quantile05',
 'fft_coefficientcoeff_8__attr_"imag"',
 'mfcc_delta_11_rolling_std_mean',
 'mfcc_delta_12_quantile99',
 'mfcc_accelerate_9_abs_q25',
 'mfcc_11_abs_mean',
 'mfcc_delta_15_abs_max',
 'abs_max_2',
 'mfcc_11_abs_std',
 'abs_max_roll_mean_1000',
 'peak_to_average_power_ratio__roll_mean',
 'mfcc_13_median',
 'fft_coefficientcoeff_62__attr_"abs"',
 'mfcc_11_quantile75',
 "quantile{'q': 0.8}",
 'mfcc_delta_3_rolling_std_mean',
 'abs_q99_7',
 'mfcc_delta_12_max',
 'mfcc_12_min',
 'mfcc_10_quantile99',
 'mfcc_14_abs_q99',
 'mfcc_14_quantile05',
 'mfcc_14_quantile25',
 'kurt_1',
 'mfcc_2_median',
 'q01_2',
 'mfcc_2_abs_q25',
 'kurt_8',
 'mfcc_accelerate_6_abs_max',
 'mfcc_1_quantile75',
 'q05_5',
 'abs_q95_9',
 'q05_roll_mean_100',
 'mfcc_10_abs_q25',
 'q75_9',
 'mfcc_15_mean',
 "change_quantiles{'ql': 0.6, 'qh': 0.8, 'isabs': True, 'f_agg': 'mean'}",
 'flac3_0_abs_std',
 "change_quantiles{'ql': 0.4, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}"]

In [17]:
df_trial = db.select()
df_trial['kfold-type'] = df_trial['param'].apply(lambda x: x['kfold']['type'])
df_trial['algorithm-init'] = df_trial['param'].apply(lambda x: x['algorithm']['init'])

In [20]:
df_trial[(df_trial['kfold-type']=='group')&(df_trial['mae_diff']<.1)][['datetime','nfeatures', 'remark', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','remark','algorithm-init']].sort_values(by=['val_mae'], ascending=True).head()

Unnamed: 0,datetime,nfeatures,remark,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark.1,algorithm-init
688,2019-05-24 08:14:27.727830,20,tune 545,1.974755,0.011996,2.074522,0.535916,0.099766,tune 545,"{'max_depth': 2, 'learning_rate': 0.043893126214312544, 'n_estimators': 188, 'subsample': 0.8120288957009124, 'alpha': 0.5039403539659748, 'random_state': 1710}"
699,2019-05-24 08:33:14.514634,20,tune 545,1.983069,0.012316,2.077436,0.544086,0.094368,tune 545,"{'max_depth': 3, 'learning_rate': 0.017867746973556756, 'n_estimators': 183, 'subsample': 0.9364071303380447, 'alpha': 0.7886548218635598, 'random_state': 1653}"
693,2019-05-24 08:22:05.220497,20,tune 545,1.982695,0.012261,2.078618,0.540848,0.095923,tune 545,"{'max_depth': 2, 'learning_rate': 0.026197777198427176, 'n_estimators': 277, 'subsample': 0.8346906223847272, 'alpha': 0.35154651885746163, 'random_state': 903}"
558,2019-05-24 02:55:27.793002,20,tune 545,1.986713,0.01227,2.079719,0.542177,0.093006,tune 545,"{'max_depth': 2, 'learning_rate': 0.021979035021830073, 'n_estimators': 307, 'subsample': 0.605044594079294, 'alpha': 0.7995945142008916, 'random_state': 7237}"
562,2019-05-24 03:04:12.698725,20,tune 545,1.992705,0.012423,2.080707,0.546339,0.088002,tune 545,"{'max_depth': 2, 'learning_rate': 0.021069858613856404, 'n_estimators': 293, 'subsample': 0.6051914971875781, 'alpha': 0.7927491729365248, 'random_state': 6518}"


In [26]:
param = {'columns':lgbm_top200 ,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler', 'init': {}},
 'algorithm': {'cls': 'GradientBoostingRegressor',
  'init': {'max_depth': 3,
   'learning_rate': 0.017867746973556756,
   'n_estimators': 183,
   'subsample': 0.9364071303380447,
   'alpha': 0.7886548218635598,
   'random_state': 1653},
  'fit': {}}}

In [27]:
mytrial =[]
EP.select_features_(df_train, param, mytrial, nfeats_best=30, nfeats_removed_per_try=10, key='average_permutation_weight', remark='rfe to 30 group3')

In [28]:
for trial_i in mytrial:
    db.insert(trial_i)
db.commit()

In [41]:
df_trial = db.select()
param = df_trial[df_trial['remark']=='rfe to 30 group3'].tail(1)['param'].values[0]
score = df_trial[df_trial['remark']=='rfe to 30 group3'].tail(1).val_mae.values[0]
index = df_trial[df_trial['remark']=='rfe to 30 group3'].tail(1).index.values[0]

In [44]:
mytrial=[]
EP.width_frist_rfe(df_train, param, mytrial, score, df_test=df_test, remark='wf {}'.format(index))

In [45]:
for trial_i in mytrial:
    db.insert(trial_i)
db.commit()

In [50]:
df_trial = db.select()

In [52]:
df_trial[(df_trial['remark']=='wf 963')].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head()

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1244,2019-06-01 23:38:10.626053,wf 963,18,1.97201,0.002385,2.042018,0.011252,0.070009
1270,2019-06-01 23:46:42.448091,wf 963,17,1.972417,0.002367,2.042295,0.011503,0.069879
1266,2019-06-01 23:45:24.982259,wf 963,17,1.972576,0.002401,2.042318,0.011161,0.069742
1262,2019-06-01 23:44:08.020659,wf 963,17,1.97247,0.002362,2.042351,0.011206,0.069881
1267,2019-06-01 23:45:44.097592,wf 963,17,1.972471,0.002338,2.042364,0.011278,0.069893


In [54]:
index = df_trial[(df_trial['remark']=='wf 963')].sort_values(by=['val_mae']).head(1).index.values[0]

In [56]:
'tune {} g3'.format(index)

'tune 1244 g3'

In [57]:
mytrial =[]

###  tune hypterparameters
def objective(trial):
    
    max_depth = trial.suggest_int('max_depth', 2, 6)
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 0.4)
    n_estimators = trial.suggest_int('n_estimators', 100,500)
    subsample = trial.suggest_uniform('subsample', 0.6, 1.0)
    alpha = trial.suggest_uniform('alpha', 0.00001, 1.0)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':df_trial.loc[index]['param']['columns'],
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group'
        },
        'scaler':{
            'cls':'StandardScaler',
            'init':{}
        },
        'algorithm':{
            'cls':'GradientBoostingRegressor',
            'init':{
                "max_depth":max_depth,
                "learning_rate":learning_rate,
                "n_estimators":n_estimators,
                "subsample":subsample,
                "alpha":alpha,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune {} g3'.format(index))
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-06-01 23:59:00,614] Finished a trial resulted in value: 2.9055623212402732. Current best value is 2.9055623212402732 with parameters: {'max_depth': 5, 'learning_rate': 0.3091968002230341, 'n_estimators': 331, 'subsample': 0.6954440383830816, 'alpha': 0.8682708855654327, 'random_state': 9565}.
[I 2019-06-02 00:00:30,854] Finished a trial resulted in value: 4.043981308311323. Current best value is 2.9055623212402732 with parameters: {'max_depth': 5, 'learning_rate': 0.3091968002230341, 'n_estimators': 331, 'subsample': 0.6954440383830816, 'alpha': 0.8682708855654327, 'random_state': 9565}.
[I 2019-06-02 00:01:03,220] Finished a trial resulted in value: 1.6620919802352028. Current best value is 1.6620919802352028 with parameters: {'max_depth': 4, 'learning_rate': 0.2728259523627895, 'n_estimators': 261, 'subsample': 0.6049360593547428, 'alpha': 0.9111149663826539, 'random_state': 1128}.
[I 2019-06-02 00:01:39,754] Finished a trial resulted in value: 1.3623919744241175. Current bes

[I 2019-06-02 00:11:58,098] Finished a trial resulted in value: 0.3069701789839476. Current best value is 0.05979859307100809 with parameters: {'max_depth': 2, 'learning_rate': 0.011875750651251793, 'n_estimators': 146, 'subsample': 0.9110177309007167, 'alpha': 0.4913037470983478, 'random_state': 9788}.
[I 2019-06-02 00:12:29,521] Finished a trial resulted in value: 0.36337252359066463. Current best value is 0.05979859307100809 with parameters: {'max_depth': 2, 'learning_rate': 0.011875750651251793, 'n_estimators': 146, 'subsample': 0.9110177309007167, 'alpha': 0.4913037470983478, 'random_state': 9788}.
[I 2019-06-02 00:13:30,824] Finished a trial resulted in value: 3.1218380821423133. Current best value is 0.05979859307100809 with parameters: {'max_depth': 2, 'learning_rate': 0.011875750651251793, 'n_estimators': 146, 'subsample': 0.9110177309007167, 'alpha': 0.4913037470983478, 'random_state': 9788}.
[I 2019-06-02 00:13:51,902] Finished a trial resulted in value: 1.1380303719319267. 

[I 2019-06-02 00:27:32,471] Finished a trial resulted in value: 0.26562393928268907. Current best value is 0.05979859307100809 with parameters: {'max_depth': 2, 'learning_rate': 0.011875750651251793, 'n_estimators': 146, 'subsample': 0.9110177309007167, 'alpha': 0.4913037470983478, 'random_state': 9788}.
[I 2019-06-02 00:27:51,192] Finished a trial resulted in value: 0.4135736349726699. Current best value is 0.05979859307100809 with parameters: {'max_depth': 2, 'learning_rate': 0.011875750651251793, 'n_estimators': 146, 'subsample': 0.9110177309007167, 'alpha': 0.4913037470983478, 'random_state': 9788}.
[I 2019-06-02 00:28:16,179] Finished a trial resulted in value: 0.4326182190317911. Current best value is 0.05979859307100809 with parameters: {'max_depth': 2, 'learning_rate': 0.011875750651251793, 'n_estimators': 146, 'subsample': 0.9110177309007167, 'alpha': 0.4913037470983478, 'random_state': 9788}.
[I 2019-06-02 00:28:53,418] Finished a trial resulted in value: 0.3740184318740552. 

[I 2019-06-02 00:33:49,390] Finished a trial resulted in value: 0.2935865794282383. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:34:00,456] Finished a trial resulted in value: 0.0974941408363072. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:34:22,133] Finished a trial resulted in value: 0.522547011703548. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:34:38,510] Finished a trial resulted in value: 0.3103285328987288.

[I 2019-06-02 00:45:17,013] Finished a trial resulted in value: 0.19819956611421016. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:45:32,558] Finished a trial resulted in value: 0.14917486925966528. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:45:45,401] Finished a trial resulted in value: 0.27088605738171245. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:45:52,546] Finished a trial resulted in value: 0.3842292162597

[I 2019-06-02 00:54:55,870] Finished a trial resulted in value: 0.14733982767326034. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:55:07,266] Finished a trial resulted in value: 0.17421869663929576. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:55:43,262] Finished a trial resulted in value: 1.4929677167976805. Current best value is 0.05004608384579216 with parameters: {'max_depth': 2, 'learning_rate': 0.010315078019334158, 'n_estimators': 143, 'subsample': 0.7872380763445552, 'alpha': 0.15167077293319026, 'random_state': 6593}.
[I 2019-06-02 00:55:50,077] Finished a trial resulted in value: 0.28302708091344

[I 2019-06-02 01:01:12,419] Finished a trial resulted in value: 0.13074567579130789. Current best value is 0.038806879505425614 with parameters: {'max_depth': 2, 'learning_rate': 0.010399516818297067, 'n_estimators': 117, 'subsample': 0.7790383430161657, 'alpha': 0.3171769320961716, 'random_state': 9760}.
[I 2019-06-02 01:01:25,715] Finished a trial resulted in value: 0.08896387882220147. Current best value is 0.038806879505425614 with parameters: {'max_depth': 2, 'learning_rate': 0.010399516818297067, 'n_estimators': 117, 'subsample': 0.7790383430161657, 'alpha': 0.3171769320961716, 'random_state': 9760}.
[I 2019-06-02 01:01:33,718] Finished a trial resulted in value: 0.15263370457609432. Current best value is 0.038806879505425614 with parameters: {'max_depth': 2, 'learning_rate': 0.010399516818297067, 'n_estimators': 117, 'subsample': 0.7790383430161657, 'alpha': 0.3171769320961716, 'random_state': 9760}.
[I 2019-06-02 01:01:41,168] Finished a trial resulted in value: 0.0723777202931

[I 2019-06-02 01:08:27,596] Finished a trial resulted in value: 0.160649549715072. Current best value is 0.038806879505425614 with parameters: {'max_depth': 2, 'learning_rate': 0.010399516818297067, 'n_estimators': 117, 'subsample': 0.7790383430161657, 'alpha': 0.3171769320961716, 'random_state': 9760}.
[I 2019-06-02 01:08:38,123] Finished a trial resulted in value: 0.3377308688161607. Current best value is 0.038806879505425614 with parameters: {'max_depth': 2, 'learning_rate': 0.010399516818297067, 'n_estimators': 117, 'subsample': 0.7790383430161657, 'alpha': 0.3171769320961716, 'random_state': 9760}.
[I 2019-06-02 01:08:54,965] Finished a trial resulted in value: 0.2142630407337505. Current best value is 0.038806879505425614 with parameters: {'max_depth': 2, 'learning_rate': 0.010399516818297067, 'n_estimators': 117, 'subsample': 0.7790383430161657, 'alpha': 0.3171769320961716, 'random_state': 9760}.
[I 2019-06-02 01:09:09,448] Finished a trial resulted in value: 0.3354415856419269.

In [58]:
for trial_i in mytrial:
    db.insert(trial_i)
db.commit()

In [59]:
df_trial = db.select()

In [60]:
df_trial[(df_trial['remark']=='tune {} g3'.format(index))].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head()

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1348,2019-06-02 00:32:25.693485,tune 1244 g3,18,1.918393,0.001876,2.025865,0.012096,0.107472
1371,2019-06-02 00:40:56.269721,tune 1244 g3,18,1.90851,0.00176,2.026211,0.012165,0.117701
1473,2019-06-02 01:12:51.096091,tune 1244 g3,18,1.914584,0.001846,2.027032,0.012097,0.112447
1389,2019-06-02 00:48:14.974172,tune 1244 g3,18,1.894791,0.00162,2.027202,0.012311,0.132411
1411,2019-06-02 00:56:27.137362,tune 1244 g3,18,1.87341,0.001427,2.027264,0.01266,0.153854


In [61]:
mytrial =[]

###  tune hypterparameters
def objective(trial):
    
    max_depth = trial.suggest_int('max_depth', 2, 6)
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 0.4)
    n_estimators = trial.suggest_int('n_estimators', 100,500)
    subsample = trial.suggest_uniform('subsample', 0.6, 1.0)
    alpha = trial.suggest_uniform('alpha', 0.00001, 1.0)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':df_trial.loc[index]['param']['columns'],
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified'
        },
        'scaler':{
            'cls':'StandardScaler',
            'init':{}
        },
        'algorithm':{
            'cls':'GradientBoostingRegressor',
            'init':{
                "max_depth":max_depth,
                "learning_rate":learning_rate,
                "n_estimators":n_estimators,
                "subsample":subsample,
                "alpha":alpha,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune {} s3'.format(index))
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-06-02 01:17:43,376] Finished a trial resulted in value: 1.2596003724969773. Current best value is 1.2596003724969773 with parameters: {'max_depth': 4, 'learning_rate': 0.2545281183677018, 'n_estimators': 491, 'subsample': 0.7822160038491861, 'alpha': 0.37472665685553264, 'random_state': 8420}.
[I 2019-06-02 01:18:31,501] Finished a trial resulted in value: 1.4527204939579563. Current best value is 1.2596003724969773 with parameters: {'max_depth': 4, 'learning_rate': 0.2545281183677018, 'n_estimators': 491, 'subsample': 0.7822160038491861, 'alpha': 0.37472665685553264, 'random_state': 8420}.
[I 2019-06-02 01:18:52,272] Finished a trial resulted in value: 0.225252258087415. Current best value is 0.225252258087415 with parameters: {'max_depth': 2, 'learning_rate': 0.2900913267146146, 'n_estimators': 277, 'subsample': 0.9736091241517164, 'alpha': 0.5494964901244939, 'random_state': 3914}.
[I 2019-06-02 01:19:19,661] Finished a trial resulted in value: 0.2286548240069377. Current be

[I 2019-06-02 01:30:30,059] Finished a trial resulted in value: 0.16292594148494255. Current best value is 0.02103820488967844 with parameters: {'max_depth': 3, 'learning_rate': 0.010772604344559178, 'n_estimators': 118, 'subsample': 0.6765882268424563, 'alpha': 0.863850938998015, 'random_state': 2727}.
[I 2019-06-02 01:31:26,609] Finished a trial resulted in value: 0.5645516318011752. Current best value is 0.02103820488967844 with parameters: {'max_depth': 3, 'learning_rate': 0.010772604344559178, 'n_estimators': 118, 'subsample': 0.6765882268424563, 'alpha': 0.863850938998015, 'random_state': 2727}.
[I 2019-06-02 01:32:29,178] Finished a trial resulted in value: 0.6331111130465864. Current best value is 0.02103820488967844 with parameters: {'max_depth': 3, 'learning_rate': 0.010772604344559178, 'n_estimators': 118, 'subsample': 0.6765882268424563, 'alpha': 0.863850938998015, 'random_state': 2727}.
[I 2019-06-02 01:33:02,759] Finished a trial resulted in value: 0.4295049607872116. Cur

[I 2019-06-02 01:44:33,227] Finished a trial resulted in value: 0.3696498392406717. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 01:44:48,745] Finished a trial resulted in value: 0.03136741094742413. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 01:45:24,819] Finished a trial resulted in value: 0.03955614461182735. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 01:45:42,088] Finished a trial resulted in value: 0.09547837965932748. Curr

[I 2019-06-02 01:59:07,057] Finished a trial resulted in value: 0.16990977965343165. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:00:06,108] Finished a trial resulted in value: 0.7282234205501. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:00:35,393] Finished a trial resulted in value: 0.0682076632672949. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:01:06,219] Finished a trial resulted in value: 0.1912172890803471. Current b

[I 2019-06-02 02:11:37,943] Finished a trial resulted in value: 0.6011214842867639. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:12:06,397] Finished a trial resulted in value: 0.09722567371311262. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:12:16,763] Finished a trial resulted in value: 0.1409227722412614. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:12:39,528] Finished a trial resulted in value: 0.0453069604409915. Curren

[I 2019-06-02 02:25:55,419] Finished a trial resulted in value: 0.08676776392834239. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:26:25,296] Finished a trial resulted in value: 0.37234340221869233. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:26:53,826] Finished a trial resulted in value: 0.1266685653902515. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:27:04,138] Finished a trial resulted in value: 0.04610952769289098. Curr

[I 2019-06-02 02:38:38,945] Finished a trial resulted in value: 0.045874669791724734. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:39:05,194] Finished a trial resulted in value: 0.15305400319884568. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:39:37,183] Finished a trial resulted in value: 0.6707287391184495. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:39:54,281] Finished a trial resulted in value: 0.06242353499288026. Cur

[I 2019-06-02 02:51:17,263] Finished a trial resulted in value: 0.1771333493699967. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:51:39,125] Finished a trial resulted in value: 0.08572227575992417. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:52:32,419] Finished a trial resulted in value: 0.2655192572182256. Current best value is 0.0203286486952381 with parameters: {'max_depth': 2, 'learning_rate': 0.01131304357864059, 'n_estimators': 223, 'subsample': 0.9335005964214178, 'alpha': 0.9462246498187932, 'random_state': 2004}.
[I 2019-06-02 02:52:44,954] Finished a trial resulted in value: 0.12520318431451624. Curre

In [62]:
for trial_i in mytrial:
    db.insert(trial_i)
db.commit()

In [63]:
df_trial = db.select()

In [66]:
df_trial[(df_trial['remark']=='tune {} s3'.format(index))].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(100)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1587,2019-06-02 02:15:03.812609,tune 1244 s3,18,0.969829,1.437209e-06,1.802333,0.0001505971,0.832505
1514,2019-06-02 01:37:03.925087,tune 1244 s3,18,1.111859,6.366843e-05,1.808884,6.245833e-05,0.697025
1523,2019-06-02 01:41:49.578950,tune 1244 s3,18,1.130381,2.27581e-06,1.810158,1.729862e-05,0.679777
1483,2019-06-02 01:22:26.099460,tune 1244 s3,18,1.133658,1.027972e-05,1.814085,3.939121e-05,0.680427
1538,2019-06-02 01:51:00.907730,tune 1244 s3,18,1.393934,5.558565e-05,1.822186,2.766653e-05,0.428252
1534,2019-06-02 01:49:05.266859,tune 1244 s3,18,1.374218,2.436196e-05,1.82922,1.728136e-05,0.455002
1487,2019-06-02 01:24:55.227607,tune 1244 s3,18,1.472889,1.406026e-05,1.842741,0.0001229382,0.369853
1601,2019-06-02 02:22:56.391940,tune 1244 s3,18,1.549798,3.931293e-06,1.843614,9.6131e-05,0.293815
1503,2019-06-02 01:31:26.604778,tune 1244 s3,18,1.543662,4.256039e-06,1.848991,6.934888e-05,0.30533
1504,2019-06-02 01:32:29.174172,tune 1244 s3,18,1.509661,2.491634e-05,1.85159,5.807713e-05,0.341928
