In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import types
import copy

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_colwidth', -1)

In [4]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [5]:
df_spec_train = pd.read_pickle('../feats/spec_features.pkl')
df_spec_test = pd.read_pickle('../feats/spec_features_test.pkl')

In [6]:
df_train = pd.merge(df_train, df_spec_train, on='index')
df_test = pd.merge(df_test, df_spec_test, on='index')

In [7]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [8]:
db = DFDB('../trial/xgbm.pkl', auto_commit=False)

In [9]:
lgbm_top200 = ['spkt_welch_densitycoeff_2',
 'spkt_welch_density__coeff_3',
 'q25_roll_std_100',
 '3th_peak_freq',
 'min_roll_std_100',
 'q05_roll_std_100',
 'iqr_6',
 'abs_max_8',
 'mfcc_5_abs_mean',
 "number_peaks{'n': 10}",
 'ave10_7',
 'mfcc_13_mean',
 "autocorrelation{'lag': 5}",
 'mfcc_accelerate_8_variance',
 'abs_max_7',
 'mfcc_accelerate_1_kurtosis',
 'q05_roll_std_1000',
 'spkt_welch_density__coeff_42',
 'max_to_min_diff_5',
 'mfcc_13_quantile25',
 'fft_coefficientcoeff_80__attr_"imag"',
 'abs_q25_5',
 'mfcc_5_mean',
 'median__roll_std',
 '5000skewness_max_',
 'fft_coefficientcoeff_6__attr_"abs"',
 'partial_autocorrelationlag_5',
 'abs_min_8',
 'spkt_welch_density__coeff_28',
 'ar_coefficientk_10__coeff_3',
 'abs_q75_7',
 'mfcc_accelerate_15_min',
 'abs_max_4',
 'mfcc_10_quantile25',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_density__coeff_79',
 "value_count{'value': 1}",
 '5000peak_peak_amp_max_',
 'min__roll_std',
 'mfcc_4_rolling_std_mean',
 'q01_roll_std_1000',
 'mfcc_3_abs_q75',
 'mfcc_5_quantile95',
 'fft_coefficientcoeff_16__attr_"imag"',
 'abs_q01_4',
 'mfcc_6_quantile05',
 'agg_autocorrelationf_agg_"mean"__maxlag_40',
 '5000quantile25skewness_',
 'mfcc_0_abs_q25',
 'mfcc_delta_3_quantile01',
 'spkt_welch_density__coeff_99',
 'mfcc_9_mean',
 'mfcc_12_mean',
 'q75_roll_mean_10',
 'max_to_min',
 'mfcc_3_abs_mean',
 'kurt_7',
 'mfcc_14_quantile01',
 'mfcc_10_mean',
 'mfcc_delta_13_min',
 'mfcc_delta_13_abs_max',
 '5000no_zero_crossing_mean_',
 'abs_q75_6',
 'mfcc_12_quantile99',
 'mfcc_accelerate_6_skewness',
 'mfcc_5_max',
 '5000smoothness_entropy_',
 '5000median_skewness_',
 'mfcc_accelerate_8_min',
 'abs_min_3',
 '5000quantile75mean_',
 "number_crossing_m{'m': 1}",
 '5000smoothness_std_',
 'mfcc_9_abs_q25',
 'flac3_1_quantile99',
 'mfcc_5_skewness',
 'fft_coefficientcoeff_56__attr_"angle"',
 'fft_coefficientcoeff_70__attr_"abs"',
 'mfcc_10_abs_q75',
 'fft_coefficientcoeff_24__attr_"angle"',
 'med_7',
 'spkt_welch_density__coeff_73',
 'abs_q99_8',
 'ave10_6',
 'spkt_welch_density__coeff_38',
 'skew_1',
 'mfcc_delta_3_abs_q95',
 "change_quantiles{'ql': 0.6, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}",
 'max_to_min_5',
 'mfcc_delta_4_quantile75',
 'abs_max_1',
 '5000crest_factor_quantile75',
 'partial_autocorrelationlag_1',
 'mfcc_delta_11_quantile01',
 'mfcc_accelerate_7_quantile01',
 'mfcc_8_rolling_std_mean',
 'q95_roll_mean_100',
 'mfcc_delta_6_rolling_std_mean',
 'flac3_0_min',
 'fft_coefficientcoeff_36__attr_"abs"',
 'iqr_8',
 'max_9',
 '5000smoothness_quantile05',
 'mfcc_1_kurtosis',
 'mfcc_7_abs_q95',
 'spkt_welch_density__coeff_66',
 'spkt_welch_density__coeff_64',
 'mfcc_3_abs_q95',
 '5000skewness_mean_',
 'mfcc_10_abs_mean',
 '5000quantile99quantile01',
 'mean_change_rate',
 'mfcc_accelerate_5_rolling_std_mean',
 'mfcc_accelerate_5_quantile05',
 '5000rms_median_',
 'flac3_1_abs_mean',
 '5000rms_quantile05',
 '5000quantile75quantile25',
 'mfcc_accelerate_12_min',
 "value_count{'value': -1}",
 'mfcc_12_abs_max',
 'abs_min_5',
 'mfcc_5_abs_q99',
 'mfcc_delta_5_quantile01',
 'mfcc_12_quantile75',
 'fft_coefficientcoeff_56__attr_"imag"',
 'spkt_welch_densitycoeff_5',
 'mfcc_0_abs_q05',
 'mfcc_13_quantile75',
 'mfcc_delta_5_min',
 'mfcc_5_quantile99',
 'fft_coefficientcoeff_8__attr_"angle"',
 'spkt_welch_density__coeff_30',
 'mfcc_accelerate_4_max',
 'mfcc_14_median',
 "change_quantiles{'ql': 0.2, 'qh': 0.4, 'isabs': False, 'f_agg': 'var'}",
 'mfcc_5_median',
 'mfcc_accelerate_5_abs_q75',
 'spkt_welch_density__coeff_59',
 'mfcc_accelerate_5_kurtosis',
 'mfcc_delta_9_quantile99',
 'mfcc_5_quantile25',
 'spkt_welch_density__coeff_113',
 'mfcc_2_quantile05',
 'spkt_welch_density__coeff_58',
 'mfcc_5_abs_q75',
 'spkt_welch_density__coeff_22',
 'spkt_welch_density__coeff_115',
 'spkt_welch_density__coeff_4',
 'mfcc_5_abs_q05',
 'spkt_welch_density__coeff_25',
 'mfcc_5_abs_max',
 'spkt_welch_density__coeff_27',
 'mfcc_4_quantile75',
 'mfcc_3_rolling_std_mean',
 'mfcc_4_median',
 'mfcc_0_quantile01',
 '5000quantile75rssq_',
 'fft_coefficientcoeff_24__attr_"imag"',
 'mfcc_delta_12_kurtosis',
 '5000rms_quantile25',
 'mfcc_13_quantile05',
 'fft_coefficientcoeff_8__attr_"imag"',
 'mfcc_delta_11_rolling_std_mean',
 'mfcc_delta_12_quantile99',
 'mfcc_accelerate_9_abs_q25',
 'mfcc_11_abs_mean',
 'mfcc_delta_15_abs_max',
 'abs_max_2',
 'mfcc_11_abs_std',
 'abs_max_roll_mean_1000',
 'peak_to_average_power_ratio__roll_mean',
 'mfcc_13_median',
 'fft_coefficientcoeff_62__attr_"abs"',
 'mfcc_11_quantile75',
 "quantile{'q': 0.8}",
 'mfcc_delta_3_rolling_std_mean',
 'abs_q99_7',
 'mfcc_delta_12_max',
 'mfcc_12_min',
 'mfcc_10_quantile99',
 'mfcc_14_abs_q99',
 'mfcc_14_quantile05',
 'mfcc_14_quantile25',
 'kurt_1',
 'mfcc_2_median',
 'q01_2',
 'mfcc_2_abs_q25',
 'kurt_8',
 'mfcc_accelerate_6_abs_max',
 'mfcc_1_quantile75',
 'q05_5',
 'abs_q95_9',
 'q05_roll_mean_100',
 'mfcc_10_abs_q25',
 'q75_9',
 'mfcc_15_mean',
 "change_quantiles{'ql': 0.6, 'qh': 0.8, 'isabs': True, 'f_agg': 'mean'}",
 'flac3_0_abs_std',
 "change_quantiles{'ql': 0.4, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}"]

In [11]:
df_trial = db.select()
df_trial['kfold'] = df_trial['param'].apply(lambda x: x['kfold'])
df_trial['algorithm-init'] = df_trial['param'].apply(lambda x: x['algorithm']['init'])

In [14]:
df_trial[df_trial['kfold-type']=='group'][['datetime','nfeatures', 'remark', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','remark']].sort_values(by=['val_mae'], ascending=True).head()

Unnamed: 0,datetime,nfeatures,remark,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark.1
440,2019-05-06 07:48:27.216046,25,tune columns272,1.889476,0.055808,1.94578,0.611434,0.056304,tune columns272
328,2019-05-06 07:29:11.053666,25,tune columns272,1.921549,0.045514,1.946469,0.602273,0.02492,tune columns272
316,2019-05-06 07:27:06.754855,25,tune columns272,1.894958,0.042848,1.947048,0.610165,0.05209,tune columns272
435,2019-05-06 07:47:48.957295,25,tune columns272,1.919623,0.047471,1.947087,0.604345,0.027464,tune columns272
434,2019-05-06 07:47:35.692914,25,tune columns272,1.900924,0.042155,1.947729,0.607891,0.046805,tune columns272


In [17]:
param = {'columns': lgbm_top200,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler', 'init':{}},
 'algorithm': {'cls': 'xgb.XGBRegressor',
  'init': {'max_depth': 4,
   'max_bin': 67,
   'eta': 0.342568877200916,
   'colsample_bytree': 0.8864953019985605,
   'min_child_weight': 213,
   'n_estimators': 450,
   'subsample': 0.853698982949453,
   'reg_lambda': 0.982742342106065,
   'reg_alpha': 0.3136457598284541,
   'n_jobs': 16},
  'fit': {}},
        }

In [18]:
mytrial =[]
EP.select_features_(df_train, param, mytrial, nfeats_best=20, nfeats_removed_per_try=10, key='average_permutation_weight', remark='rfe to 20 group3')

In [19]:
EP.select_features_(df_train, param, mytrial, nfeats_best=20, nfeats_removed_per_try=10, key='average_model_weight', remark='rfe to 20 group3 model weight')

In [22]:
# pd.DataFrame(mytrial)[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

In [23]:
for trial_i in mytrial:
    db.insert(trial_i)

In [24]:
df_trial = db.select()

In [26]:
df_trial[df_trial['remark']=='rfe to 20 group3'][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1846,2019-05-31 03:58:27.136660,rfe to 20 group3,200,1.205982,0.000182,2.106715,0.00444,0.900733
1847,2019-05-31 04:09:29.695793,rfe to 20 group3,190,1.211057,0.000538,2.099602,0.004261,0.888545
1848,2019-05-31 04:20:08.066918,rfe to 20 group3,180,1.220032,9.8e-05,2.097443,0.004153,0.87741
1849,2019-05-31 04:28:16.141972,rfe to 20 group3,170,1.251079,0.000287,2.076615,0.003928,0.825536
1850,2019-05-31 04:33:53.630082,rfe to 20 group3,160,1.271805,0.000497,2.066525,0.00586,0.79472
1851,2019-05-31 04:39:44.418728,rfe to 20 group3,150,1.288827,0.000626,2.058341,0.006943,0.769514
1852,2019-05-31 04:44:54.479791,rfe to 20 group3,140,1.293057,0.000456,2.049283,0.006572,0.756226
1853,2019-05-31 04:50:17.943489,rfe to 20 group3,130,1.309402,0.000318,2.049743,0.005642,0.740341
1854,2019-05-31 04:56:00.099157,rfe to 20 group3,120,1.320676,0.000428,2.038732,0.004942,0.718056
1855,2019-05-31 05:01:24.725280,rfe to 20 group3,110,1.329129,0.000645,2.034923,0.005551,0.705794


In [27]:
score = df_trial.loc[1863].val_mae

In [29]:
param = df_trial.loc[1863]['param']

In [35]:
def width_frist_rfe(df_train, param, trial, score, df_test=None, remark=None):

    param_ = copy.deepcopy(param)
    columns_ = param_['columns']
    best_score = score
    best_param = param_
    for col in columns_:
        param_['columns'] = list(set(columns_) - set([col]))
        df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(df_train, param_, df_test=df_test, trial=trial, is_output_feature_importance=False, remark=remark)
        val_mae_mean = np.mean(df_his.valid)
        if val_mae_mean<best_score:
            best_score = val_mae_mean
            best_param = copy.deepcopy(param_)

    if best_score < score:
        width_frist_rfe(df_train, best_param, trial, best_score, df_test, remark=remark)

    return

In [36]:
mytrial=[]
width_frist_rfe(df_train, param, mytrial, score, df_test=df_test, remark='wf 1863')

In [37]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [42]:
df_trial[(df_trial['remark']=='wf 1863')].sort_values(by=['val_mae'])[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1898,2019-05-31 12:17:39.334178,29,1.514048,0.000619,2.058923,0.007969,0.544875
1926,2019-05-31 13:48:17.397117,28,1.530666,0.000871,2.05902,0.009289,0.528354
1914,2019-05-31 13:16:08.308012,28,1.513636,0.000813,2.059239,0.009186,0.545604
1931,2019-05-31 14:00:31.498956,28,1.513678,0.000744,2.060922,0.009249,0.547244
1921,2019-05-31 13:35:44.147633,28,1.509612,0.000745,2.061995,0.008933,0.552383
1924,2019-05-31 13:43:23.669816,28,1.535808,0.000664,2.062789,0.00774,0.526981
1934,2019-05-31 14:07:56.717335,28,1.513826,0.000716,2.062998,0.007796,0.549172
1939,2019-05-31 14:21:36.084077,28,1.513263,0.000876,2.063143,0.008868,0.549879
1918,2019-05-31 13:27:15.843111,28,1.50966,0.000696,2.063802,0.008888,0.554142
1925,2019-05-31 13:45:49.835753,28,1.515608,0.000799,2.064581,0.008647,0.548973


In [12]:
df_trial.loc[[1898]][['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1898,2019-05-31 12:17:39.334178,29,1.514048,0.000619,2.058923,0.007969,0.544875


In [23]:
mytrial = []
#  tune hypterparameters
def objective(trial):
        
    max_depth = trial.suggest_int('max_depth', 2, 6)
    max_bin = trial.suggest_int('max_bin', 10, 100)
    eta = trial.suggest_uniform('eta', 0.01, 0.4)
    colsample_bytree = trial.suggest_uniform('colsample_bytree', 0.6, 1)
    min_child_weight = trial.suggest_int('min_child_weight', 200, 600)
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    subsample = trial.suggest_uniform('subsample', 0.6, 1.0)
    reg_lambda = trial.suggest_loguniform('reg_lambda', 0.000001, 1.0)
    reg_alpha = trial.suggest_loguniform('reg_alpha', 0.000001, 1.0)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':copy.deepcopy(df_trial.loc[1898]['param']['columns']),
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified'
        },
        'scaler':{
            'cls':'StandardScaler',
            'init':{}
        },
        'algorithm':{
            'cls':'xgb.XGBRegressor',
            'init':{
                "max_depth":max_depth,
                "max_bin":max_bin,
                "eta":eta,
                "colsample_bytree":colsample_bytree,
                "min_child_weight":min_child_weight,
                "n_estimators":n_estimators,
                "subsample":subsample,
                "reg_lambda":reg_lambda,
                "reg_alpha":reg_alpha,
                'n_jobs':16
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 1898 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-06-01 07:52:43,113] Finished a trial resulted in value: 0.42356023632691836. Current best value is 0.42356023632691836 with parameters: {'max_depth': 5, 'max_bin': 48, 'eta': 0.3060805127166695, 'colsample_bytree': 0.9537029320296144, 'min_child_weight': 525, 'n_estimators': 921, 'subsample': 0.7798718416720616, 'reg_lambda': 0.03475615249839333, 'reg_alpha': 0.16089985929423653, 'random_state': 6492}.
[I 2019-06-01 07:55:00,892] Finished a trial resulted in value: 0.2081691882396913. Current best value is 0.2081691882396913 with parameters: {'max_depth': 3, 'max_bin': 74, 'eta': 0.2366986812624387, 'colsample_bytree': 0.823447742625986, 'min_child_weight': 402, 'n_estimators': 600, 'subsample': 0.6810351508186208, 'reg_lambda': 0.005417370497658727, 'reg_alpha': 5.722769507617206e-06, 'random_state': 3239}.
[I 2019-06-01 07:58:44,144] Finished a trial resulted in value: 0.33963855693794026. Current best value is 0.2081691882396913 with parameters: {'max_depth': 3, 'max_bin': 7

[I 2019-06-01 08:29:23,946] Finished a trial resulted in value: 0.07987002153766247. Current best value is 0.07596186942103911 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.020290192310066943, 'colsample_bytree': 0.627113553091716, 'min_child_weight': 211, 'n_estimators': 189, 'subsample': 0.995958702550503, 'reg_lambda': 7.934355737295271e-05, 'reg_alpha': 2.2005614484741933e-05, 'random_state': 9724}.
[I 2019-06-01 08:30:28,115] Finished a trial resulted in value: 0.17062948317114157. Current best value is 0.07596186942103911 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.020290192310066943, 'colsample_bytree': 0.627113553091716, 'min_child_weight': 211, 'n_estimators': 189, 'subsample': 0.995958702550503, 'reg_lambda': 7.934355737295271e-05, 'reg_alpha': 2.2005614484741933e-05, 'random_state': 9724}.
[I 2019-06-01 08:31:52,967] Finished a trial resulted in value: 0.16409277220710694. Current best value is 0.07596186942103911 with parameters: {'max_depth': 2, '

[I 2019-06-01 08:39:09,934] Finished a trial resulted in value: 0.10635800426802189. Current best value is 0.05168697969170115 with parameters: {'max_depth': 2, 'max_bin': 37, 'eta': 0.29072081131479866, 'colsample_bytree': 0.6289864697833525, 'min_child_weight': 292, 'n_estimators': 108, 'subsample': 0.6798546856598158, 'reg_lambda': 1.9408714762131848e-05, 'reg_alpha': 0.00037550461627423886, 'random_state': 5367}.
[I 2019-06-01 08:39:29,749] Finished a trial resulted in value: 0.5516815658204864. Current best value is 0.05168697969170115 with parameters: {'max_depth': 2, 'max_bin': 37, 'eta': 0.29072081131479866, 'colsample_bytree': 0.6289864697833525, 'min_child_weight': 292, 'n_estimators': 108, 'subsample': 0.6798546856598158, 'reg_lambda': 1.9408714762131848e-05, 'reg_alpha': 0.00037550461627423886, 'random_state': 5367}.
[I 2019-06-01 08:39:43,072] Finished a trial resulted in value: 0.28168180064649884. Current best value is 0.05168697969170115 with parameters: {'max_depth': 2

[I 2019-06-01 08:41:40,246] Finished a trial resulted in value: 0.054023690032993524. Current best value is 0.05168697969170115 with parameters: {'max_depth': 2, 'max_bin': 37, 'eta': 0.29072081131479866, 'colsample_bytree': 0.6289864697833525, 'min_child_weight': 292, 'n_estimators': 108, 'subsample': 0.6798546856598158, 'reg_lambda': 1.9408714762131848e-05, 'reg_alpha': 0.00037550461627423886, 'random_state': 5367}.
[I 2019-06-01 08:41:45,316] Finished a trial resulted in value: 0.1842667665330833. Current best value is 0.05168697969170115 with parameters: {'max_depth': 2, 'max_bin': 37, 'eta': 0.29072081131479866, 'colsample_bytree': 0.6289864697833525, 'min_child_weight': 292, 'n_estimators': 108, 'subsample': 0.6798546856598158, 'reg_lambda': 1.9408714762131848e-05, 'reg_alpha': 0.00037550461627423886, 'random_state': 5367}.
[I 2019-06-01 08:41:56,396] Finished a trial resulted in value: 0.26159927261032373. Current best value is 0.05168697969170115 with parameters: {'max_depth': 

[I 2019-06-01 08:42:51,057] Finished a trial resulted in value: 0.07356101591763196. Current best value is 0.04213413178337907 with parameters: {'max_depth': 2, 'max_bin': 49, 'eta': 0.320045817574662, 'colsample_bytree': 0.6617117961126273, 'min_child_weight': 476, 'n_estimators': 104, 'subsample': 0.6009558868443619, 'reg_lambda': 0.0004721735732033923, 'reg_alpha': 0.00011393841488398491, 'random_state': 3956}.
[I 2019-06-01 08:42:56,241] Finished a trial resulted in value: 0.112111132028664. Current best value is 0.04213413178337907 with parameters: {'max_depth': 2, 'max_bin': 49, 'eta': 0.320045817574662, 'colsample_bytree': 0.6617117961126273, 'min_child_weight': 476, 'n_estimators': 104, 'subsample': 0.6009558868443619, 'reg_lambda': 0.0004721735732033923, 'reg_alpha': 0.00011393841488398491, 'random_state': 3956}.
[I 2019-06-01 08:43:02,238] Finished a trial resulted in value: 0.15921439403883253. Current best value is 0.04213413178337907 with parameters: {'max_depth': 2, 'max_

[I 2019-06-01 08:44:56,195] Finished a trial resulted in value: 0.36402423980906284. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:45:02,719] Finished a trial resulted in value: 0.15777874161300168. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:45:04,969] Finished a trial resulted in value: 0.06558053102793604. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2

[I 2019-06-01 08:46:35,834] Finished a trial resulted in value: 0.13302943238745363. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:46:38,608] Finished a trial resulted in value: 0.13236329566485697. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:46:47,739] Finished a trial resulted in value: 0.12833482824923081. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2

[I 2019-06-01 08:48:24,044] Finished a trial resulted in value: 0.12398000966176083. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:48:31,226] Finished a trial resulted in value: 0.23171012443485445. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:48:33,293] Finished a trial resulted in value: 0.06484750974955475. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2

[I 2019-06-01 08:50:45,397] Finished a trial resulted in value: 0.06633115931219406. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:50:48,014] Finished a trial resulted in value: 0.06842957546540435. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:50:50,187] Finished a trial resulted in value: 0.06470448473080002. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2

[I 2019-06-01 08:51:44,152] Finished a trial resulted in value: 0.07035652895935617. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:51:47,896] Finished a trial resulted in value: 0.08228725615506939. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:51:57,055] Finished a trial resulted in value: 0.2151621042603358. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2,

[I 2019-06-01 08:53:24,322] Finished a trial resulted in value: 0.06242472558666212. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:53:37,570] Finished a trial resulted in value: 0.28589544912303455. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2, 'max_bin': 73, 'eta': 0.3339563695489065, 'colsample_bytree': 0.762828075706736, 'min_child_weight': 451, 'n_estimators': 100, 'subsample': 0.6003933374397961, 'reg_lambda': 0.00030861713680687845, 'reg_alpha': 1.4549556251466457e-06, 'random_state': 5934}.
[I 2019-06-01 08:53:42,720] Finished a trial resulted in value: 0.11764222691430613. Current best value is 0.039752993854131856 with parameters: {'max_depth': 2

In [24]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()
df_trial['kfold'] = df_trial['param'].apply(lambda x: x['kfold'])
df_trial['algorithm-init'] = df_trial['param'].apply(lambda x: x['algorithm']['init'])

In [28]:
df_trial[(df_trial['remark']=='tune 1898 by stratified')&(df_trial['mae_diff']<.05)][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','algorithm-init']].sort_values(by=['val_mae']).head()

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,algorithm-init
2379,2019-06-01 08:52:10.734467,tune 1898 by stratified,29,1.882644,1.9e-05,1.930172,3.6e-05,0.047528,"{'max_depth': 2, 'max_bin': 90, 'eta': 0.3073809955940666, 'colsample_bytree': 0.6611223577644073, 'min_child_weight': 236, 'n_estimators': 324, 'subsample': 0.6339546318951624, 'reg_lambda': 1.9419181147741157e-05, 'reg_alpha': 1.8851064597864987e-06, 'n_jobs': 16}"
2219,2019-06-01 08:28:31.192030,tune 1898 by stratified,29,1.884007,1.3e-05,1.932437,6.7e-05,0.048431,"{'max_depth': 2, 'max_bin': 68, 'eta': 0.013665479623816174, 'colsample_bytree': 0.606638153945249, 'min_child_weight': 205, 'n_estimators': 307, 'subsample': 0.6071407235490203, 'reg_lambda': 0.00011218297825912067, 'reg_alpha': 6.723915065556982e-05, 'n_jobs': 16}"
2315,2019-06-01 08:46:32.311180,tune 1898 by stratified,29,1.885476,2.5e-05,1.934591,5.8e-05,0.049115,"{'max_depth': 2, 'max_bin': 99, 'eta': 0.18409091852908002, 'colsample_bytree': 0.9526334061236899, 'min_child_weight': 278, 'n_estimators': 312, 'subsample': 0.6602106373878815, 'reg_lambda': 3.1040372792870367e-06, 'reg_alpha': 0.0017131438846052355, 'n_jobs': 16}"
2383,2019-06-01 08:52:41.397929,tune 1898 by stratified,29,1.886988,1.1e-05,1.935482,4.2e-05,0.048494,"{'max_depth': 4, 'max_bin': 58, 'eta': 0.1811875829422426, 'colsample_bytree': 0.8077052733745183, 'min_child_weight': 600, 'n_estimators': 142, 'subsample': 0.6094418983292402, 'reg_lambda': 0.0003648881938525921, 'reg_alpha': 2.6722013663530638e-06, 'n_jobs': 16}"
2309,2019-06-01 08:45:56.566029,tune 1898 by stratified,29,1.887281,3e-05,1.936105,5.8e-05,0.048824,"{'max_depth': 2, 'max_bin': 54, 'eta': 0.23176930286927705, 'colsample_bytree': 0.7681828968284401, 'min_child_weight': 362, 'n_estimators': 337, 'subsample': 0.6507305906963924, 'reg_lambda': 4.951666956462279e-05, 'reg_alpha': 0.05734747897506657, 'n_jobs': 16}"


In [31]:
db.commit()

In [30]:
df_trial.tail(1)[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','algorithm-init','kfold']].sort_values(by=['val_mae']).head()

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,algorithm-init,kfold
2401,2019-06-01 08:54:18.870441,tune 1898 by stratified,29,1.949857,1.4e-05,1.979048,0.000131,0.029191,"{'max_depth': 2, 'max_bin': 74, 'eta': 0.1893926508427528, 'colsample_bytree': 0.8364814256745211, 'min_child_weight': 449, 'n_estimators': 155, 'subsample': 0.6301772137985538, 'reg_lambda': 0.00047813086456964825, 'reg_alpha': 3.425976342896432e-05, 'n_jobs': 16}","{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}"
