In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import types
import copy

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_colwidth', -1)

In [4]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [5]:
df_spec_train = pd.read_pickle('../feats/spec_features.pkl')
df_spec_test = pd.read_pickle('../feats/spec_features_test.pkl')

In [6]:
df_train = pd.merge(df_train, df_spec_train, on='index')
df_test = pd.merge(df_test, df_spec_test, on='index')

In [7]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [8]:
len(tsfresh_columns)

9731

In [9]:
db = DFDB('../trial/xgbm.pkl', auto_commit=False)

In [10]:
lgbm_top200 = ['spkt_welch_densitycoeff_2',
 'spkt_welch_density__coeff_3',
 'q25_roll_std_100',
 '3th_peak_freq',
 'min_roll_std_100',
 'q05_roll_std_100',
 'iqr_6',
 'abs_max_8',
 'mfcc_5_abs_mean',
 "number_peaks{'n': 10}",
 'ave10_7',
 'mfcc_13_mean',
 "autocorrelation{'lag': 5}",
 'mfcc_accelerate_8_variance',
 'abs_max_7',
 'mfcc_accelerate_1_kurtosis',
 'q05_roll_std_1000',
 'spkt_welch_density__coeff_42',
 'max_to_min_diff_5',
 'mfcc_13_quantile25',
 'fft_coefficientcoeff_80__attr_"imag"',
 'abs_q25_5',
 'mfcc_5_mean',
 'median__roll_std',
 '5000skewness_max_',
 'fft_coefficientcoeff_6__attr_"abs"',
 'partial_autocorrelationlag_5',
 'abs_min_8',
 'spkt_welch_density__coeff_28',
 'ar_coefficientk_10__coeff_3',
 'abs_q75_7',
 'mfcc_accelerate_15_min',
 'abs_max_4',
 'mfcc_10_quantile25',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_density__coeff_79',
 "value_count{'value': 1}",
 '5000peak_peak_amp_max_',
 'min__roll_std',
 'mfcc_4_rolling_std_mean',
 'q01_roll_std_1000',
 'mfcc_3_abs_q75',
 'mfcc_5_quantile95',
 'fft_coefficientcoeff_16__attr_"imag"',
 'abs_q01_4',
 'mfcc_6_quantile05',
 'agg_autocorrelationf_agg_"mean"__maxlag_40',
 '5000quantile25skewness_',
 'mfcc_0_abs_q25',
 'mfcc_delta_3_quantile01',
 'spkt_welch_density__coeff_99',
 'mfcc_9_mean',
 'mfcc_12_mean',
 'q75_roll_mean_10',
 'max_to_min',
 'mfcc_3_abs_mean',
 'kurt_7',
 'mfcc_14_quantile01',
 'mfcc_10_mean',
 'mfcc_delta_13_min',
 'mfcc_delta_13_abs_max',
 '5000no_zero_crossing_mean_',
 'abs_q75_6',
 'mfcc_12_quantile99',
 'mfcc_accelerate_6_skewness',
 'mfcc_5_max',
 '5000smoothness_entropy_',
 '5000median_skewness_',
 'mfcc_accelerate_8_min',
 'abs_min_3',
 '5000quantile75mean_',
 "number_crossing_m{'m': 1}",
 '5000smoothness_std_',
 'mfcc_9_abs_q25',
 'flac3_1_quantile99',
 'mfcc_5_skewness',
 'fft_coefficientcoeff_56__attr_"angle"',
 'fft_coefficientcoeff_70__attr_"abs"',
 'mfcc_10_abs_q75',
 'fft_coefficientcoeff_24__attr_"angle"',
 'med_7',
 'spkt_welch_density__coeff_73',
 'abs_q99_8',
 'ave10_6',
 'spkt_welch_density__coeff_38',
 'skew_1',
 'mfcc_delta_3_abs_q95',
 "change_quantiles{'ql': 0.6, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}",
 'max_to_min_5',
 'mfcc_delta_4_quantile75',
 'abs_max_1',
 '5000crest_factor_quantile75',
 'partial_autocorrelationlag_1',
 'mfcc_delta_11_quantile01',
 'mfcc_accelerate_7_quantile01',
 'mfcc_8_rolling_std_mean',
 'q95_roll_mean_100',
 'mfcc_delta_6_rolling_std_mean',
 'flac3_0_min',
 'fft_coefficientcoeff_36__attr_"abs"',
 'iqr_8',
 'max_9',
 '5000smoothness_quantile05',
 'mfcc_1_kurtosis',
 'mfcc_7_abs_q95',
 'spkt_welch_density__coeff_66',
 'spkt_welch_density__coeff_64',
 'mfcc_3_abs_q95',
 '5000skewness_mean_',
 'mfcc_10_abs_mean',
 '5000quantile99quantile01',
 'mean_change_rate',
 'mfcc_accelerate_5_rolling_std_mean',
 'mfcc_accelerate_5_quantile05',
 '5000rms_median_',
 'flac3_1_abs_mean',
 '5000rms_quantile05',
 '5000quantile75quantile25',
 'mfcc_accelerate_12_min',
 "value_count{'value': -1}",
 'mfcc_12_abs_max',
 'abs_min_5',
 'mfcc_5_abs_q99',
 'mfcc_delta_5_quantile01',
 'mfcc_12_quantile75',
 'fft_coefficientcoeff_56__attr_"imag"',
 'spkt_welch_densitycoeff_5',
 'mfcc_0_abs_q05',
 'mfcc_13_quantile75',
 'mfcc_delta_5_min',
 'mfcc_5_quantile99',
 'fft_coefficientcoeff_8__attr_"angle"',
 'spkt_welch_density__coeff_30',
 'mfcc_accelerate_4_max',
 'mfcc_14_median',
 "change_quantiles{'ql': 0.2, 'qh': 0.4, 'isabs': False, 'f_agg': 'var'}",
 'mfcc_5_median',
 'mfcc_accelerate_5_abs_q75',
 'spkt_welch_density__coeff_59',
 'mfcc_accelerate_5_kurtosis',
 'mfcc_delta_9_quantile99',
 'mfcc_5_quantile25',
 'spkt_welch_density__coeff_113',
 'mfcc_2_quantile05',
 'spkt_welch_density__coeff_58',
 'mfcc_5_abs_q75',
 'spkt_welch_density__coeff_22',
 'spkt_welch_density__coeff_115',
 'spkt_welch_density__coeff_4',
 'mfcc_5_abs_q05',
 'spkt_welch_density__coeff_25',
 'mfcc_5_abs_max',
 'spkt_welch_density__coeff_27',
 'mfcc_4_quantile75',
 'mfcc_3_rolling_std_mean',
 'mfcc_4_median',
 'mfcc_0_quantile01',
 '5000quantile75rssq_',
 'fft_coefficientcoeff_24__attr_"imag"',
 'mfcc_delta_12_kurtosis',
 '5000rms_quantile25',
 'mfcc_13_quantile05',
 'fft_coefficientcoeff_8__attr_"imag"',
 'mfcc_delta_11_rolling_std_mean',
 'mfcc_delta_12_quantile99',
 'mfcc_accelerate_9_abs_q25',
 'mfcc_11_abs_mean',
 'mfcc_delta_15_abs_max',
 'abs_max_2',
 'mfcc_11_abs_std',
 'abs_max_roll_mean_1000',
 'peak_to_average_power_ratio__roll_mean',
 'mfcc_13_median',
 'fft_coefficientcoeff_62__attr_"abs"',
 'mfcc_11_quantile75',
 "quantile{'q': 0.8}",
 'mfcc_delta_3_rolling_std_mean',
 'abs_q99_7',
 'mfcc_delta_12_max',
 'mfcc_12_min',
 'mfcc_10_quantile99',
 'mfcc_14_abs_q99',
 'mfcc_14_quantile05',
 'mfcc_14_quantile25',
 'kurt_1',
 'mfcc_2_median',
 'q01_2',
 'mfcc_2_abs_q25',
 'kurt_8',
 'mfcc_accelerate_6_abs_max',
 'mfcc_1_quantile75',
 'q05_5',
 'abs_q95_9',
 'q05_roll_mean_100',
 'mfcc_10_abs_q25',
 'q75_9',
 'mfcc_15_mean',
 "change_quantiles{'ql': 0.6, 'qh': 0.8, 'isabs': True, 'f_agg': 'mean'}",
 'flac3_0_abs_std',
 "change_quantiles{'ql': 0.4, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}"]

In [11]:
df_trial = db.select()
df_trial['kfold-type'] = df_trial['param'].apply(lambda x: x['kfold']['type'])

In [14]:
df_trial[df_trial['kfold-type']=='group'][['datetime','nfeatures', 'remark', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','remark']].sort_values(by=['val_mae'], ascending=True).head()

Unnamed: 0,datetime,nfeatures,remark,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark.1
440,2019-05-06 07:48:27.216046,25,tune columns272,1.889476,0.055808,1.94578,0.611434,0.056304,tune columns272
328,2019-05-06 07:29:11.053666,25,tune columns272,1.921549,0.045514,1.946469,0.602273,0.02492,tune columns272
316,2019-05-06 07:27:06.754855,25,tune columns272,1.894958,0.042848,1.947048,0.610165,0.05209,tune columns272
435,2019-05-06 07:47:48.957295,25,tune columns272,1.919623,0.047471,1.947087,0.604345,0.027464,tune columns272
434,2019-05-06 07:47:35.692914,25,tune columns272,1.900924,0.042155,1.947729,0.607891,0.046805,tune columns272


In [17]:
param = {'columns': lgbm_top200,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler', 'init':{}},
 'algorithm': {'cls': 'xgb.XGBRegressor',
  'init': {'max_depth': 4,
   'max_bin': 67,
   'eta': 0.342568877200916,
   'colsample_bytree': 0.8864953019985605,
   'min_child_weight': 213,
   'n_estimators': 450,
   'subsample': 0.853698982949453,
   'reg_lambda': 0.982742342106065,
   'reg_alpha': 0.3136457598284541,
   'n_jobs': 16},
  'fit': {}},
        }

In [18]:
mytrial =[]
EP.select_features_(df_train, param, mytrial, nfeats_best=20, nfeats_removed_per_try=10, key='average_permutation_weight', remark='rfe to 20 group3')

In [19]:
EP.select_features_(df_train, param, mytrial, nfeats_best=20, nfeats_removed_per_try=10, key='average_model_weight', remark='rfe to 20 group3 model weight')

In [22]:
# pd.DataFrame(mytrial)[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

In [23]:
for trial_i in mytrial:
    db.insert(trial_i)

In [24]:
df_trial = db.select()

In [26]:
df_trial[df_trial['remark']=='rfe to 20 group3'][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1846,2019-05-31 03:58:27.136660,rfe to 20 group3,200,1.205982,0.000182,2.106715,0.00444,0.900733
1847,2019-05-31 04:09:29.695793,rfe to 20 group3,190,1.211057,0.000538,2.099602,0.004261,0.888545
1848,2019-05-31 04:20:08.066918,rfe to 20 group3,180,1.220032,9.8e-05,2.097443,0.004153,0.87741
1849,2019-05-31 04:28:16.141972,rfe to 20 group3,170,1.251079,0.000287,2.076615,0.003928,0.825536
1850,2019-05-31 04:33:53.630082,rfe to 20 group3,160,1.271805,0.000497,2.066525,0.00586,0.79472
1851,2019-05-31 04:39:44.418728,rfe to 20 group3,150,1.288827,0.000626,2.058341,0.006943,0.769514
1852,2019-05-31 04:44:54.479791,rfe to 20 group3,140,1.293057,0.000456,2.049283,0.006572,0.756226
1853,2019-05-31 04:50:17.943489,rfe to 20 group3,130,1.309402,0.000318,2.049743,0.005642,0.740341
1854,2019-05-31 04:56:00.099157,rfe to 20 group3,120,1.320676,0.000428,2.038732,0.004942,0.718056
1855,2019-05-31 05:01:24.725280,rfe to 20 group3,110,1.329129,0.000645,2.034923,0.005551,0.705794


In [27]:
score = df_trial.loc[1863].val_mae

In [29]:
param = df_trial.loc[1863]['param']

In [35]:
def width_frist_rfe(df_train, param, trial, score, df_test=None, remark=None):

    param_ = copy.deepcopy(param)
    columns_ = param_['columns']
    best_score = score
    best_param = param_
    for col in columns_:
        param_['columns'] = list(set(columns_) - set([col]))
        df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(df_train, param_, df_test=df_test, trial=trial, is_output_feature_importance=False, remark=remark)
        val_mae_mean = np.mean(df_his.valid)
        if val_mae_mean<best_score:
            best_score = val_mae_mean
            best_param = copy.deepcopy(param_)

    if best_score < score:
        width_frist_rfe(df_train, best_param, trial, best_score, df_test, remark=remark)

    return

In [None]:
mytrial=[]
width_frist_rfe(df_train, param, mytrial, score, df_test=df_test, remark='wf 1863')

In [None]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [None]:
df_trial[(df_trial['remark']=='wf 1863')].sort_values(by=['val_mae'])[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

In [20]:
mytrial = []
#  tune hypterparameters
def objective(trial):
        
    max_depth = trial.suggest_int('max_depth', 2, 6)
    max_bin = trial.suggest_int('max_bin', 10, 100)
    eta = trial.suggest_uniform('eta', 0.01, 0.4)
    colsample_bytree = trial.suggest_uniform('colsample_bytree', 0.6, 1)
    min_child_weight = trial.suggest_int('min_child_weight', 200, 600)
    n_estimators = trial.suggest_int('n_estimators', 100, 500)
    subsample = trial.suggest_uniform('subsample', 0.6, 1.0)
    reg_lambda = trial.suggest_loguniform('reg_lambda', 0.000001, 1.0)
    reg_alpha = trial.suggest_loguniform('reg_alpha', 0.000001, 1.0)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':selected_columns,
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified'
        },
        'scaler':{
            'cls':'StandardScaler',
            'init':{}
        },
        'algorithm':{
            'cls':'xgb.XGBRegressor',
            'init':{
                "max_depth":max_depth,
                "max_bin":max_bin,
                "eta":eta,
                "colsample_bytree":colsample_bytree,
                "min_child_weight":min_child_weight,
                "n_estimators":n_estimators,
                "subsample":subsample,
                "reg_lambda":reg_lambda,
                "reg_alpha":reg_alpha,
                'n_jobs':4
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 1444 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[32m[I 2019-05-24 18:07:58,115][0m Finished trial#0 resulted in value: 0.09654915226932492. Current best value is 0.09654915226932492 with parameters: {'max_depth': 2, 'max_bin': 74, 'eta': 0.3575927818609957, 'colsample_bytree': 0.9314810993170525, 'min_child_weight': 255, 'n_estimators': 463, 'subsample': 0.9725118873482428, 'reg_lambda': 0.11126424611801736, 'reg_alpha': 5.179310231653595e-06, 'random_state': 7733}.[0m
[32m[I 2019-05-24 18:08:19,722][0m Finished trial#1 resulted in value: 0.19048093555966372. Current best value is 0.09654915226932492 with parameters: {'max_depth': 2, 'max_bin': 74, 'eta': 0.3575927818609957, 'colsample_bytree': 0.9314810993170525, 'min_child_weight': 255, 'n_estimators': 463, 'subsample': 0.9725118873482428, 'reg_lambda': 0.11126424611801736, 'reg_alpha': 5.179310231653595e-06, 'random_state': 7733}.[0m
[32m[I 2019-05-24 18:08:28,902][0m Finished trial#2 resulted in value: 0.0825189805910531. Current best value is 0.0825189805910531 with par

[32m[I 2019-05-24 18:10:53,902][0m Finished trial#19 resulted in value: 0.04243787830732163. Current best value is 0.04164538312850546 with parameters: {'max_depth': 2, 'max_bin': 79, 'eta': 0.055115526030071034, 'colsample_bytree': 0.6703701999250644, 'min_child_weight': 546, 'n_estimators': 173, 'subsample': 0.6421071261557975, 'reg_lambda': 0.00017170206803463595, 'reg_alpha': 9.992062468315778e-06, 'random_state': 2174}.[0m
[32m[I 2019-05-24 18:11:02,040][0m Finished trial#20 resulted in value: 0.05533363543695176. Current best value is 0.04164538312850546 with parameters: {'max_depth': 2, 'max_bin': 79, 'eta': 0.055115526030071034, 'colsample_bytree': 0.6703701999250644, 'min_child_weight': 546, 'n_estimators': 173, 'subsample': 0.6421071261557975, 'reg_lambda': 0.00017170206803463595, 'reg_alpha': 9.992062468315778e-06, 'random_state': 2174}.[0m
[32m[I 2019-05-24 18:11:09,490][0m Finished trial#21 resulted in value: 0.08313694765828131. Current best value is 0.04164538312

[32m[I 2019-05-24 18:13:51,736][0m Finished trial#38 resulted in value: 0.10833673226807883. Current best value is 0.04015266006490325 with parameters: {'max_depth': 2, 'max_bin': 100, 'eta': 0.12208002424566186, 'colsample_bytree': 0.7574976142839639, 'min_child_weight': 561, 'n_estimators': 160, 'subsample': 0.6897189341673549, 'reg_lambda': 2.0284672552003527e-05, 'reg_alpha': 0.00014611760060092763, 'random_state': 139}.[0m
[32m[I 2019-05-24 18:14:01,843][0m Finished trial#39 resulted in value: 0.12350300993173415. Current best value is 0.04015266006490325 with parameters: {'max_depth': 2, 'max_bin': 100, 'eta': 0.12208002424566186, 'colsample_bytree': 0.7574976142839639, 'min_child_weight': 561, 'n_estimators': 160, 'subsample': 0.6897189341673549, 'reg_lambda': 2.0284672552003527e-05, 'reg_alpha': 0.00014611760060092763, 'random_state': 139}.[0m
[32m[I 2019-05-24 18:14:06,691][0m Finished trial#40 resulted in value: 0.05363298275198379. Current best value is 0.04015266006

[32m[I 2019-05-24 18:17:12,947][0m Finished trial#57 resulted in value: 0.052765888691753825. Current best value is 0.03669089845194971 with parameters: {'max_depth': 2, 'max_bin': 56, 'eta': 0.3126756353868539, 'colsample_bytree': 0.8693108907279761, 'min_child_weight': 277, 'n_estimators': 100, 'subsample': 0.9277174698987204, 'reg_lambda': 0.00017365837666918308, 'reg_alpha': 5.727717460573909e-05, 'random_state': 2795}.[0m
[32m[I 2019-05-24 18:17:19,265][0m Finished trial#58 resulted in value: 0.09149110241765802. Current best value is 0.03669089845194971 with parameters: {'max_depth': 2, 'max_bin': 56, 'eta': 0.3126756353868539, 'colsample_bytree': 0.8693108907279761, 'min_child_weight': 277, 'n_estimators': 100, 'subsample': 0.9277174698987204, 'reg_lambda': 0.00017365837666918308, 'reg_alpha': 5.727717460573909e-05, 'random_state': 2795}.[0m
[32m[I 2019-05-24 18:17:23,108][0m Finished trial#59 resulted in value: 0.04430596917934166. Current best value is 0.03669089845194

[32m[I 2019-05-24 18:19:29,468][0m Finished trial#76 resulted in value: 0.03543505962904276. Current best value is 0.03215300861904 with parameters: {'max_depth': 2, 'max_bin': 75, 'eta': 0.21479896890932804, 'colsample_bytree': 0.8567357676247367, 'min_child_weight': 410, 'n_estimators': 100, 'subsample': 0.7222196829638388, 'reg_lambda': 3.65148836718332e-05, 'reg_alpha': 1.104799716997286e-05, 'random_state': 2898}.[0m
[32m[I 2019-05-24 18:19:32,460][0m Finished trial#77 resulted in value: 0.03613329603232274. Current best value is 0.03215300861904 with parameters: {'max_depth': 2, 'max_bin': 75, 'eta': 0.21479896890932804, 'colsample_bytree': 0.8567357676247367, 'min_child_weight': 410, 'n_estimators': 100, 'subsample': 0.7222196829638388, 'reg_lambda': 3.65148836718332e-05, 'reg_alpha': 1.104799716997286e-05, 'random_state': 2898}.[0m
[32m[I 2019-05-24 18:19:36,511][0m Finished trial#78 resulted in value: 0.06142428236208545. Current best value is 0.03215300861904 with par

[32m[I 2019-05-24 18:22:00,914][0m Finished trial#96 resulted in value: 0.03704087189358279. Current best value is 0.03215300861904 with parameters: {'max_depth': 2, 'max_bin': 75, 'eta': 0.21479896890932804, 'colsample_bytree': 0.8567357676247367, 'min_child_weight': 410, 'n_estimators': 100, 'subsample': 0.7222196829638388, 'reg_lambda': 3.65148836718332e-05, 'reg_alpha': 1.104799716997286e-05, 'random_state': 2898}.[0m
[32m[I 2019-05-24 18:22:09,759][0m Finished trial#97 resulted in value: 0.14096378994279407. Current best value is 0.03215300861904 with parameters: {'max_depth': 2, 'max_bin': 75, 'eta': 0.21479896890932804, 'colsample_bytree': 0.8567357676247367, 'min_child_weight': 410, 'n_estimators': 100, 'subsample': 0.7222196829638388, 'reg_lambda': 3.65148836718332e-05, 'reg_alpha': 1.104799716997286e-05, 'random_state': 2898}.[0m
[32m[I 2019-05-24 18:22:18,426][0m Finished trial#98 resulted in value: 0.10099036626784623. Current best value is 0.03215300861904 with par

[32m[I 2019-05-24 18:24:55,001][0m Finished trial#116 resulted in value: 0.06343236704509365. Current best value is 0.03215300861904 with parameters: {'max_depth': 2, 'max_bin': 75, 'eta': 0.21479896890932804, 'colsample_bytree': 0.8567357676247367, 'min_child_weight': 410, 'n_estimators': 100, 'subsample': 0.7222196829638388, 'reg_lambda': 3.65148836718332e-05, 'reg_alpha': 1.104799716997286e-05, 'random_state': 2898}.[0m
[32m[I 2019-05-24 18:24:57,815][0m Finished trial#117 resulted in value: 0.036539723687045694. Current best value is 0.03215300861904 with parameters: {'max_depth': 2, 'max_bin': 75, 'eta': 0.21479896890932804, 'colsample_bytree': 0.8567357676247367, 'min_child_weight': 410, 'n_estimators': 100, 'subsample': 0.7222196829638388, 'reg_lambda': 3.65148836718332e-05, 'reg_alpha': 1.104799716997286e-05, 'random_state': 2898}.[0m
[32m[I 2019-05-24 18:25:02,948][0m Finished trial#118 resulted in value: 0.05011605699506957. Current best value is 0.03215300861904 with

[32m[I 2019-05-24 18:27:30,874][0m Finished trial#135 resulted in value: 0.093931845150868. Current best value is 0.030217154784635276 with parameters: {'max_depth': 2, 'max_bin': 95, 'eta': 0.12476969025852838, 'colsample_bytree': 0.9776056476393447, 'min_child_weight': 539, 'n_estimators': 111, 'subsample': 0.6192233800189538, 'reg_lambda': 0.0003048989928800843, 'reg_alpha': 2.8307917457949566e-06, 'random_state': 1069}.[0m
[32m[I 2019-05-24 18:27:40,560][0m Finished trial#136 resulted in value: 0.10231570070205151. Current best value is 0.030217154784635276 with parameters: {'max_depth': 2, 'max_bin': 95, 'eta': 0.12476969025852838, 'colsample_bytree': 0.9776056476393447, 'min_child_weight': 539, 'n_estimators': 111, 'subsample': 0.6192233800189538, 'reg_lambda': 0.0003048989928800843, 'reg_alpha': 2.8307917457949566e-06, 'random_state': 1069}.[0m
[32m[I 2019-05-24 18:27:46,232][0m Finished trial#137 resulted in value: 0.060124083803343394. Current best value is 0.030217154

[32m[I 2019-05-24 18:29:19,875][0m Finished trial#154 resulted in value: 0.03454987886449087. Current best value is 0.02970588008209703 with parameters: {'max_depth': 2, 'max_bin': 98, 'eta': 0.13247579018677344, 'colsample_bytree': 0.910822981876779, 'min_child_weight': 518, 'n_estimators': 100, 'subsample': 0.6800979348206516, 'reg_lambda': 1.088991996638132e-06, 'reg_alpha': 0.038003709960088644, 'random_state': 1347}.[0m
[32m[I 2019-05-24 18:29:26,468][0m Finished trial#155 resulted in value: 0.06209968631253818. Current best value is 0.02970588008209703 with parameters: {'max_depth': 2, 'max_bin': 98, 'eta': 0.13247579018677344, 'colsample_bytree': 0.910822981876779, 'min_child_weight': 518, 'n_estimators': 100, 'subsample': 0.6800979348206516, 'reg_lambda': 1.088991996638132e-06, 'reg_alpha': 0.038003709960088644, 'random_state': 1347}.[0m
[32m[I 2019-05-24 18:29:29,569][0m Finished trial#156 resulted in value: 0.02805787112781474. Current best value is 0.0280578711278147

[32m[I 2019-05-24 18:31:51,718][0m Finished trial#174 resulted in value: 0.057035717057714326. Current best value is 0.02805787112781474 with parameters: {'max_depth': 2, 'max_bin': 89, 'eta': 0.03920011747338854, 'colsample_bytree': 0.922180110327549, 'min_child_weight': 543, 'n_estimators': 100, 'subsample': 0.6241314051606546, 'reg_lambda': 1.0237667772951773e-05, 'reg_alpha': 0.9304736908819962, 'random_state': 458}.[0m
[32m[I 2019-05-24 18:31:56,083][0m Finished trial#175 resulted in value: 0.04762313875465133. Current best value is 0.02805787112781474 with parameters: {'max_depth': 2, 'max_bin': 89, 'eta': 0.03920011747338854, 'colsample_bytree': 0.922180110327549, 'min_child_weight': 543, 'n_estimators': 100, 'subsample': 0.6241314051606546, 'reg_lambda': 1.0237667772951773e-05, 'reg_alpha': 0.9304736908819962, 'random_state': 458}.[0m
[32m[I 2019-05-24 18:32:04,709][0m Finished trial#176 resulted in value: 0.05980916482909658. Current best value is 0.02805787112781474 w

[32m[I 2019-05-24 18:33:56,313][0m Finished trial#194 resulted in value: 0.04789263553739756. Current best value is 0.02805787112781474 with parameters: {'max_depth': 2, 'max_bin': 89, 'eta': 0.03920011747338854, 'colsample_bytree': 0.922180110327549, 'min_child_weight': 543, 'n_estimators': 100, 'subsample': 0.6241314051606546, 'reg_lambda': 1.0237667772951773e-05, 'reg_alpha': 0.9304736908819962, 'random_state': 458}.[0m
[32m[I 2019-05-24 18:34:06,547][0m Finished trial#195 resulted in value: 0.08583071781683436. Current best value is 0.02805787112781474 with parameters: {'max_depth': 2, 'max_bin': 89, 'eta': 0.03920011747338854, 'colsample_bytree': 0.922180110327549, 'min_child_weight': 543, 'n_estimators': 100, 'subsample': 0.6241314051606546, 'reg_lambda': 1.0237667772951773e-05, 'reg_alpha': 0.9304736908819962, 'random_state': 458}.[0m
[32m[I 2019-05-24 18:34:09,990][0m Finished trial#196 resulted in value: 0.036284938348048225. Current best value is 0.02805787112781474 w

In [21]:
for trial_i in mytrial:
    db.insert(trial_i)

In [27]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 1444 by stratified')&(df_trial['mae_diff']<.05)][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].sort_values(by=['val_mae'])

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1646,2019-05-24 18:07:58.010608,tune 1444 by stratified,12,1.892733,1.8e-05,1.942439,8.4e-05,0.049705
1781,2019-05-24 18:27:30.754147,tune 1444 by stratified,12,1.897909,2.6e-05,1.946174,9.8e-05,0.048265
1659,2019-05-24 18:10:09.885777,tune 1444 by stratified,12,1.897719,1.6e-05,1.946963,0.000162,0.049245
1689,2019-05-24 18:14:38.247460,tune 1444 by stratified,12,1.898627,1.2e-05,1.947239,8.3e-05,0.048612
1721,2019-05-24 18:19:26.337447,tune 1444 by stratified,12,1.900512,3.9e-05,1.947267,9.3e-05,0.046755
1790,2019-05-24 18:28:44.545773,tune 1444 by stratified,12,1.899469,1.2e-05,1.947416,7.8e-05,0.047947
1753,2019-05-24 18:23:33.811422,tune 1444 by stratified,12,1.902664,1.9e-05,1.947674,0.000149,0.045009
1746,2019-05-24 18:22:37.559336,tune 1444 by stratified,12,1.901846,1.9e-05,1.947704,4.9e-05,0.045858
1698,2019-05-24 18:16:31.372664,tune 1444 by stratified,12,1.899234,6e-06,1.947899,0.000114,0.048664
1671,2019-05-24 18:11:31.337186,tune 1444 by stratified,12,1.902978,2.2e-05,1.948845,0.000102,0.045867


In [24]:
db.commit()