In [5]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import types
import copy

In [6]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_colwidth', -1)

In [4]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [5]:
df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [6]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [7]:
original_columns = df_train.columns.drop(['index','y','label','group']).tolist()

In [7]:
db = DFDB('../trial/extratrees.pkl', auto_commit=False)

In [8]:
df_trial = db.select()
df_trial['kfold'] = df_trial['param'].apply(lambda x: x['kfold'])

In [10]:
df_trial[['datetime','nfeatures', 'kfold', 'remark', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','remark']]

Unnamed: 0,datetime,nfeatures,kfold,remark,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark.1
0,2019-05-10 08:42:07.651275,1071,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.499165e-06,1.468636e-14,2.269274,0.4896016,2.269272,
1,2019-05-10 09:02:37.071088,200,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.197445e-06,9.774471e-15,2.260614,0.4579565,2.260612,
2,2019-05-10 09:16:09.476415,190,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.155385e-06,1.348192e-14,2.255028,0.4687152,2.255027,
3,2019-05-10 09:28:58.440117,180,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.146699e-06,5.994585e-15,2.240094,0.4792072,2.240093,
4,2019-05-10 09:41:04.616695,170,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.162662e-06,6.818891e-15,2.225713,0.4677708,2.225712,
5,2019-05-10 09:52:28.280265,160,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.145904e-06,1.329546e-14,2.214935,0.4840267,2.214934,
6,2019-05-10 10:03:09.978947,150,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.146723e-06,1.277012e-14,2.209481,0.4743001,2.209479,
7,2019-05-10 10:13:08.307155,140,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.156019e-06,1.259827e-14,2.20367,0.460615,2.203669,
8,2019-05-10 10:22:23.215791,130,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.120365e-06,7.589212e-15,2.204208,0.4581244,2.204207,
9,2019-05-10 10:30:54.595849,120,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",,1.104875e-06,1.124369e-14,2.20944,0.4558679,2.209439,


In [13]:
param = {'columns': ['q25_roll_std_100',
  'abs_q25_5',
  'abs_q01_4',
  'iqr_6',
  'abs_q75_6',
  'mean_change_rate',
  'abs_q25_7'],
 'kfold': {'n_splits': 8,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler', 'init': {}},
 'algorithm': {'cls': 'ExtraTreesRegressor',
  'init': {'n_estimators': 690,
   'max_depth': 16,
   'max_features': 0.8718750306718887,
   'min_samples_leaf': 0.10040427643601896,
   'random_state': 921},
  'fit': {}},
 'feature_importance': {'is_output': False,
  'permutation_feature_importance': False,
  'permutation_random_state': 1}}

In [14]:
param_idx = 628
column_idx = 1
db_ = db
df_trial_ = df_trial
mytrial = []
columns = copy.deepcopy(df_trial_.loc[column_idx]['param']['columns'])
# param = copy.deepcopy(df_trial_.loc[param_idx]['param'])
# param['algorithm']['init'] = {'logging_level': 'Silent', **param['algorithm']['init']}
# param['scaler']['init'] = {}
# param['algorithm']['fit'] = {}

selected_columns = EP.revert_rfe(df_train, param, columns, df_test, mytrial, start_columns=[columns[0]], limit=20, remark='start from top1 column')
print(len(selected_columns))
for trial_i in mytrial:
    db_.insert(trial_i)
df_trial_ = db_.select()
df_trial_['kfold'] = df_trial_['param'].apply(lambda x: x['kfold'])


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was conve


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.


Data with input dtype int64 was converted to float64 by StandardScaler.



20


In [15]:
df_trial_[df_trial_['remark']=='start from top1 column'][['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
628,2019-05-22 09:44:07.599641,start from top1 column,1,2.895647,0.0004323794,2.895797,0.000433,0.00015
629,2019-05-22 09:44:09.683322,start from top1 column,2,2.884002,0.0002240458,2.88401,0.000229,8e-06
630,2019-05-22 09:44:12.201774,start from top1 column,3,2.714619,0.0007036758,2.71479,0.000679,0.000171
631,2019-05-22 09:44:15.150582,start from top1 column,4,2.610375,0.000933146,2.610522,0.000883,0.000147
632,2019-05-22 09:44:18.578889,start from top1 column,5,2.527644,0.0006887183,2.52778,0.000632,0.000136
633,2019-05-22 09:44:23.108903,start from top1 column,6,2.35407,0.0004432315,2.354314,0.000394,0.000244
634,2019-05-22 09:44:28.519659,start from top1 column,7,2.280364,0.000129047,2.280876,9.8e-05,0.000512
635,2019-05-22 09:44:33.669441,start from top1 column,8,2.326241,9.609145e-05,2.326497,9.1e-05,0.000257
636,2019-05-22 09:44:38.807574,start from top1 column,8,2.342567,0.0001830038,2.342879,0.00016,0.000312
637,2019-05-22 09:44:44.271200,start from top1 column,8,2.283377,0.0001885805,2.284008,0.000167,0.000631


In [10]:
selected_columns = ['q05_2',
 "number_peaks{'n': 1}",
 'abs_q01_6',
 'abs_q95_2',
 '5000smoothness_quantile25',
 '5000std_median_',
 '5000smoothness_median_',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'abs_q75_6',
 "number_peaks{'n': 3}",
 'q75_roll_std_10',
 "number_peaks{'n': 10}",
 '5000min_quantile75',
 '5000smoothness_quantile05',
 "number_peaks{'n': 5}",
 'abs_q01_2',
 '5000smoothness_mean_',
 'min_roll_std_100',
 'abs_q05_2',
 'q01_roll_std_1000']

In [16]:
db_.commit()

In [18]:
mytrial =[]
columns_ = selected_columns

#  tune hypterparameters
def objective(trial):
        
    n_estimators = trial.suggest_int('n_estimators', 300, 1000)
    max_depth = trial.suggest_int('max_depth', 5, 16)
    max_features = trial.suggest_uniform('max_features', .6, 1)
    min_samples_leaf = trial.suggest_uniform('min_samples_leaf', 0.1, 0.5)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':columns_,
        'kfold':{
            'n_splits': 8,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group'
        },
        'scaler':{
            'cls':'StandardScaler',
            'init':{}
        },
        'algorithm':{
            'cls':'ExtraTreesRegressor',
            'init':{
                "n_estimators":n_estimators,
                "max_depth":max_depth,
                "max_features":max_features,
                "min_samples_leaf":min_samples_leaf,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 946')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[32m[I 2019-05-24 12:47:28,591][0m Finished trial#0 resulted in value: 0.05530655157024777. Current best value is 0.05530655157024777 with parameters: {'n_estimators': 720, 'max_depth': 13, 'max_features': 0.8419372229435225, 'min_samples_leaf': 0.41451028803883194, 'random_state': 430}.[0m
[32m[I 2019-05-24 12:47:58,559][0m Finished trial#1 resulted in value: 0.07014590263136515. Current best value is 0.05530655157024777 with parameters: {'n_estimators': 720, 'max_depth': 13, 'max_features': 0.8419372229435225, 'min_samples_leaf': 0.41451028803883194, 'random_state': 430}.[0m
[32m[I 2019-05-24 12:48:13,210][0m Finished trial#2 resulted in value: 0.051681460253345024. Current best value is 0.051681460253345024 with parameters: {'n_estimators': 674, 'max_depth': 16, 'max_features': 0.7174244673921306, 'min_samples_leaf': 0.4666042909031354, 'random_state': 5249}.[0m
[32m[I 2019-05-24 12:48:37,199][0m Finished trial#3 resulted in value: 0.063993017566271. Current best value is

[32m[I 2019-05-24 12:55:33,936][0m Finished trial#28 resulted in value: 0.05481819080985244. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 12:55:50,424][0m Finished trial#29 resulted in value: 0.0660430185505833. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 12:56:08,878][0m Finished trial#30 resulted in value: 0.05813415582986256. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 12:56:19,467][0m Finished trial#31 resulted in value: 0.05149847430790173. Current best value

[32m[I 2019-05-24 13:04:16,726][0m Finished trial#56 resulted in value: 0.06014176773136588. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 13:04:26,214][0m Finished trial#57 resulted in value: 0.05397566105450707. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 13:04:44,546][0m Finished trial#58 resulted in value: 0.07475095988656179. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 13:04:55,670][0m Finished trial#59 resulted in value: 0.05720766378499062. Current best valu

[32m[I 2019-05-24 13:12:44,558][0m Finished trial#84 resulted in value: 0.055009314507144844. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 13:12:57,450][0m Finished trial#85 resulted in value: 0.058716306767531846. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 13:13:12,155][0m Finished trial#86 resulted in value: 0.06118696655940383. Current best value is 0.04494310007623157 with parameters: {'n_estimators': 465, 'max_depth': 15, 'max_features': 0.9022754212153651, 'min_samples_leaf': 0.4929284432276416, 'random_state': 2211}.[0m
[32m[I 2019-05-24 13:13:27,733][0m Finished trial#87 resulted in value: 0.06774641093883578. Current best va

[32m[I 2019-05-24 13:17:51,972][0m Finished trial#112 resulted in value: 0.049517452212461566. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:18:05,029][0m Finished trial#113 resulted in value: 0.0574632278327642. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:18:18,866][0m Finished trial#114 resulted in value: 0.061552561314382276. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:18:28,084][0m Finished trial#115 resulted in value: 0.06562936787998609. Current best va

[32m[I 2019-05-24 13:23:38,196][0m Finished trial#140 resulted in value: 0.058782624523702864. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:23:46,752][0m Finished trial#141 resulted in value: 0.049019219070077925. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:23:54,019][0m Finished trial#142 resulted in value: 0.05236137963293481. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:24:03,125][0m Finished trial#143 resulted in value: 0.04940135841826423. Current best v

[32m[I 2019-05-24 13:28:03,684][0m Finished trial#168 resulted in value: 0.0791034695788721. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:28:12,248][0m Finished trial#169 resulted in value: 0.05042909720673496. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:28:20,398][0m Finished trial#170 resulted in value: 0.053250997702087126. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:28:30,099][0m Finished trial#171 resulted in value: 0.06481239684318477. Current best val

[32m[I 2019-05-24 13:32:42,776][0m Finished trial#196 resulted in value: 0.07764769253845194. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:32:50,003][0m Finished trial#197 resulted in value: 0.05495518604553985. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:33:00,764][0m Finished trial#198 resulted in value: 0.052900412893403984. Current best value is 0.0427820050033211 with parameters: {'n_estimators': 419, 'max_depth': 13, 'max_features': 0.8014428445111527, 'min_samples_leaf': 0.4960874503683193, 'random_state': 1669}.[0m
[32m[I 2019-05-24 13:33:10,388][0m Finished trial#199 resulted in value: 0.05785245641420704. Current best va

In [19]:
for trial_i in mytrial:
    db.insert(trial_i)

In [20]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 946')&(df_trial['mae_diff']<.1)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1079,2019-05-24 13:21:45.115765,tune 946,20,2.121052,0.014251,2.16311,0.585886,0.042057
992,2019-05-24 13:01:47.079091,tune 946,20,2.124968,0.014103,2.166282,0.592204,0.041314
961,2019-05-24 12:51:44.754025,tune 946,20,2.126868,0.014357,2.167812,0.589335,0.040944
1029,2019-05-24 13:12:14.063136,tune 946,20,2.127637,0.014204,2.168664,0.591819,0.041028
987,2019-05-24 13:00:10.078939,tune 946,20,2.128339,0.014287,2.169322,0.591283,0.040983
951,2019-05-24 12:49:20.359354,tune 946,20,2.140189,0.014308,2.178597,0.59735,0.038408
1052,2019-05-24 13:16:39.538963,tune 946,20,2.142235,0.014369,2.180065,0.596826,0.03783
1082,2019-05-24 13:22:31.947759,tune 946,20,2.141518,0.014399,2.180301,0.596507,0.038782
971,2019-05-24 12:54:36.279986,tune 946,20,2.154036,0.014475,2.190599,0.601188,0.036564
1115,2019-05-24 13:28:03.576012,tune 946,20,2.160367,0.014409,2.196383,0.603286,0.036015


In [21]:
db.commit()

In [22]:
mytrial =[]
columns_ = selected_columns

#  tune hypterparameters
def objective(trial):
        
    n_estimators = trial.suggest_int('n_estimators', 300, 1000)
    max_depth = trial.suggest_int('max_depth', 5, 16)
    max_features = trial.suggest_uniform('max_features', .6, 1)
    min_samples_leaf = trial.suggest_uniform('min_samples_leaf', 0.1, 0.5)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':columns_,
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified'
        },
        'scaler':{
            'cls':'StandardScaler',
            'init':{}
        },
        'algorithm':{
            'cls':'ExtraTreesRegressor',
            'init':{
                "n_estimators":n_estimators,
                "max_depth":max_depth,
                "max_features":max_features,
                "min_samples_leaf":min_samples_leaf,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 946 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[32m[I 2019-05-24 15:04:10,902][0m Finished trial#0 resulted in value: 0.00011254177211725702. Current best value is 0.00011254177211725702 with parameters: {'n_estimators': 357, 'max_depth': 7, 'max_features': 0.6933299974537142, 'min_samples_leaf': 0.41372077113279215, 'random_state': 2764}.[0m
[32m[I 2019-05-24 15:04:13,221][0m Finished trial#1 resulted in value: 0.001019838781759535. Current best value is 0.00011254177211725702 with parameters: {'n_estimators': 357, 'max_depth': 7, 'max_features': 0.6933299974537142, 'min_samples_leaf': 0.41372077113279215, 'random_state': 2764}.[0m
[32m[I 2019-05-24 15:04:19,955][0m Finished trial#2 resulted in value: 0.0006325948087444184. Current best value is 0.00011254177211725702 with parameters: {'n_estimators': 357, 'max_depth': 7, 'max_features': 0.6933299974537142, 'min_samples_leaf': 0.41372077113279215, 'random_state': 2764}.[0m
[32m[I 2019-05-24 15:04:27,236][0m Finished trial#3 resulted in value: 0.0005303313436160429. Curr

[32m[I 2019-05-24 15:06:29,895][0m Finished trial#28 resulted in value: 4.028489452638984e-05. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:06:37,204][0m Finished trial#29 resulted in value: 0.0008842116069579538. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:06:42,670][0m Finished trial#30 resulted in value: 0.0002804017399476091. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:06:46,603][0m Finished trial#31 resulted in value: 7.196162772131918e-05. 

[32m[I 2019-05-24 15:08:41,142][0m Finished trial#56 resulted in value: 0.00022595013928709503. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:08:45,912][0m Finished trial#57 resulted in value: 3.630647583377849e-05. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:08:50,724][0m Finished trial#58 resulted in value: 0.0020317367925527866. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:08:57,022][0m Finished trial#59 resulted in value: 0.0008130635382180606.

[32m[I 2019-05-24 15:10:30,012][0m Finished trial#84 resulted in value: 0.00019620362644285388. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:10:32,705][0m Finished trial#85 resulted in value: 0.0006187890697173803. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:10:39,079][0m Finished trial#86 resulted in value: 0.001283028332594466. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:10:42,995][0m Finished trial#87 resulted in value: 0.00033448113614366153.

[32m[I 2019-05-24 15:12:30,348][0m Finished trial#112 resulted in value: 0.0005632974764881127. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:12:34,194][0m Finished trial#113 resulted in value: 0.0001610048907139684. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:12:39,247][0m Finished trial#114 resulted in value: 0.0008893264932608878. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:12:44,121][0m Finished trial#115 resulted in value: 0.00084787265775616

[32m[I 2019-05-24 15:14:39,461][0m Finished trial#140 resulted in value: 0.0013848513304368652. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:14:49,351][0m Finished trial#141 resulted in value: 0.0008860431486562051. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:14:53,325][0m Finished trial#142 resulted in value: 0.000651355642043684. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:14:59,978][0m Finished trial#143 resulted in value: 0.001271583781659483

[32m[I 2019-05-24 15:16:47,786][0m Finished trial#168 resulted in value: 0.0003835691663057. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:16:52,088][0m Finished trial#169 resulted in value: 0.0011768381741825093. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:16:56,798][0m Finished trial#170 resulted in value: 0.00027085947388680054. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:16:59,943][0m Finished trial#171 resulted in value: 3.46577817479173e-05.

[32m[I 2019-05-24 15:18:45,516][0m Finished trial#196 resulted in value: 0.00011500957055604002. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:18:51,073][0m Finished trial#197 resulted in value: 0.0008627667659222653. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:18:54,371][0m Finished trial#198 resulted in value: 0.0007081444780090979. Current best value is 3.7136661963612434e-06 with parameters: {'n_estimators': 384, 'max_depth': 6, 'max_features': 0.6404431738785726, 'min_samples_leaf': 0.46490129147074005, 'random_state': 8231}.[0m
[32m[I 2019-05-24 15:18:58,014][0m Finished trial#199 resulted in value: 0.0010085266886319

In [23]:
for trial_i in mytrial:
    db.insert(trial_i)

In [24]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 946 by stratified')&(df_trial['mae_diff']<.1)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1163,2019-05-24 15:05:45.525715,tune 946 by stratified,20,2.126614,2.757785e-07,2.127704,1.4e-05,0.00109
1159,2019-05-24 15:05:15.768656,tune 946 by stratified,20,2.139358,8.502e-08,2.140075,1.9e-05,0.000717
1337,2019-05-24 15:18:18.184621,tune 946 by stratified,20,2.139614,6.283632e-06,2.140697,1.9e-05,0.001083
1250,2019-05-24 15:11:54.696847,tune 946 by stratified,20,2.146981,3.437572e-06,2.147934,1.8e-05,0.000952
1290,2019-05-24 15:14:59.867920,tune 946 by stratified,20,2.150964,1.727687e-07,2.151555,1e-05,0.000591
1205,2019-05-24 15:08:50.625697,tune 946 by stratified,20,2.151565,7.558878e-06,2.152509,1.1e-05,0.000944
1182,2019-05-24 15:07:04.779834,tune 946 by stratified,20,2.152706,2.62811e-06,2.15328,2.1e-05,0.000574
1235,2019-05-24 15:10:49.350072,tune 946 by stratified,20,2.158712,6.449166e-06,2.159476,2.5e-05,0.000765
1239,2019-05-24 15:11:04.948213,tune 946 by stratified,20,2.174194,1.883772e-06,2.17488,2.2e-05,0.000686
1257,2019-05-24 15:12:24.003779,tune 946 by stratified,20,2.175629,1.154902e-05,2.176312,2.8e-05,0.000683


In [25]:
db.commit()