In [20]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import types
import copy

In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [4]:
df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [5]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [6]:
original_columns = df_train.columns.drop(['index','y','label','group']).tolist()

In [9]:
db = DFDB('../trial/extratrees.pkl', auto_commit=False)

In [10]:
param={'columns': tsfresh_columns,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler'},
 'algorithm': {'cls': 'ExtraTreesRegressor',
  'init': {'n_estimators': 449,
   'max_depth': 13,
   'max_features': 0.9606948036865893,
   'min_samples_leaf': 0.10365931931330866,
   'random_state': 1425},
  'fit': {}},
}

In [11]:
mytrial =[]
EP.select_features_(df_train, param, mytrial, nfeats_best=10, nfeats_removed_per_try=50, key='average_permutation_weight', remark='group3 RFE')
for trial_i in mytrial:
    db.insert(trial_i)

In [12]:
df_trial = db.select()
df_trial[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-17 12:19:03.804349,1071,2.058534,0.00142,2.127612,0.006139,0.069078
1,2019-05-17 13:44:13.230610,1021,2.06709,0.001858,2.108106,0.009386,0.041016
2,2019-05-17 14:50:22.082731,971,2.073114,0.002601,2.101241,0.010461,0.028127
3,2019-05-17 15:50:46.874394,921,2.075351,0.002834,2.101508,0.011004,0.026156
4,2019-05-17 16:47:13.173954,871,2.076047,0.002818,2.100727,0.011006,0.02468
5,2019-05-17 17:38:38.571094,821,2.076265,0.00277,2.102267,0.010817,0.026002
6,2019-05-17 18:26:15.011827,771,2.075679,0.002846,2.09933,0.011203,0.023651
7,2019-05-17 19:12:41.130864,721,2.075495,0.002861,2.101522,0.010822,0.026027
8,2019-05-17 19:53:34.398837,671,2.076067,0.002792,2.100533,0.011048,0.024466
9,2019-05-17 20:30:12.881000,621,2.075455,0.002842,2.098977,0.01075,0.023522


In [13]:
db.commit()

In [15]:
mytrial =[]
EP.select_features_(df_train, df_trial.loc[20]['param'], mytrial, nfeats_best=10, nfeats_removed_per_try=5, key='average_permutation_weight', remark='group3 RFE2')
for trial_i in mytrial:
    db.insert(trial_i)

In [18]:
df_trial = db.select()
df_trial[df_trial['remark']=='group3 RFE2'][['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
22,2019-05-18 01:36:18.799666,71,2.076379,0.002829,2.100452,0.011047,0.024073
23,2019-05-18 01:39:29.475277,66,2.077149,0.002769,2.101262,0.010885,0.024112
24,2019-05-18 01:42:21.981955,61,2.076823,0.002801,2.09997,0.010957,0.023147
25,2019-05-18 01:44:58.619239,56,2.078457,0.002721,2.1008,0.010861,0.022343
26,2019-05-18 01:47:20.500921,51,2.079682,0.002667,2.101819,0.010922,0.022137
27,2019-05-18 01:49:28.467015,46,2.078846,0.002682,2.100784,0.010835,0.021938
28,2019-05-18 01:51:22.271136,41,2.08103,0.002697,2.102035,0.010966,0.021005
29,2019-05-18 01:53:01.903880,36,2.082098,0.002655,2.103447,0.01087,0.021349
30,2019-05-18 01:54:27.309147,31,2.083964,0.002592,2.103171,0.010742,0.019207
31,2019-05-18 01:55:38.757217,26,2.088347,0.002761,2.101585,0.011729,0.013238


In [17]:
db.commit()

In [21]:
mytrial =[]
columns_ = copy.deepcopy(df_trial.loc[31]['param']['columns'])

#  tune hypterparameters
def objective(trial):
        
    n_estimators = trial.suggest_int('n_estimators', 300, 1000)
    max_depth = trial.suggest_int('max_depth', 5, 16)
    max_features = trial.suggest_uniform('max_features', .6, 1)
    min_samples_leaf = trial.suggest_uniform('min_samples_leaf', 0.1, 0.5)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':columns_,
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group'
        },
        'scaler':{
            'cls':'StandardScaler',
        },
        'algorithm':{
            'cls':'ExtraTreesRegressor',
            'init':{
                "n_estimators":n_estimators,
                "max_depth":max_depth,
                "max_features":max_features,
                "min_samples_leaf":min_samples_leaf,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 31')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-18 02:19:36,736] Finished trial#0 resulted in value: 0.0050894843376959. Current best value is 0.0050894843376959 with parameters: {'n_estimators': 382, 'max_depth': 7, 'max_features': 0.6079740494623934, 'min_samples_leaf': 0.3054629525707597, 'random_state': 1571}.
[I 2019-05-18 02:19:44,300] Finished trial#1 resulted in value: 0.007106717702670673. Current best value is 0.0050894843376959 with parameters: {'n_estimators': 382, 'max_depth': 7, 'max_features': 0.6079740494623934, 'min_samples_leaf': 0.3054629525707597, 'random_state': 1571}.
[I 2019-05-18 02:19:58,776] Finished trial#2 resulted in value: 0.028383279745097436. Current best value is 0.0050894843376959 with parameters: {'n_estimators': 382, 'max_depth': 7, 'max_features': 0.6079740494623934, 'min_samples_leaf': 0.3054629525707597, 'random_state': 1571}.
[I 2019-05-18 02:20:02,613] Finished trial#3 resulted in value: 0.004284617257594717. Current best value is 0.004284617257594717 with parameters: {'n_estimator

[I 2019-05-18 02:23:07,341] Finished trial#28 resulted in value: 0.0020804426957422187. Current best value is 0.0007729862768836773 with parameters: {'n_estimators': 388, 'max_depth': 8, 'max_features': 0.7873141968684848, 'min_samples_leaf': 0.4521015406507274, 'random_state': 6370}.
[I 2019-05-18 02:23:11,296] Finished trial#29 resulted in value: 0.0001544224933720231. Current best value is 0.0001544224933720231 with parameters: {'n_estimators': 363, 'max_depth': 8, 'max_features': 0.7769834125962415, 'min_samples_leaf': 0.42641174786288655, 'random_state': 4293}.
[I 2019-05-18 02:23:15,755] Finished trial#30 resulted in value: 0.002758678691924982. Current best value is 0.0001544224933720231 with parameters: {'n_estimators': 363, 'max_depth': 8, 'max_features': 0.7769834125962415, 'min_samples_leaf': 0.42641174786288655, 'random_state': 4293}.
[I 2019-05-18 02:23:19,286] Finished trial#31 resulted in value: 0.0008881845900825139. Current best value is 0.0001544224933720231 with para

[I 2019-05-18 02:26:08,736] Finished trial#56 resulted in value: 0.000853535534643246. Current best value is 0.0001544224933720231 with parameters: {'n_estimators': 363, 'max_depth': 8, 'max_features': 0.7769834125962415, 'min_samples_leaf': 0.42641174786288655, 'random_state': 4293}.
[I 2019-05-18 02:26:16,212] Finished trial#57 resulted in value: 0.0018789830344064918. Current best value is 0.0001544224933720231 with parameters: {'n_estimators': 363, 'max_depth': 8, 'max_features': 0.7769834125962415, 'min_samples_leaf': 0.42641174786288655, 'random_state': 4293}.
[I 2019-05-18 02:26:19,479] Finished trial#58 resulted in value: 0.0053917356346006905. Current best value is 0.0001544224933720231 with parameters: {'n_estimators': 363, 'max_depth': 8, 'max_features': 0.7769834125962415, 'min_samples_leaf': 0.42641174786288655, 'random_state': 4293}.
[I 2019-05-18 02:26:26,054] Finished trial#59 resulted in value: 0.002155478982627481. Current best value is 0.0001544224933720231 with para

[I 2019-05-18 02:30:10,419] Finished trial#84 resulted in value: 0.001025857235389241. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:30:17,566] Finished trial#85 resulted in value: 0.002274947312624229. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:30:27,649] Finished trial#86 resulted in value: 0.004517515852718372. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:30:39,331] Finished trial#87 resulted in value: 0.006716699265660618. Current best value is 6.68917978773872e-05 with parameters: {

[I 2019-05-18 02:33:36,895] Finished trial#112 resulted in value: 0.009001230640000888. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:33:42,437] Finished trial#113 resulted in value: 0.007108296159053132. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:33:47,156] Finished trial#114 resulted in value: 0.0025267240918934435. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:33:54,444] Finished trial#115 resulted in value: 0.0025316222234748956. Current best value is 6.68917978773872e-05 with paramet

[I 2019-05-18 02:37:40,536] Finished trial#140 resulted in value: 0.006010629022545092. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:37:50,016] Finished trial#141 resulted in value: 0.0024101767859992743. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:37:57,869] Finished trial#142 resulted in value: 0.0012586639453022828. Current best value is 6.68917978773872e-05 with parameters: {'n_estimators': 738, 'max_depth': 10, 'max_features': 0.655682696220891, 'min_samples_leaf': 0.4514812547578928, 'random_state': 1513}.
[I 2019-05-18 02:38:04,593] Finished trial#143 resulted in value: 0.004102493809624118. Current best value is 6.68917978773872e-05 with paramet

[I 2019-05-18 02:40:49,710] Finished trial#168 resulted in value: 0.0013210301418392506. Current best value is 5.057993529946858e-05 with parameters: {'n_estimators': 746, 'max_depth': 8, 'max_features': 0.6591715323995949, 'min_samples_leaf': 0.4403434459042438, 'random_state': 1823}.
[I 2019-05-18 02:40:58,373] Finished trial#169 resulted in value: 0.00691379834715369. Current best value is 5.057993529946858e-05 with parameters: {'n_estimators': 746, 'max_depth': 8, 'max_features': 0.6591715323995949, 'min_samples_leaf': 0.4403434459042438, 'random_state': 1823}.
[I 2019-05-18 02:41:05,936] Finished trial#170 resulted in value: 0.0057052114108454835. Current best value is 5.057993529946858e-05 with parameters: {'n_estimators': 746, 'max_depth': 8, 'max_features': 0.6591715323995949, 'min_samples_leaf': 0.4403434459042438, 'random_state': 1823}.
[I 2019-05-18 02:41:13,447] Finished trial#171 resulted in value: 0.0017990232455131299. Current best value is 5.057993529946858e-05 with par

[I 2019-05-18 02:44:29,756] Finished trial#196 resulted in value: 0.00026354889533936174. Current best value is 5.057993529946858e-05 with parameters: {'n_estimators': 746, 'max_depth': 8, 'max_features': 0.6591715323995949, 'min_samples_leaf': 0.4403434459042438, 'random_state': 1823}.
[I 2019-05-18 02:44:36,662] Finished trial#197 resulted in value: 0.0019089939908365902. Current best value is 5.057993529946858e-05 with parameters: {'n_estimators': 746, 'max_depth': 8, 'max_features': 0.6591715323995949, 'min_samples_leaf': 0.4403434459042438, 'random_state': 1823}.
[I 2019-05-18 02:44:44,822] Finished trial#198 resulted in value: 0.014961253991104458. Current best value is 5.057993529946858e-05 with parameters: {'n_estimators': 746, 'max_depth': 8, 'max_features': 0.6591715323995949, 'min_samples_leaf': 0.4403434459042438, 'random_state': 1823}.
[I 2019-05-18 02:44:53,928] Finished trial#199 resulted in value: 0.0015744446611485041. Current best value is 5.057993529946858e-05 with p

In [22]:
for trial_i in mytrial:
    db.insert(trial_i)

In [26]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 31')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
38,2019-05-18 02:19:58.774681,tune 31,26,2.087228,0.002689,2.10074,0.011794,0.013511
106,2019-05-18 02:28:02.968220,tune 31,26,2.096824,0.002917,2.107731,0.011098,0.010907
170,2019-05-18 02:36:55.845698,tune 31,26,2.096855,0.002657,2.10849,0.011098,0.011635
168,2019-05-18 02:36:31.553186,tune 31,26,2.097166,0.002743,2.1089,0.011653,0.011734
70,2019-05-18 02:23:49.624177,tune 31,26,2.104083,0.002617,2.114081,0.010877,0.009998
139,2019-05-18 02:32:35.815365,tune 31,26,2.107934,0.002582,2.117755,0.010808,0.009821
52,2019-05-18 02:21:56.562468,tune 31,26,2.112076,0.002707,2.121235,0.010151,0.009158
234,2019-05-18 02:44:44.799108,tune 31,26,2.121468,0.002395,2.128497,0.010967,0.007029
44,2019-05-18 02:20:51.276682,tune 31,26,2.127635,0.002389,2.133555,0.01027,0.00592
86,2019-05-18 02:25:35.054788,tune 31,26,2.128958,0.002528,2.135937,0.010452,0.006979


In [27]:
db.commit()

In [28]:
param = copy.deepcopy(df_trial.loc[38]['param'])
param['kfold']['type'] = 'stratified'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 38 use stratified')

In [29]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [30]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
236,2019-05-18 06:42:38.618614,26,2.090315,3e-06,2.091928,2.6e-05,0.001613


In [31]:
db.commit()

In [32]:
mytrial =[]
columns_ = copy.deepcopy(df_trial.loc[31]['param']['columns'])

#  tune hypterparameters
def objective(trial):
        
    n_estimators = trial.suggest_int('n_estimators', 300, 1000)
    max_depth = trial.suggest_int('max_depth', 5, 16)
    max_features = trial.suggest_uniform('max_features', .6, 1)
    min_samples_leaf = trial.suggest_uniform('min_samples_leaf', 0.1, 0.5)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':columns_,
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified'
        },
        'scaler':{
            'cls':'StandardScaler',
        },
        'algorithm':{
            'cls':'ExtraTreesRegressor',
            'init':{
                "n_estimators":n_estimators,
                "max_depth":max_depth,
                "max_features":max_features,
                "min_samples_leaf":min_samples_leaf,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 31 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-18 06:52:53,878] Finished trial#0 resulted in value: 0.0013919887895316116. Current best value is 0.0013919887895316116 with parameters: {'n_estimators': 490, 'max_depth': 5, 'max_features': 0.7588264461176745, 'min_samples_leaf': 0.368778750076565, 'random_state': 8428}.
[I 2019-05-18 06:52:59,718] Finished trial#1 resulted in value: 0.00015975684167758775. Current best value is 0.00015975684167758775 with parameters: {'n_estimators': 634, 'max_depth': 5, 'max_features': 0.6531683699354258, 'min_samples_leaf': 0.461450941631973, 'random_state': 8955}.
[I 2019-05-18 06:53:14,716] Finished trial#2 resulted in value: 0.001468141648769292. Current best value is 0.00015975684167758775 with parameters: {'n_estimators': 634, 'max_depth': 5, 'max_features': 0.6531683699354258, 'min_samples_leaf': 0.461450941631973, 'random_state': 8955}.
[I 2019-05-18 06:53:23,075] Finished trial#3 resulted in value: 0.0023335043589044475. Current best value is 0.00015975684167758775 with parameter

[I 2019-05-18 06:57:52,507] Finished trial#28 resulted in value: 0.0013146972256065416. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 06:58:04,770] Finished trial#29 resulted in value: 0.000455200653695291. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 06:58:12,630] Finished trial#30 resulted in value: 0.00015616862590425804. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 06:58:25,665] Finished trial#31 resulted in value: 0.001390219752152613. Current best value is 6.240043953774098e-06 with parame

[I 2019-05-18 07:01:57,414] Finished trial#56 resulted in value: 0.00011602541979556042. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 07:02:10,871] Finished trial#57 resulted in value: 0.0019681361801170663. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 07:02:16,798] Finished trial#58 resulted in value: 0.00040390789199020805. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 07:02:35,966] Finished trial#59 resulted in value: 0.0013854286796257534. Current best value is 6.240043953774098e-06 with par

[I 2019-05-18 07:06:05,924] Finished trial#84 resulted in value: 0.0001260913758202872. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 07:06:11,937] Finished trial#85 resulted in value: 0.00036247840715163663. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 07:06:18,333] Finished trial#86 resulted in value: 0.0009769458283864305. Current best value is 6.240043953774098e-06 with parameters: {'n_estimators': 904, 'max_depth': 8, 'max_features': 0.9905947830545193, 'min_samples_leaf': 0.4999748930109439, 'random_state': 3272}.
[I 2019-05-18 07:06:26,069] Finished trial#87 resulted in value: 0.00022891614978614558. Current best value is 6.240043953774098e-06 with par

[I 2019-05-18 07:10:27,656] Finished trial#112 resulted in value: 0.0009879453553031537. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:10:35,177] Finished trial#113 resulted in value: 0.00012459046767619727. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:10:46,880] Finished trial#114 resulted in value: 0.0008633487749083354. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:10:57,064] Finished trial#115 resulted in value: 0.0008141731244302816. Current best value is 1.2919228737593711e-06 w

[I 2019-05-18 07:14:48,718] Finished trial#140 resulted in value: 0.002558472290972921. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:14:57,930] Finished trial#141 resulted in value: 8.29764938653974e-05. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:15:04,140] Finished trial#142 resulted in value: 0.00041758024815315647. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:15:14,535] Finished trial#143 resulted in value: 0.000489493811629546. Current best value is 1.2919228737593711e-06 with

[I 2019-05-18 07:18:46,694] Finished trial#168 resulted in value: 0.0015477238207405992. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:19:02,487] Finished trial#169 resulted in value: 0.0025734006778688925. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:19:09,094] Finished trial#170 resulted in value: 0.0002568009986588781. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:19:23,741] Finished trial#171 resulted in value: 0.0014046273175260834. Current best value is 1.2919228737593711e-06 wi

[I 2019-05-18 07:23:12,725] Finished trial#196 resulted in value: 0.0005317861689038005. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:23:19,013] Finished trial#197 resulted in value: 7.11242024322433e-05. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:23:26,983] Finished trial#198 resulted in value: 0.0005268972276264538. Current best value is 1.2919228737593711e-06 with parameters: {'n_estimators': 741, 'max_depth': 6, 'max_features': 0.8675190121637301, 'min_samples_leaf': 0.4810693206559361, 'random_state': 2555}.
[I 2019-05-18 07:23:35,681] Finished trial#199 resulted in value: 0.00021477279720592083. Current best value is 1.2919228737593711e-06 wi

In [33]:
for trial_i in mytrial:
    db.insert(trial_i)

In [34]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 31 by stratified')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
348,2019-05-18 07:10:17.473837,tune 31 by stratified,26,2.094741,3e-06,2.0963,2.2e-05,0.001559
298,2019-05-18 07:02:50.899293,tune 31 by stratified,26,2.098361,4e-06,2.099842,1.8e-05,0.00148
401,2019-05-18 07:18:09.249450,tune 31 by stratified,26,2.099309,1.1e-05,2.100723,2.6e-05,0.001414
409,2019-05-18 07:19:49.326856,tune 31 by stratified,26,2.102819,3e-06,2.103971,2e-05,0.001152
248,2019-05-18 06:54:38.989644,tune 31 by stratified,26,2.102814,1e-06,2.104052,2.6e-05,0.001237
261,2019-05-18 06:57:09.715539,tune 31 by stratified,26,2.111951,3e-06,2.112943,1.5e-05,0.000992
406,2019-05-18 07:19:02.467774,tune 31 by stratified,26,2.115084,1.9e-05,2.1163,2.7e-05,0.001216
270,2019-05-18 06:58:54.716802,tune 31 by stratified,26,2.115968,8e-06,2.117067,1.6e-05,0.001098
345,2019-05-18 07:09:37.742942,tune 31 by stratified,26,2.117832,1.2e-05,2.118752,2.5e-05,0.00092
377,2019-05-18 07:14:48.702307,tune 31 by stratified,26,2.117563,1.9e-05,2.118771,2.2e-05,0.001208


In [35]:
db.commit()

In [36]:
param = copy.deepcopy(df_trial.loc[348]['param'])
param['kfold']['type'] = 'group'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 348 use group')

In [37]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [38]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
437,2019-05-18 07:30:23.386685,26,2.09126,0.002742,2.103574,0.011848,0.012313


In [39]:
db.commit()