In [22]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import types
import copy

In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [4]:
df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [5]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [6]:
original_columns = df_train.columns.drop(['index','y','label','group']).tolist()

In [7]:
db = DFDB('../trial/randomforest.pkl', auto_commit=False)

In [10]:
param = {'columns': tsfresh_columns,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler'},
 'algorithm': {'cls': 'RandomForestRegressor',
  'init': {'n_estimators': 747,
   'max_depth': 9,
   'max_features': 0.6639543350506906,
   'min_samples_leaf': 0.10968001196429095,
   'random_state': 2519},
  'fit': {}},
}

In [11]:
mytrial = []
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, is_output_feature_importance=True)

In [12]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()
df_trial[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-18 01:56:49.406266,1071,2.06458,0.001991,2.155781,0.005319,0.091201


In [13]:
#check feature_importances
df_feature_importances = df_trial.loc[0]['df_feature_importances']
sorted_columns = EP.evaluate(df_feature_importances, key='average_permutation_weight')

In [14]:
param = {'columns': sorted_columns[:200],
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler'},
 'algorithm': {'cls': 'RandomForestRegressor',
  'init': {'n_estimators': 747,
   'max_depth': 9,
   'max_features': 0.6639543350506906,
   'min_samples_leaf': 0.10968001196429095,
   'random_state': 2519},
  'fit': {}},
}

In [15]:
mytrial =[]
EP.select_features_(df_train, param, mytrial, nfeats_best=10, nfeats_removed_per_try=10, key='average_permutation_weight', remark='group3 RFE')
for trial_i in mytrial:
    db.insert(trial_i)

In [16]:
df_trial = db.select()
df_trial[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-18 01:56:49.406266,1071,2.06458,0.001991,2.155781,0.005319,0.091201
1,2019-05-18 02:30:47.291326,200,2.067947,0.001957,2.130393,0.005045,0.062446
2,2019-05-18 02:55:56.751364,190,2.067924,0.001964,2.13116,0.004866,0.063235
3,2019-05-18 03:19:32.367112,180,2.068766,0.002031,2.126823,0.005744,0.058057
4,2019-05-18 03:41:29.066369,170,2.068325,0.002031,2.125899,0.005743,0.057574
5,2019-05-18 04:03:29.831165,160,2.068881,0.002042,2.128116,0.005428,0.059235
6,2019-05-18 04:23:11.683709,150,2.068273,0.00206,2.126475,0.005641,0.058202
7,2019-05-18 04:41:25.388854,140,2.068495,0.002049,2.126378,0.005588,0.057883
8,2019-05-18 04:58:23.907756,130,2.068268,0.002049,2.126613,0.005669,0.058345
9,2019-05-18 05:14:06.332461,120,2.068459,0.002046,2.126422,0.005647,0.057963


In [17]:
db.commit()

In [18]:
mytrial =[]
EP.select_features_(df_train, df_trial.loc[17]['param'], mytrial, nfeats_best=20, nfeats_removed_per_try=1, key='average_permutation_weight', remark='group3 RFE2')
for trial_i in mytrial:
    db.insert(trial_i)

In [19]:
df_trial = db.select()
df_trial[df_trial['remark']=='group3 RFE2'][['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
21,2019-05-18 07:00:01.593430,40,2.068209,0.002031,2.126173,0.005654,0.057963
22,2019-05-18 07:04:52.406408,39,2.06801,0.002061,2.125217,0.0057,0.057207
23,2019-05-18 07:09:34.605010,38,2.068297,0.002039,2.125658,0.005651,0.05736
24,2019-05-18 07:14:03.582401,37,2.068232,0.002041,2.125593,0.005526,0.057361
25,2019-05-18 07:18:23.496659,36,2.068232,0.002053,2.125668,0.005577,0.057436
26,2019-05-18 07:22:38.740724,35,2.068542,0.002035,2.125561,0.006093,0.057019
27,2019-05-18 07:26:45.832221,34,2.068289,0.002042,2.125545,0.005754,0.057256
28,2019-05-18 07:30:44.120122,33,2.068138,0.002039,2.124512,0.005773,0.056375
29,2019-05-18 07:34:38.525331,32,2.068098,0.00208,2.127243,0.005484,0.059145
30,2019-05-18 07:38:22.715179,31,2.0681,0.002044,2.125249,0.005768,0.057149


In [20]:
db.commit()

In [23]:
mytrial =[]
columns_ = copy.deepcopy(df_trial.loc[40]['param']['columns'])

def objective(trial):
        
    n_estimators = trial.suggest_int('n_estimators', 300, 1000)
    max_depth = trial.suggest_int('max_depth', 5, 16)
    max_features = trial.suggest_uniform('max_features', .6, 1)
    min_samples_leaf = trial.suggest_uniform('min_samples_leaf', 0.1, 0.5)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':columns_,
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group'
        },
        'scaler':{
            'cls':'StandardScaler',
        },
        'algorithm':{
            'cls':'RandomForestRegressor',
            'init':{
                "n_estimators":n_estimators,
                "max_depth":max_depth,
                "max_features":max_features,
                "min_samples_leaf":min_samples_leaf,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 40')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-18 08:18:19,176] Finished trial#0 resulted in value: 0.15203091377074493. Current best value is 0.15203091377074493 with parameters: {'n_estimators': 389, 'max_depth': 7, 'max_features': 0.9412196272301738, 'min_samples_leaf': 0.11858838926625759, 'random_state': 1344}.
[I 2019-05-18 08:20:40,556] Finished trial#1 resulted in value: 0.14742018273935079. Current best value is 0.14742018273935079 with parameters: {'n_estimators': 993, 'max_depth': 6, 'max_features': 0.8663215564148644, 'min_samples_leaf': 0.11735110329998513, 'random_state': 1677}.
[I 2019-05-18 08:20:42,613] Finished trial#2 resulted in value: 0.0021601955197078997. Current best value is 0.0021601955197078997 with parameters: {'n_estimators': 370, 'max_depth': 9, 'max_features': 0.7870536516140948, 'min_samples_leaf': 0.4081619047795777, 'random_state': 1368}.
[I 2019-05-18 08:21:24,441] Finished trial#3 resulted in value: 0.03448681436415998. Current best value is 0.0021601955197078997 with parameters: {'n_e

[I 2019-05-18 08:27:45,763] Finished trial#28 resulted in value: 0.00217047729260023. Current best value is 0.002150079549674844 with parameters: {'n_estimators': 686, 'max_depth': 13, 'max_features': 0.9032622664763919, 'min_samples_leaf': 0.49113089565809787, 'random_state': 9212}.
[I 2019-05-18 08:27:49,448] Finished trial#29 resulted in value: 0.0021654917752371903. Current best value is 0.002150079549674844 with parameters: {'n_estimators': 686, 'max_depth': 13, 'max_features': 0.9032622664763919, 'min_samples_leaf': 0.49113089565809787, 'random_state': 9212}.
[I 2019-05-18 08:27:52,779] Finished trial#30 resulted in value: 0.0021720009048310967. Current best value is 0.002150079549674844 with parameters: {'n_estimators': 686, 'max_depth': 13, 'max_features': 0.9032622664763919, 'min_samples_leaf': 0.49113089565809787, 'random_state': 9212}.
[I 2019-05-18 08:27:57,998] Finished trial#31 resulted in value: 0.0021645078297672484. Current best value is 0.002150079549674844 with param

[I 2019-05-18 08:37:06,410] Finished trial#56 resulted in value: 0.002163229963808081. Current best value is 0.002150079549674844 with parameters: {'n_estimators': 686, 'max_depth': 13, 'max_features': 0.9032622664763919, 'min_samples_leaf': 0.49113089565809787, 'random_state': 9212}.
[I 2019-05-18 08:37:09,275] Finished trial#57 resulted in value: 0.00214538447047399. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:37:11,169] Finished trial#58 resulted in value: 0.002171335971008242. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:38:05,661] Finished trial#59 resulted in value: 0.1279500759099678. Current best value is 0.00214538447047399 with parameters: {'n

[I 2019-05-18 08:42:04,002] Finished trial#84 resulted in value: 0.0021684760522143374. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:42:06,279] Finished trial#85 resulted in value: 0.0021603383685917825. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:42:58,599] Finished trial#86 resulted in value: 0.14781269749526293. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:43:02,377] Finished trial#87 resulted in value: 0.002164887269594691. Current best value is 0.00214538447047399 with parameters: {

[I 2019-05-18 08:48:06,123] Finished trial#112 resulted in value: 0.027524406192789813. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:48:47,397] Finished trial#113 resulted in value: 0.035852775897868984. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:48:52,090] Finished trial#114 resulted in value: 0.002167433003920818. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:49:27,369] Finished trial#115 resulted in value: 0.037340253173635624. Current best value is 0.00214538447047399 with parameters

[I 2019-05-18 08:55:52,862] Finished trial#140 resulted in value: 0.002168181112663882. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:55:57,567] Finished trial#141 resulted in value: 0.002157024956923964. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:56:44,013] Finished trial#142 resulted in value: 0.037791854963876215. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:56:47,004] Finished trial#143 resulted in value: 0.002162331872142032. Current best value is 0.00214538447047399 with parameters

[I 2019-05-18 08:59:10,784] Finished trial#168 resulted in value: 0.0371169461189405. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:59:14,325] Finished trial#169 resulted in value: 0.002164713250872238. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:59:17,323] Finished trial#170 resulted in value: 0.0021636783483817133. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 08:59:21,217] Finished trial#171 resulted in value: 0.00217020374789437. Current best value is 0.00214538447047399 with parameters: 

[I 2019-05-18 09:03:05,567] Finished trial#196 resulted in value: 0.0021738072443519894. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 09:03:08,392] Finished trial#197 resulted in value: 0.0021570483772468327. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 09:03:10,734] Finished trial#198 resulted in value: 0.002181347558565057. Current best value is 0.00214538447047399 with parameters: {'n_estimators': 513, 'max_depth': 15, 'max_features': 0.8827083655650999, 'min_samples_leaf': 0.4582789811149415, 'random_state': 2005}.
[I 2019-05-18 09:03:13,273] Finished trial#199 resulted in value: 0.0021560409446921935. Current best value is 0.00214538447047399 with paramet

In [24]:
for trial_i in mytrial:
    db.insert(trial_i)

In [29]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 40')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
78,2019-05-18 08:31:09.037848,tune 40,21,2.11185,0.002311,2.145305,0.007731,0.033456
88,2019-05-18 08:35:01.838405,tune 40,21,2.159692,0.001855,2.182495,0.010075,0.022804
174,2019-05-18 08:52:09.149143,tune 40,21,2.193001,0.001675,2.210125,0.009827,0.017123
96,2019-05-18 08:37:00.251487,tune 40,21,2.232539,0.002243,2.245187,0.007521,0.012648
45,2019-05-18 08:21:24.439448,tune 40,21,2.232313,0.002082,2.247657,0.007442,0.015343
48,2019-05-18 08:22:16.729684,tune 40,21,2.232597,0.002142,2.247716,0.007361,0.015119
102,2019-05-18 08:38:48.951491,tune 40,21,2.235607,0.001949,2.252153,0.007751,0.016546
74,2019-05-18 08:28:49.235098,tune 40,21,2.236251,0.002044,2.25239,0.007559,0.01614
172,2019-05-18 08:51:23.373586,tune 40,21,2.236388,0.001999,2.253167,0.007663,0.016778
141,2019-05-18 08:45:38.343988,tune 40,21,2.23679,0.002003,2.253397,0.007745,0.016607


In [27]:
db.commit()

In [30]:
param = copy.deepcopy(df_trial.loc[78]['param'])
param['kfold']['type'] = 'stratified'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 78 use stratified')

In [31]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [32]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
242,2019-05-18 09:09:25.478405,21,2.122507,5.5e-05,2.127397,0.000151,0.00489


In [39]:
db.commit()

In [33]:
mytrial =[]
columns_ = copy.deepcopy(df_trial.loc[40]['param']['columns'])

def objective(trial):
        
    n_estimators = trial.suggest_int('n_estimators', 300, 1000)
    max_depth = trial.suggest_int('max_depth', 5, 16)
    max_features = trial.suggest_uniform('max_features', .6, 1)
    min_samples_leaf = trial.suggest_uniform('min_samples_leaf', 0.1, 0.5)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={
        'columns':columns_,
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified'
        },
        'scaler':{
            'cls':'StandardScaler',
        },
        'algorithm':{
            'cls':'RandomForestRegressor',
            'init':{
                "n_estimators":n_estimators,
                "max_depth":max_depth,
                "max_features":max_features,
                "min_samples_leaf":min_samples_leaf,
                "random_state":random_state,
            },
            'fit':{
            },
        },
    }
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 40 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-18 09:11:10,854] Finished trial#0 resulted in value: 2.715027241126506e-05. Current best value is 2.715027241126506e-05 with parameters: {'n_estimators': 406, 'max_depth': 11, 'max_features': 0.6842318374816689, 'min_samples_leaf': 0.32107882846192204, 'random_state': 6125}.
[I 2019-05-18 09:12:06,681] Finished trial#1 resulted in value: 0.009840411876522884. Current best value is 2.715027241126506e-05 with parameters: {'n_estimators': 406, 'max_depth': 11, 'max_features': 0.6842318374816689, 'min_samples_leaf': 0.32107882846192204, 'random_state': 6125}.
[I 2019-05-18 09:12:08,564] Finished trial#2 resulted in value: 1.7861067665055586e-05. Current best value is 1.7861067665055586e-05 with parameters: {'n_estimators': 340, 'max_depth': 13, 'max_features': 0.8482864818518201, 'min_samples_leaf': 0.36730481117542957, 'random_state': 8129}.
[I 2019-05-18 09:12:32,135] Finished trial#3 resulted in value: 0.009566387788642163. Current best value is 1.7861067665055586e-05 with pa

[I 2019-05-18 09:19:41,991] Finished trial#28 resulted in value: 0.0027376946469548314. Current best value is 1.7861067665055586e-05 with parameters: {'n_estimators': 340, 'max_depth': 13, 'max_features': 0.8482864818518201, 'min_samples_leaf': 0.36730481117542957, 'random_state': 8129}.
[I 2019-05-18 09:20:27,245] Finished trial#29 resulted in value: 0.009675465472789354. Current best value is 1.7861067665055586e-05 with parameters: {'n_estimators': 340, 'max_depth': 13, 'max_features': 0.8482864818518201, 'min_samples_leaf': 0.36730481117542957, 'random_state': 8129}.
[I 2019-05-18 09:20:31,417] Finished trial#30 resulted in value: 2.9777246607510423e-05. Current best value is 1.7861067665055586e-05 with parameters: {'n_estimators': 340, 'max_depth': 13, 'max_features': 0.8482864818518201, 'min_samples_leaf': 0.36730481117542957, 'random_state': 8129}.
[I 2019-05-18 09:20:33,403] Finished trial#31 resulted in value: 2.084940060650526e-05. Current best value is 1.7861067665055586e-05 

[I 2019-05-18 09:29:55,718] Finished trial#56 resulted in value: 2.8796399200935045e-05. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:29:57,871] Finished trial#57 resulted in value: 3.0120390687900376e-05. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:30:29,347] Finished trial#58 resulted in value: 0.009043758515672518. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:30:33,424] Finished trial#59 resulted in value: 2.5289039641471313e-05. Current best value is 1.6637338358994974e-05 w

[I 2019-05-18 09:34:58,508] Finished trial#84 resulted in value: 0.009794460859090666. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:35:03,790] Finished trial#85 resulted in value: 3.199994680047683e-05. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:35:05,495] Finished trial#86 resulted in value: 2.2700527843351245e-05. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:35:08,071] Finished trial#87 resulted in value: 2.232422179178646e-05. Current best value is 1.6637338358994974e-05 wit

[I 2019-05-18 09:45:44,652] Finished trial#112 resulted in value: 0.00530353595607381. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:45:47,983] Finished trial#113 resulted in value: 3.012685451448368e-05. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:45:49,781] Finished trial#114 resulted in value: 2.6239141870458497e-05. Current best value is 1.6637338358994974e-05 with parameters: {'n_estimators': 539, 'max_depth': 7, 'max_features': 0.6018693937461534, 'min_samples_leaf': 0.49910759509684555, 'random_state': 8559}.
[I 2019-05-18 09:45:52,825] Finished trial#115 resulted in value: 2.8910853199763918e-05. Current best value is 1.6637338358994974e-05

[I 2019-05-18 09:49:22,503] Finished trial#140 resulted in value: 2.7251192740898878e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:49:25,061] Finished trial#141 resulted in value: 2.956720511381356e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:49:28,317] Finished trial#142 resulted in value: 2.4862017949748404e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:49:30,984] Finished trial#143 resulted in value: 3.09025879481689e-05. Current best value is 1.4218825632511491e-0

[I 2019-05-18 09:52:32,868] Finished trial#168 resulted in value: 3.946335730702125e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:52:35,742] Finished trial#169 resulted in value: 2.8799500760271462e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:52:39,243] Finished trial#170 resulted in value: 2.9203387815537145e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:52:42,472] Finished trial#171 resulted in value: 2.2304282294048372e-05. Current best value is 1.4218825632511491e

[I 2019-05-18 09:58:23,628] Finished trial#196 resulted in value: 2.7750959912077404e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:58:26,898] Finished trial#197 resulted in value: 2.0211272817248613e-05. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:59:00,240] Finished trial#198 resulted in value: 0.00913950657117074. Current best value is 1.4218825632511491e-05 with parameters: {'n_estimators': 414, 'max_depth': 11, 'max_features': 0.7018679795256522, 'min_samples_leaf': 0.4655764656559597, 'random_state': 4665}.
[I 2019-05-18 09:59:04,111] Finished trial#199 resulted in value: 3.144484586975283e-05. Current best value is 1.4218825632511491e-05

In [35]:
for trial_i in mytrial:
    db.insert(trial_i)

In [36]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 40 by stratified')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
253,2019-05-18 09:16:12.602745,tune 40 by stratified,21,2.084296,7e-06,2.090182,8.3e-05,0.005887
351,2019-05-18 09:44:16.074706,tune 40 by stratified,21,2.085507,1.1e-05,2.091403,9.4e-05,0.005896
298,2019-05-18 09:29:53.828393,tune 40 by stratified,21,2.085704,1.3e-05,2.091414,0.000102,0.00571
342,2019-05-18 09:41:04.581118,tune 40 by stratified,21,2.085798,1.3e-05,2.091657,0.000103,0.00586
430,2019-05-18 09:56:39.711952,tune 40 by stratified,21,2.087954,7e-06,2.093884,8.7e-05,0.005931
288,2019-05-18 09:25:46.244849,tune 40 by stratified,21,2.090994,1.5e-05,2.096708,0.000106,0.005714
338,2019-05-18 09:37:46.954763,tune 40 by stratified,21,2.092881,8e-06,2.098116,8.4e-05,0.005234
256,2019-05-18 09:17:48.218524,tune 40 by stratified,21,2.095128,9e-06,2.100579,8.7e-05,0.005451
375,2019-05-18 09:48:59.389432,tune 40 by stratified,21,2.150335,1.9e-05,2.155001,0.000146,0.004665
258,2019-05-18 09:18:16.679319,tune 40 by stratified,21,2.201473,2.3e-05,2.205638,3.5e-05,0.004164


In [35]:
db.commit()

In [37]:
param = copy.deepcopy(df_trial.loc[253]['param'])
param['kfold']['type'] = 'group'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 253 use group')

In [38]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [39]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
443,2019-05-18 11:06:48.347881,21,2.070761,0.002008,2.133419,0.005291,0.062658


In [40]:
db.commit()