In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
import lightgbm as lgb
import catboost as cb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, Ridge, Lasso

import optuna

from common import EP
from dfdb import DFDB

import types
import copy

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [4]:
df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [5]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [6]:
original_columns = df_train.columns.drop(['index','y','label','group']).tolist()

In [7]:
common_columns50 = ["number_peaks{'n': 5}",
 'abs_q75_6',
 'q01_roll_std_100',
 '5000crest_factor_quantile75',
 'abs_q01_4',
 'q25_roll_std_100',
 'q05_roll_std_10',
 'median__roll_std',
 'abs_q05_6',
 '5000form_factor_quantile75',
 '5000smoothness_quantile05',
 "quantile{'q': 0.9}",
 'abs_q75_2',
 'q01_roll_std_1000',
 '5000quantile75mean_',
 'spkt_welch_density__coeff_3',
 "number_peaks{'n': 10}",
 "number_peaks{'n': 1}",
 '5000smoothness_mean_',
 'abs_q25_5',
 '5000std_quantile05',
 '5000smoothness_std_',
 '5000smoothness_median_',
 '5000median_variance_',
 'spkt_welch_density__coeff_4',
 '5000variance_quantile25',
 'abs_q95_2',
 'abs_q75_7',
 'q05_5',
 '5000smoothness_entropy_',
 '5000smoothness_quantile25',
 'q01_2',
 'q05_roll_std_100',
 '3th_peak_freq',
 "autocorrelation{'lag': 5}",
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'q01_roll_std_10',
 'abs_q01_3',
 '5000variance_median_',
 'q75_roll_std_10',
 '5000skewness_max_',
 'iqr_3',
 'abs_q01_5',
 'q75_roll_mean_10',
 '5000quantile99median_',
 'iqr_6',
 'Hilbert_mean_6',
 'q05_roll_std_1000',
 "number_peaks{'n': 3}",
 'spkt_welch_densitycoeff_2',
 '5000std_median_',
 '5000std_quantile25']

In [8]:
common_columns75 = ['abs_q75_6',
 'abs_q01_4',
 'q25_roll_std_100',
 'q05_roll_std_10',
 'median__roll_std',
 '5000smoothness_quantile05',
 'spkt_welch_density__coeff_3',
 "number_peaks{'n': 10}",
 'abs_q25_5',
 'abs_q75_7',
 'q05_5',
 'q05_roll_std_100',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'iqr_6',
 'q05_roll_std_1000']

In [9]:
common_columns95 = ['abs_q75_6',
 'abs_q01_4',
 'q25_roll_std_100',
 "number_peaks{'n': 10}",
 'abs_q25_5',
 'q05_roll_std_100',
 'iqr_6',
 'q05_roll_std_1000']

In [10]:
db = DFDB('../trial2/ridge.pkl', auto_commit=False)

In [21]:
param={'algorithm': {'cls': 'Ridge',
  'fit': {},
  'init': {'alpha': 50000,
   'fit_intercept': True,
   'normalize': False,
   'copy_X': True,
   'max_iter': None,
   'tol': 0.001,
   'solver': 'auto',
   'random_state': 42}},
 'columns': tsfresh_columns,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},#stratified
 'scaler': {'cls': 'StandardScaler'}}

In [22]:
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='try tsfresh_columns')
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [23]:
df_trial[['datetime','nfeatures', 'remark', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,remark,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-20 01:30:35.441360,52,try common_columns50,2.167347,0.003255,2.178708,0.013918,0.011361
1,2019-05-20 01:30:47.288629,15,try common_columns75,2.24918,0.003296,2.250681,0.01328,0.001501
2,2019-05-20 01:31:09.021913,8,try common_columns95,2.296754,0.003231,2.296672,0.013187,-8.3e-05
3,2019-05-20 01:31:51.785441,1071,try tsfresh_columns,2.057224,0.002123,2.163357,0.008752,0.106134


In [24]:
db.commit()

In [30]:
mytrial = []
#  tune hypterparameters
def objective(trial):

    alpha = trial.suggest_int('alpha', 10000, 100000)
    tol = trial.suggest_uniform('tol', 0.0001, 0.01)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={'algorithm': {'cls': 'Ridge',
      'fit': {},
      'init': {'alpha': alpha,
       'fit_intercept': True,
       'normalize': False,
       'copy_X': True,
       'max_iter': None,
       'tol': tol,
       'solver': 'auto',
       'random_state': random_state}},
     'columns': common_columns50,
     'kfold': {'n_splits': 3,
      'random_state': 1985,
      'shuffle': True,
      'type': 'group'},#stratified
     'scaler': {'cls': 'StandardScaler'}}
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 0')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-20 02:39:10,155] Finished a trial resulted in value: 0.01813528379286237. Current best value is 0.01813528379286237 with parameters: {'alpha': 72830, 'tol': 0.0002505899834102033, 'random_state': 103}.
[I 2019-05-20 02:39:10,559] Finished a trial resulted in value: 0.01928390495378464. Current best value is 0.01813528379286237 with parameters: {'alpha': 72830, 'tol': 0.0002505899834102033, 'random_state': 103}.
[I 2019-05-20 02:39:10,832] Finished a trial resulted in value: 0.01592918270722863. Current best value is 0.01592918270722863 with parameters: {'alpha': 84705, 'tol': 0.003985350095951181, 'random_state': 8967}.
[I 2019-05-20 02:39:11,184] Finished a trial resulted in value: 0.034269995971885146. Current best value is 0.01592918270722863 with parameters: {'alpha': 84705, 'tol': 0.003985350095951181, 'random_state': 8967}.
[I 2019-05-20 02:39:11,499] Finished a trial resulted in value: 0.01628166038656535. Current best value is 0.01592918270722863 with parameters: {'a

[I 2019-05-20 02:39:20,396] Finished a trial resulted in value: 0.02602094650705088. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:20,642] Finished a trial resulted in value: 0.03497137580233955. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:20,887] Finished a trial resulted in value: 0.014373895306799155. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:21,136] Finished a trial resulted in value: 0.016741562151082957. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:21,386] Finished a trial resulted in value: 0.015454898691877394. Current best value is 0.013741581321704823 with paramete

[I 2019-05-20 02:39:30,924] Finished a trial resulted in value: 0.01376370497959303. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:31,253] Finished a trial resulted in value: 0.01726596267396082. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:31,550] Finished a trial resulted in value: 0.02002463834049615. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:31,859] Finished a trial resulted in value: 0.01889787550146246. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:32,155] Finished a trial resulted in value: 0.017944909312000688. Current best value is 0.013741581321704823 with parameters

[I 2019-05-20 02:39:42,320] Finished a trial resulted in value: 0.01613850787395997. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:42,587] Finished a trial resulted in value: 0.0167945075679605. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:42,853] Finished a trial resulted in value: 0.024969224143305564. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:43,121] Finished a trial resulted in value: 0.031370345782992214. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:43,394] Finished a trial resulted in value: 0.014725398774898634. Current best value is 0.013741581321704823 with parameter

[I 2019-05-20 02:39:53,598] Finished a trial resulted in value: 0.013760712429599708. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:53,883] Finished a trial resulted in value: 0.014410933838766906. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:54,173] Finished a trial resulted in value: 0.01576661436800568. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:54,536] Finished a trial resulted in value: 0.016781554047524988. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:39:54,874] Finished a trial resulted in value: 0.015264018468548883. Current best value is 0.013741581321704823 with paramet

[I 2019-05-20 02:40:05,763] Finished a trial resulted in value: 0.014912618894361487. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:40:06,093] Finished a trial resulted in value: 0.01374569357746399. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:40:06,407] Finished a trial resulted in value: 0.0422240718973777. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:40:06,743] Finished a trial resulted in value: 0.014103839881420933. Current best value is 0.013741581321704823 with parameters: {'alpha': 99991, 'tol': 0.007235855991966803, 'random_state': 1140}.
[I 2019-05-20 02:40:07,234] Finished a trial resulted in value: 0.015245124664717823. Current best value is 0.013741581321704823 with parameter

In [31]:
for trial_i in mytrial:
    db.insert(trial_i)

In [33]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 0')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
133,2019-05-20 02:39:46.454189,tune 0,52,2.101847,0.002689,2.136976,0.011682,0.035129
60,2019-05-20 02:39:25.133704,tune 0,52,2.106568,0.00273,2.139061,0.012024,0.032494
50,2019-05-20 02:39:22.390226,tune 0,52,2.107211,0.002736,2.139366,0.012066,0.032155
10,2019-05-20 02:39:12.023099,tune 0,52,2.112166,0.002781,2.141784,0.012381,0.029619
168,2019-05-20 02:39:57.544643,tune 0,52,2.115193,0.002809,2.143357,0.012556,0.028164
66,2019-05-20 02:39:26.819663,tune 0,52,2.118943,0.002845,2.145419,0.012757,0.026476
100,2019-05-20 02:39:37.227885,tune 0,52,2.122716,0.002884,2.147613,0.012933,0.024898
170,2019-05-20 02:39:58.147284,tune 0,52,2.127811,0.002935,2.150659,0.013145,0.022848
30,2019-05-20 02:39:17.233735,tune 0,52,2.130106,0.002957,2.152061,0.013234,0.021955
38,2019-05-20 02:39:19.376520,tune 0,52,2.13175,0.002973,2.153085,0.013295,0.021335


In [34]:
param = copy.deepcopy(df_trial.loc[133]['param'])
param['kfold']['type'] = 'stratified'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 133 use stratified')

In [36]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [37]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
204,2019-05-20 02:42:11.518203,52,2.112391,4e-06,2.113513,3.6e-05,0.001122


In [38]:
db.commit()

In [40]:
mytrial = []
#  tune hypterparameters
def objective(trial):

    alpha = trial.suggest_int('alpha', 10000, 100000)
    tol = trial.suggest_uniform('tol', 0.0001, 0.01)
    random_state = trial.suggest_int('random_state', 1, 9999)
        
    args={'algorithm': {'cls': 'Ridge',
      'fit': {},
      'init': {'alpha': alpha,
       'fit_intercept': True,
       'normalize': False,
       'copy_X': True,
       'max_iter': None,
       'tol': tol,
       'solver': 'auto',
       'random_state': random_state}},
     'columns': common_columns50,
     'kfold': {'n_splits': 3,
      'random_state': 1985,
      'shuffle': True,
      'type': 'stratified'},#stratified
     'scaler': {'cls': 'StandardScaler'}}
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 0 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-20 02:46:28,075] Finished a trial resulted in value: 0.0009074092983064706. Current best value is 0.0009074092983064706 with parameters: {'alpha': 59171, 'tol': 0.007913167035645827, 'random_state': 8039}.
[I 2019-05-20 02:46:28,609] Finished a trial resulted in value: 0.001332202856808979. Current best value is 0.0009074092983064706 with parameters: {'alpha': 59171, 'tol': 0.007913167035645827, 'random_state': 8039}.
[I 2019-05-20 02:46:29,052] Finished a trial resulted in value: 0.0017000648859360732. Current best value is 0.0009074092983064706 with parameters: {'alpha': 59171, 'tol': 0.007913167035645827, 'random_state': 8039}.
[I 2019-05-20 02:46:29,324] Finished a trial resulted in value: 0.0015847352692633743. Current best value is 0.0009074092983064706 with parameters: {'alpha': 59171, 'tol': 0.007913167035645827, 'random_state': 8039}.
[I 2019-05-20 02:46:29,675] Finished a trial resulted in value: 0.0007760798530899604. Current best value is 0.0007760798530899604 wi

[I 2019-05-20 02:46:42,704] Finished a trial resulted in value: 0.0008338633181644724. Current best value is 0.0007138257881172013 with parameters: {'alpha': 99858, 'tol': 0.004204056758334616, 'random_state': 9976}.
[I 2019-05-20 02:46:43,081] Finished a trial resulted in value: 0.000752451368259194. Current best value is 0.0007138257881172013 with parameters: {'alpha': 99858, 'tol': 0.004204056758334616, 'random_state': 9976}.
[I 2019-05-20 02:46:43,429] Finished a trial resulted in value: 0.0012489388671850553. Current best value is 0.0007138257881172013 with parameters: {'alpha': 99858, 'tol': 0.004204056758334616, 'random_state': 9976}.
[I 2019-05-20 02:46:43,764] Finished a trial resulted in value: 0.0009515227083106621. Current best value is 0.0007138257881172013 with parameters: {'alpha': 99858, 'tol': 0.004204056758334616, 'random_state': 9976}.
[I 2019-05-20 02:46:44,074] Finished a trial resulted in value: 0.0014938667318986079. Current best value is 0.0007138257881172013 wi

[I 2019-05-20 02:46:57,430] Finished a trial resulted in value: 0.0007877846016071933. Current best value is 0.0007129558849617716 with parameters: {'alpha': 99990, 'tol': 0.007708550392910725, 'random_state': 7820}.
[I 2019-05-20 02:46:57,746] Finished a trial resulted in value: 0.0007797366404360752. Current best value is 0.0007129558849617716 with parameters: {'alpha': 99990, 'tol': 0.007708550392910725, 'random_state': 7820}.
[I 2019-05-20 02:46:58,064] Finished a trial resulted in value: 0.0007952838100551939. Current best value is 0.0007129558849617716 with parameters: {'alpha': 99990, 'tol': 0.007708550392910725, 'random_state': 7820}.
[I 2019-05-20 02:46:58,349] Finished a trial resulted in value: 0.000810738683120722. Current best value is 0.0007129558849617716 with parameters: {'alpha': 99990, 'tol': 0.007708550392910725, 'random_state': 7820}.
[I 2019-05-20 02:46:58,627] Finished a trial resulted in value: 0.0015993558682723642. Current best value is 0.0007129558849617716 wi

[I 2019-05-20 02:47:08,361] Finished a trial resulted in value: 0.0007788732472640641. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:08,657] Finished a trial resulted in value: 0.0009606979038748348. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:08,972] Finished a trial resulted in value: 0.0007699424190136826. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:09,264] Finished a trial resulted in value: 0.0007487598590379432. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:09,560] Finished a trial resulted in value: 0.0007696155243538881. Current best value is 0.00071290279670044

[I 2019-05-20 02:47:20,385] Finished a trial resulted in value: 0.0007799648083331751. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:20,707] Finished a trial resulted in value: 0.000769664926110286. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:21,080] Finished a trial resulted in value: 0.0007141813612471091. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:21,430] Finished a trial resulted in value: 0.0007489188125100409. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:21,782] Finished a trial resulted in value: 0.0007671676493519043. Current best value is 0.000712902796700442

[I 2019-05-20 02:47:33,181] Finished a trial resulted in value: 0.0007292314095280435. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:33,503] Finished a trial resulted in value: 0.0008263666012157628. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:33,819] Finished a trial resulted in value: 0.000756949637793887. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:34,162] Finished a trial resulted in value: 0.0007903988693227323. Current best value is 0.0007129027967004426 with parameters: {'alpha': 100000, 'tol': 0.005254566062617908, 'random_state': 9479}.
[I 2019-05-20 02:47:34,493] Finished a trial resulted in value: 0.0007698156947451444. Current best value is 0.000712902796700442

In [41]:
for trial_i in mytrial:
    db.insert(trial_i)

In [42]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 0 by stratified')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
228,2019-05-20 02:46:36.962602,tune 0 by stratified,52,2.114446,3e-06,2.115535,3.6e-05,0.001089
329,2019-05-20 02:47:12.231780,tune 0 by stratified,52,2.118512,3e-06,2.11952,3.5e-05,0.001007
249,2019-05-20 02:46:45.458087,tune 0 by stratified,52,2.124563,3e-06,2.125458,3.5e-05,0.000894
291,2019-05-20 02:47:00.866893,tune 0 by stratified,52,2.126036,3e-06,2.126911,3.6e-05,0.000875
388,2019-05-20 02:47:32.508708,tune 0 by stratified,52,2.127294,2e-06,2.128154,3.6e-05,0.00086
207,2019-05-20 02:46:29.050081,tune 0 by stratified,52,2.131934,2e-06,2.132731,3.6e-05,0.000797
260,2019-05-20 02:46:50.338657,tune 0 by stratified,52,2.133011,2e-06,2.133807,3.6e-05,0.000796
347,2019-05-20 02:47:18.378053,tune 0 by stratified,52,2.134873,2e-06,2.135663,3.7e-05,0.000789
210,2019-05-20 02:46:30.102680,tune 0 by stratified,52,2.136211,2e-06,2.136992,3.7e-05,0.00078
283,2019-05-20 02:46:58.613954,tune 0 by stratified,52,2.140295,2e-06,2.141042,3.8e-05,0.000747


In [43]:
param = copy.deepcopy(df_trial.loc[228]['param'])
param['kfold']['type'] = 'group'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 228 use group')

In [44]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [45]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
405,2019-05-20 02:51:40.118659,52,2.104126,0.002708,2.137954,0.011853,0.033828


In [46]:
db.commit()