In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
import lightgbm as lgb
import catboost as cb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, Ridge, Lasso

import optuna

from common import EP
from dfdb import DFDB

import types
import copy

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [4]:
df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [5]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [6]:
original_columns = df_train.columns.drop(['index','y','label','group']).tolist()

In [7]:
common_columns50 = ["number_peaks{'n': 5}",
 'abs_q75_6',
 'q01_roll_std_100',
 '5000crest_factor_quantile75',
 'abs_q01_4',
 'q25_roll_std_100',
 'q05_roll_std_10',
 'median__roll_std',
 'abs_q05_6',
 '5000form_factor_quantile75',
 '5000smoothness_quantile05',
 "quantile{'q': 0.9}",
 'abs_q75_2',
 'q01_roll_std_1000',
 '5000quantile75mean_',
 'spkt_welch_density__coeff_3',
 "number_peaks{'n': 10}",
 "number_peaks{'n': 1}",
 '5000smoothness_mean_',
 'abs_q25_5',
 '5000std_quantile05',
 '5000smoothness_std_',
 '5000smoothness_median_',
 '5000median_variance_',
 'spkt_welch_density__coeff_4',
 '5000variance_quantile25',
 'abs_q95_2',
 'abs_q75_7',
 'q05_5',
 '5000smoothness_entropy_',
 '5000smoothness_quantile25',
 'q01_2',
 'q05_roll_std_100',
 '3th_peak_freq',
 "autocorrelation{'lag': 5}",
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'q01_roll_std_10',
 'abs_q01_3',
 '5000variance_median_',
 'q75_roll_std_10',
 '5000skewness_max_',
 'iqr_3',
 'abs_q01_5',
 'q75_roll_mean_10',
 '5000quantile99median_',
 'iqr_6',
 'Hilbert_mean_6',
 'q05_roll_std_1000',
 "number_peaks{'n': 3}",
 'spkt_welch_densitycoeff_2',
 '5000std_median_',
 '5000std_quantile25']

In [8]:
common_columns75 = ['abs_q75_6',
 'abs_q01_4',
 'q25_roll_std_100',
 'q05_roll_std_10',
 'median__roll_std',
 '5000smoothness_quantile05',
 'spkt_welch_density__coeff_3',
 "number_peaks{'n': 10}",
 'abs_q25_5',
 'abs_q75_7',
 'q05_5',
 'q05_roll_std_100',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'iqr_6',
 'q05_roll_std_1000']

In [9]:
common_columns95 = ['abs_q75_6',
 'abs_q01_4',
 'q25_roll_std_100',
 "number_peaks{'n': 10}",
 'abs_q25_5',
 'q05_roll_std_100',
 'iqr_6',
 'q05_roll_std_1000']

In [10]:
db = DFDB('../trial2/lasso.pkl', auto_commit=False)

In [13]:
df_trial_lasso = pd.read_pickle('../trial/lasso.pkl')
df_trial_lasso['kfold-type'] = df_trial_lasso['param'].apply(lambda x: x['kfold']['type'])
df_trial_lasso.loc[8:8][['datetime','remark','kfold-type' ,'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,remark,kfold-type,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
8,2019-05-16 11:54:52.999400,,group,165,2.106014,0.002088,2.156098,0.008084,0.050085


In [21]:
param={'algorithm': {'cls': 'Lasso',
  'fit': {},
  'init': {'alpha': 0.1,
   'fit_intercept': True,
   'normalize': False,
   'precompute': False,
   'copy_X': True,
   'max_iter': 1000,
   'tol': 0.0001,
   'warm_start': False,
   'positive': False,
   'random_state': 42,
   'selection': 'cyclic'}},
 'columns': common_columns95,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler'}}

In [22]:
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='try common_columns95')
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [23]:
df_trial[['datetime','nfeatures', 'remark', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,remark,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-20 01:46:44.423552,52,try common_columns50,2.114255,0.002398,2.159265,0.008267,0.04501
1,2019-05-20 01:47:16.151042,15,try common_columns75,2.16636,0.003511,2.170198,0.013431,0.003837
2,2019-05-20 01:47:29.738056,8,try common_columns95,2.176899,0.003513,2.179305,0.013346,0.002405


In [24]:
db.commit()

In [25]:
mytrial = []
#  tune hypterparameters
def objective(trial):

    alpha = trial.suggest_uniform('alpha', 0.001, 10)
    tol = trial.suggest_uniform('tol', 0.00001, 0.001)
    max_iter = trial.suggest_int('max_iter', 500, 5000)
    random_state = trial.suggest_int('random_state', 1, 9999)
    
        
    args={'algorithm': {'cls': 'Lasso',
      'fit': {},
      'init': {'alpha': alpha,
       'fit_intercept': True,
       'normalize': False,
       'precompute': False,
       'copy_X': True,
       'max_iter': max_iter,
       'tol': tol,
       'warm_start': False,
       'positive': False,
       'random_state': random_state,
       'selection': 'cyclic'}},
     'columns': common_columns50,
     'kfold': {'n_splits': 3,
      'random_state': 1985,
      'shuffle': True,
      'type': 'group'},
     'scaler': {'cls': 'StandardScaler'}}
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 0')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-20 01:51:14,459] Finished a trial resulted in value: 0.0014474656766977202. Current best value is 0.0014474656766977202 with parameters: {'alpha': 1.6510771500121462, 'tol': 0.0005409033132695452, 'max_iter': 1321, 'random_state': 4889}.
[I 2019-05-20 01:51:14,938] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0014474656766977202 with parameters: {'alpha': 1.6510771500121462, 'tol': 0.0005409033132695452, 'max_iter': 1321, 'random_state': 4889}.
[I 2019-05-20 01:51:15,498] Finished a trial resulted in value: 0.0012364307674792986. Current best value is 0.0012364307674792986 with parameters: {'alpha': 1.80324797822226, 'tol': 0.0006008103833439174, 'max_iter': 2879, 'random_state': 532}.
[I 2019-05-20 01:51:15,785] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0012364307674792986 with parameters: {'alpha': 1.80324797822226, 'tol': 0.0006008103833439174, 'max_iter': 2879, 'random_state': 532}.
[I 2019-05-2

[I 2019-05-20 01:51:40,833] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01:51:41,132] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01:51:41,390] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01:51:41,643] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01

[I 2019-05-20 01:52:10,642] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01:52:11,500] Finished a trial resulted in value: 0.004298510493534384. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01:52:11,961] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01:52:12,679] Finished a trial resulted in value: 0.0007807356449259135. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 0

[I 2019-05-20 01:52:42,178] Finished a trial resulted in value: 0.0010823520127538713. Current best value is 0.0005053210240190272 with parameters: {'alpha': 1.2058693772674953, 'tol': 0.0005257301608640711, 'max_iter': 4342, 'random_state': 63}.
[I 2019-05-20 01:52:42,935] Finished a trial resulted in value: 0.0002674710449033673. Current best value is 0.0002674710449033673 with parameters: {'alpha': 1.2999163577678936, 'tol': 0.0005252176535936848, 'max_iter': 2401, 'random_state': 6565}.
[I 2019-05-20 01:52:43,757] Finished a trial resulted in value: 0.0014544800551584871. Current best value is 0.0002674710449033673 with parameters: {'alpha': 1.2999163577678936, 'tol': 0.0005252176535936848, 'max_iter': 2401, 'random_state': 6565}.
[I 2019-05-20 01:52:44,080] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.0002674710449033673 with parameters: {'alpha': 1.2999163577678936, 'tol': 0.0005252176535936848, 'max_iter': 2401, 'random_state': 6565}.
[I 2019

[I 2019-05-20 01:53:01,903] Finished a trial resulted in value: 0.002164956108893131. Current best value is 0.00018810737636716532 with parameters: {'alpha': 1.2903205407019744, 'tol': 0.0005936882220585523, 'max_iter': 2499, 'random_state': 4320}.
[I 2019-05-20 01:53:02,718] Finished a trial resulted in value: 0.00010845209110131165. Current best value is 0.00010845209110131165 with parameters: {'alpha': 1.2814333380014282, 'tol': 0.000777963904853294, 'max_iter': 2680, 'random_state': 6365}.
[I 2019-05-20 01:53:03,384] Finished a trial resulted in value: 0.0009306950082599914. Current best value is 0.00010845209110131165 with parameters: {'alpha': 1.2814333380014282, 'tol': 0.000777963904853294, 'max_iter': 2680, 'random_state': 6365}.
[I 2019-05-20 01:53:04,194] Finished a trial resulted in value: 0.003954868379136233. Current best value is 0.00010845209110131165 with parameters: {'alpha': 1.2814333380014282, 'tol': 0.000777963904853294, 'max_iter': 2680, 'random_state': 6365}.
[I 2

[I 2019-05-20 01:53:41,704] Finished a trial resulted in value: 0.04244975967854364. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05-20 01:53:42,090] Finished a trial resulted in value: 0.002164956108893131. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05-20 01:53:42,789] Finished a trial resulted in value: 0.000807283131036804. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05-20 01:53:43,553] Finished a trial resulted in value: 0.0008668921796028425. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05-2

[I 2019-05-20 01:54:36,091] Finished a trial resulted in value: 0.002164956108893131. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05-20 01:54:36,753] Finished a trial resulted in value: 0.0014086447217753915. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05-20 01:54:37,095] Finished a trial resulted in value: 0.002164956108893131. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05-20 01:54:37,684] Finished a trial resulted in value: 0.0007807806966918804. Current best value is 3.053675792988322e-05 with parameters: {'alpha': 1.267640614222313, 'tol': 0.0005606322379021064, 'max_iter': 2042, 'random_state': 4756}.
[I 2019-05

In [26]:
for trial_i in mytrial:
    db.insert(trial_i)

In [31]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 0')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
103,2019-05-20 01:52:46.174696,tune 0,52,2.10715,0.002435,2.155821,0.008016,0.048671
82,2019-05-20 01:52:33.221832,tune 0,52,2.133446,0.002471,2.16543,0.009388,0.031984
119,2019-05-20 01:52:57.017587,tune 0,52,2.134934,0.002463,2.166038,0.009576,0.031104
140,2019-05-20 01:53:09.322449,tune 0,52,2.13521,0.002464,2.166165,0.009607,0.030956
172,2019-05-20 01:53:48.459675,tune 0,52,2.136336,0.002472,2.16666,0.009747,0.030324
16,2019-05-20 01:51:28.711566,tune 0,52,2.145295,0.002543,2.171161,0.010706,0.025866
189,2019-05-20 01:54:35.386788,tune 0,52,2.146335,0.002546,2.171792,0.010803,0.025457
57,2019-05-20 01:52:07.015579,tune 0,52,2.155701,0.002637,2.177258,0.0116,0.021557
77,2019-05-20 01:52:30.059601,tune 0,52,2.158582,0.002675,2.179058,0.011791,0.020476
162,2019-05-20 01:53:41.671303,tune 0,52,2.161101,0.002718,2.180568,0.011942,0.019467


In [32]:
param = copy.deepcopy(df_trial.loc[103]['param'])
param['kfold']['type'] = 'stratified'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 103 use stratified')

In [33]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [34]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
203,2019-05-20 02:40:49.310949,52,2.121924,3e-06,2.123358,7e-06,0.001433


In [35]:
db.commit()

In [39]:
mytrial = []
#  tune hypterparameters
def objective(trial):

    alpha = trial.suggest_uniform('alpha', 0.001, 10)
    tol = trial.suggest_uniform('tol', 0.00001, 0.001)
    max_iter = trial.suggest_int('max_iter', 500, 5000)
    random_state = trial.suggest_int('random_state', 1, 9999)
    
        
    args={'algorithm': {'cls': 'Lasso',
      'fit': {},
      'init': {'alpha': alpha,
       'fit_intercept': True,
       'normalize': False,
       'precompute': False,
       'copy_X': True,
       'max_iter': max_iter,
       'tol': tol,
       'warm_start': False,
       'positive': False,
       'random_state': random_state,
       'selection': 'cyclic'}},
     'columns': common_columns50,
     'kfold': {'n_splits': 3,
      'random_state': 1985,
      'shuffle': True,
      'type': 'stratified'},
     'scaler': {'cls': 'StandardScaler'}}
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 0 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-20 02:45:29,700] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:30,415] Finished a trial resulted in value: 0.0019200977512308021. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:30,645] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:30,904] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-2

[I 2019-05-20 02:45:44,547] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:44,804] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:45,064] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:45,341] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-2

[I 2019-05-20 02:45:55,308] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:55,605] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:55,880] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:45:56,162] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-2

[I 2019-05-20 02:46:05,839] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:06,126] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:06,418] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:06,734] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-2

[I 2019-05-20 02:46:20,399] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:20,712] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:21,029] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:21,343] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-2

[I 2019-05-20 02:46:35,584] Finished a trial resulted in value: 0.0018655631394626938. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:36,247] Finished a trial resulted in value: 0.0009456112754237209. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:36,844] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:37,602] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-2

[I 2019-05-20 02:46:54,289] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.
[I 2019-05-20 02:46:55,653] Finished a trial resulted in value: 0.0023691469394321743. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 9.187930178542258, 'tol': 7.44597864854199e-05, 'max_iter': 2382, 'random_state': 7481}.


In [40]:
for trial_i in mytrial:
    db.insert(trial_i)

In [42]:
df_trial = db.select()
df_trial[(df_trial['remark']=='tune 0 by stratified')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']].head(10)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
234,2019-05-20 02:45:43.776455,tune 0 by stratified,52,2.082728,4.288519e-06,2.085442,3e-05,0.002714
332,2019-05-20 02:46:19.095292,tune 0 by stratified,52,2.131172,5.498504e-07,2.132366,1e-05,0.001194
324,2019-05-20 02:46:14.713098,tune 0 by stratified,52,2.156235,2.207856e-08,2.157367,2e-05,0.001132
262,2019-05-20 02:45:52.786118,tune 0 by stratified,52,2.18539,5.136591e-08,2.186547,1.5e-05,0.001157
403,2019-05-20 02:46:55.586400,tune 0 by stratified,52,2.196732,1.037882e-08,2.19781,1.5e-05,0.001078
311,2019-05-20 02:46:08.993893,tune 0 by stratified,52,2.225599,2.424852e-07,2.226519,1.2e-05,0.000919
362,2019-05-20 02:46:31.268086,tune 0 by stratified,52,2.245932,4.08376e-07,2.246756,8e-06,0.000824
242,2019-05-20 02:45:46.411717,tune 0 by stratified,52,2.2497,4.356378e-07,2.25051,8e-06,0.00081
205,2019-05-20 02:45:30.413636,tune 0 by stratified,52,2.29004,6.297292e-07,2.290878,4e-06,0.000838
302,2019-05-20 02:46:05.523904,tune 0 by stratified,52,2.292517,6.447328e-07,2.293353,3e-06,0.000836


In [43]:
param = copy.deepcopy(df_trial.loc[234]['param'])
param['kfold']['type'] = 'group'
# run one try
mytrial=[]
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 234 use group')

In [44]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [45]:
df_trial.tail(1)[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
404,2019-05-20 02:50:37.878055,52,2.06857,0.002412,2.137323,0.006813,0.068753


In [46]:
db.commit()