In [8]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
import lightgbm as lgb
import catboost as cb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from fastFM import als, mcmc, sgd
# from pyfm import pylibfm

import optuna

from common import EP
from dfdb import DFDB

import types
import copy

In [9]:
%%HTML
<style>
   /* Override the widths of the notebook, menubar and toolbar containers
      (presumably the classic Jupyter Notebook DOM ids - confirm for other front ends). */
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [10]:
# Show cell contents without truncation (the trial table stores nested
# parameter dicts in its 'param' column).
# `None` is the "no limit" value on pandas >= 1.0; the legacy sentinel -1 is
# deprecated there and rejected by pandas 2.x, while pre-1.0 releases only
# accept an int, so fall back to -1 when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [11]:
# Load the pre-computed feature tables (train first, then test).
# NOTE(review): pickle files execute code on load - only read trusted files.
df_train, df_test = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('../feats/df_train.pkl', '../feats/df_test.pkl')
)

In [12]:
# Integer class label: y truncated to an int when y < 15, otherwise capped at 15.
df_train['label'] = df_train['y'].apply(lambda x: int(x) if x < 15 else 15)

# Derive the CV group from 'season' on a private copy of the values:
# `.values` may alias the column's own buffer (so an in-place edit would write
# through to df_train['season']) and is read-only under pandas Copy-on-Write,
# where the in-place assignment would raise.
group = df_train['season'].values.copy()
group[group == 17] = 1  # season 17 is folded into group 1
df_train['group'] = group

# 'season' is now represented by 'group'; drop it so it is not used as a feature.
df_train = df_train.drop(columns=['season'])

In [13]:
# Feature matrix and target, both keyed by the 'index' column, for tsfresh's
# relevance-based feature selection. (Despite the names, both come from df_train.)
test_X = df_train.set_index('index').drop(columns=['y', 'group', 'label'])
test_y = df_train.set_index('index')['y'].copy()

# Names of the columns that select_features keeps.
tsfresh_columns = list(select_features(test_X, test_y).columns)

In [14]:
# Every df_train column except the id, the target and the two derived helper columns.
original_columns = (
    df_train
    .drop(columns=['index', 'y', 'label', 'group'])
    .columns
    .tolist()
)

In [15]:
# Hand-maintained feature subset (52 names) used by the "common_columns50" trial.
# NOTE(review): 'spkt_welch_densitycoeff_2' lacks the '__' found in the other
# spkt_welch_density names - confirm it matches a real column in df_train.
common_columns50 = [
    "number_peaks{'n': 5}",
    'abs_q75_6',
    'q01_roll_std_100',
    '5000crest_factor_quantile75',
    'abs_q01_4',
    'q25_roll_std_100',
    'q05_roll_std_10',
    'median__roll_std',
    'abs_q05_6',
    '5000form_factor_quantile75',
    '5000smoothness_quantile05',
    "quantile{'q': 0.9}",
    'abs_q75_2',
    'q01_roll_std_1000',
    '5000quantile75mean_',
    'spkt_welch_density__coeff_3',
    "number_peaks{'n': 10}",
    "number_peaks{'n': 1}",
    '5000smoothness_mean_',
    'abs_q25_5',
    '5000std_quantile05',
    '5000smoothness_std_',
    '5000smoothness_median_',
    '5000median_variance_',
    'spkt_welch_density__coeff_4',
    '5000variance_quantile25',
    'abs_q95_2',
    'abs_q75_7',
    'q05_5',
    '5000smoothness_entropy_',
    '5000smoothness_quantile25',
    'q01_2',
    'q05_roll_std_100',
    '3th_peak_freq',
    "autocorrelation{'lag': 5}",
    "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
    'q01_roll_std_10',
    'abs_q01_3',
    '5000variance_median_',
    'q75_roll_std_10',
    '5000skewness_max_',
    'iqr_3',
    'abs_q01_5',
    'q75_roll_mean_10',
    '5000quantile99median_',
    'iqr_6',
    'Hilbert_mean_6',
    'q05_roll_std_1000',
    "number_peaks{'n': 3}",
    'spkt_welch_densitycoeff_2',
    '5000std_median_',
    '5000std_quantile25',
]

In [16]:
# Hand-maintained feature subset (15 names) used by the "common_columns75" trials.
common_columns75 = [
    'abs_q75_6',
    'abs_q01_4',
    'q25_roll_std_100',
    'q05_roll_std_10',
    'median__roll_std',
    '5000smoothness_quantile05',
    'spkt_welch_density__coeff_3',
    "number_peaks{'n': 10}",
    'abs_q25_5',
    'abs_q75_7',
    'q05_5',
    'q05_roll_std_100',
    "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
    'iqr_6',
    'q05_roll_std_1000',
]

In [17]:
# Hand-maintained feature subset (8 names) used by the "common_columns95" trial.
common_columns95 = [
    'abs_q75_6',
    'abs_q01_4',
    'q25_roll_std_100',
    "number_peaks{'n': 10}",
    'abs_q25_5',
    'q05_roll_std_100',
    'iqr_6',
    'q05_roll_std_1000',
]

In [28]:
# Trial store for this notebook; opened with auto_commit disabled, so the
# explicit db.commit() calls in later cells decide when changes are committed.
db = DFDB(
    '../trial2/fm.pkl',
    auto_commit=False,
)

In [35]:
# Baseline configuration handed to EP.process: an ALS factorization-machine
# regressor on the common_columns95 features, 3-fold shuffled 'group' k-fold,
# inputs scaled with StandardScaler.
param = dict(
    algorithm=dict(
        cls='als.FMRegression',
        fit={},
        init=dict(
            n_iter=10,
            init_stdev=0.00030963137584220923,
            rank=2,
            random_state=42,
            l2_reg_w=0.1,
            l2_reg_V=0.1,
            l2_reg=0,
        ),
    ),
    columns=common_columns95,
    kfold=dict(
        n_splits=3,
        random_state=1985,
        shuffle=True,
        type='group',
    ),
    scaler=dict(cls='StandardScaler'),
)

In [36]:
# Evaluate `param` once. EP.process receives `mytrial` as its `trial` argument;
# the first entry found in it afterwards is stored in the trial DB.
mytrial = []
(df_his,
 df_feature_importances,
 df_valid_pred,
 df_test_pred) = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
    remark='try common_columns95',
)
db.insert(mytrial[0])

# Refresh the in-memory view of all stored trials.
df_trial = db.select()

In [37]:
# Summary columns of every stored trial.
df_trial.loc[:, [
    'datetime',
    'nfeatures',
    'train_mae',
    'train_mae_var',
    'val_mae',
    'val_mae_var',
    'mae_diff',
    'remark',
]]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark
0,2019-05-20 00:28:34.290778,52,2.04282,0.002401,2.144192,0.005809,0.101372,try common_columns50
1,2019-05-20 00:28:47.536473,15,2.094174,0.003788,2.105352,0.014602,0.011178,try common_columns75
2,2019-05-20 00:29:08.180727,8,2.108261,0.003726,2.114774,0.015691,0.006512,try common_columns95


In [38]:
# `db` was opened with auto_commit=False, so commit explicitly.
# Presumably this persists the trial inserted above to the DB file - confirm against DFDB.
db.commit()

In [40]:
# Passed to EP.process as `trial=`; presumably one record is appended per
# evaluated configuration (the entries are inserted into `db` in a later cell).
mytrial = []


# Tune hyperparameters of the ALS factorization machine ('group' k-fold).
def objective(trial):
    """Optuna objective for ``als.FMRegression`` on ``common_columns75``.

    Samples ``n_iter``, ``init_stdev``, ``rank`` and ``random_state`` from the
    trial, evaluates that configuration through ``EP.process`` (3-fold shuffled
    'group' k-fold, ``StandardScaler``) and records summary statistics of the
    ``train`` / ``valid`` columns of the returned history frame as user
    attributes: ``val_mae``, ``train_mae``, ``mae_diff``, ``val_mae_var`` and
    ``train_mae_var``.

    Parameters
    ----------
    trial : optuna trial object supplied by ``study.optimize``.

    Returns
    -------
    float
        ``|val_mae - train_mae| * val_mae``; minimised by the study, so both a
        large train/validation gap and a large validation error are penalised.
    """
    # NOTE: suggest_uniform is deprecated in Optuna >= 3 in favour of
    # suggest_float; kept so the cell still runs on the Optuna release this
    # notebook was written against.
    n_iter = trial.suggest_int('n_iter', 10, 100)
    init_stdev = trial.suggest_uniform('init_stdev', 0.00001, .01)
    rank = trial.suggest_int('rank', 2, 16)
    random_state = trial.suggest_int('random_state', 0, 9999)

    args = {
        'algorithm': {
            'cls': 'als.FMRegression',
            'fit': {},
            'init': {
                'n_iter': n_iter,
                'init_stdev': init_stdev,
                'rank': rank,
                'random_state': random_state,
            },
        },
        'columns': common_columns75,
        'kfold': {
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group',
        },
        'scaler': {'cls': 'StandardScaler'},
    }

    df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
        df_train, args, df_test=df_test, trial=mytrial, remark='tune 1')

    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean - train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    # Previously computed but discarded; record it alongside val_mae_var.
    trial.set_user_attr('train_mae_var', train_mae_var)

    return np.abs(val_mae_mean - train_mae_mean) * val_mae_mean


study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-20 00:30:24,617] Finished a trial resulted in value: 0.0417999805729673. Current best value is 0.0417999805729673 with parameters: {'n_iter': 16, 'init_stdev': 0.009450577024697319, 'rank': 15, 'random_state': 8525}.
[I 2019-05-20 00:30:32,405] Finished a trial resulted in value: 0.061900337141179054. Current best value is 0.0417999805729673 with parameters: {'n_iter': 16, 'init_stdev': 0.009450577024697319, 'rank': 15, 'random_state': 8525}.
[I 2019-05-20 00:30:33,798] Finished a trial resulted in value: 0.03560918703918256. Current best value is 0.03560918703918256 with parameters: {'n_iter': 11, 'init_stdev': 0.0004925220959255669, 'rank': 8, 'random_state': 8154}.
[I 2019-05-20 00:30:35,687] Finished a trial resulted in value: 0.04106028363971468. Current best value is 0.03560918703918256 with parameters: {'n_iter': 11, 'init_stdev': 0.0004925220959255669, 'rank': 8, 'random_state': 8154}.
[I 2019-05-20 00:30:43,622] Finished a trial resulted in value: 0.0623051979832416

[I 2019-05-20 00:32:52,326] Finished a trial resulted in value: 0.05905920592431996. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:32:54,735] Finished a trial resulted in value: 0.04942294417247465. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:32:57,750] Finished a trial resulted in value: 0.05089374783029007. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:33:02,225] Finished a trial resulted in value: 0.06783707017944043. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:33:13,174] Finished a trial resulted in value: 0.0688628882

[I 2019-05-20 00:34:56,655] Finished a trial resulted in value: 0.03829961776488008. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:35:00,355] Finished a trial resulted in value: 0.04779870958825259. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:35:06,288] Finished a trial resulted in value: 0.055866522131453766. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:35:07,761] Finished a trial resulted in value: 0.034770148354015595. Current best value is 0.029202648069656195 with parameters: {'n_iter': 10, 'init_stdev': 0.00022426126635392406, 'rank': 4, 'random_state': 591}.
[I 2019-05-20 00:35:09,278] Finished a trial resulted in value: 0.03982247

[I 2019-05-20 00:36:30,713] Finished a trial resulted in value: 0.03877574685916855. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:36:31,975] Finished a trial resulted in value: 0.0365968444062103. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:36:32,941] Finished a trial resulted in value: 0.03657582392311764. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:36:34,292] Finished a trial resulted in value: 0.04236517960928578. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:36:37,696] Finished a trial resulted in value: 0.04998681410830594. 

[I 2019-05-20 00:38:34,835] Finished a trial resulted in value: 0.06277964754041482. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:38:46,051] Finished a trial resulted in value: 0.08007606100785848. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:38:52,106] Finished a trial resulted in value: 0.06783070414547478. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:38:57,068] Finished a trial resulted in value: 0.06598583965205178. Current best value is 0.02824522459092479 with parameters: {'n_iter': 12, 'init_stdev': 0.008398437455565568, 'rank': 2, 'random_state': 5192}.
[I 2019-05-20 00:39:00,537] Finished a trial resulted in value: 0.0609836898850795. 

[I 2019-05-20 00:39:47,130] Finished a trial resulted in value: 0.041627458708174074. Current best value is 0.0239444572420319 with parameters: {'n_iter': 10, 'init_stdev': 0.0010010043785129807, 'rank': 2, 'random_state': 5289}.
[I 2019-05-20 00:39:49,134] Finished a trial resulted in value: 0.04024407147518729. Current best value is 0.0239444572420319 with parameters: {'n_iter': 10, 'init_stdev': 0.0010010043785129807, 'rank': 2, 'random_state': 5289}.
[I 2019-05-20 00:39:51,984] Finished a trial resulted in value: 0.05222638995150707. Current best value is 0.0239444572420319 with parameters: {'n_iter': 10, 'init_stdev': 0.0010010043785129807, 'rank': 2, 'random_state': 5289}.
[I 2019-05-20 00:39:53,185] Finished a trial resulted in value: 0.04194252041459983. Current best value is 0.0239444572420319 with parameters: {'n_iter': 10, 'init_stdev': 0.0010010043785129807, 'rank': 2, 'random_state': 5289}.
[I 2019-05-20 00:39:55,112] Finished a trial resulted in value: 0.04347515776802332

In [41]:
# Store every record collected in `mytrial`, then reload the full trial table.
for record in mytrial:
    db.insert(record)

df_trial = db.select()

In [45]:
# Five 'tune 1' trials with the lowest validation MAE among those whose
# train/validation gap is below 0.05.
(
    df_trial
    .loc[lambda d: (d['remark'] == 'tune 1') & (d['mae_diff'] < .05)]
    .sort_values(by=['val_mae'])
    .loc[:, ['datetime', 'nfeatures', 'train_mae', 'train_mae_var',
             'val_mae', 'val_mae_var', 'mae_diff', 'remark']]
    .head()
)

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark
75,2019-05-20 00:35:06.279843,15,2.061092,0.003389,2.08785,0.014045,0.026758,tune 1
16,2019-05-20 00:31:31.366877,15,2.066103,0.003461,2.089005,0.014203,0.022903,tune 1
80,2019-05-20 00:35:22.984749,15,2.061674,0.00336,2.089156,0.014277,0.027482,tune 1
119,2019-05-20 00:37:10.790879,15,2.061585,0.003405,2.089197,0.014182,0.027612,tune 1
3,2019-05-20 00:30:24.616046,15,2.069191,0.003481,2.089199,0.014025,0.020008,tune 1


In [46]:
# `db` was opened with auto_commit=False, so commit explicitly.
# Presumably this persists the 'tune 1' trials inserted above - confirm against DFDB.
db.commit()

In [47]:
# Re-evaluate the configuration stored in trial row 75 with a 'stratified'
# split. The stored parameters are deep-copied so the row in df_trial is not modified.
mytrial = []
param = copy.deepcopy(df_trial.loc[75, 'param'])
param['kfold']['type'] = 'stratified'

(df_his,
 df_feature_importances,
 df_valid_pred,
 df_test_pred) = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
    remark='remodel 75 use stratified',
)

In [48]:
# Store the record(s) from the stratified re-run and reload the trial table.
for record in mytrial:
    db.insert(record)

df_trial = db.select()

In [49]:
# Metrics of the most recently stored trial (last row of the table).
df_trial[[
    'datetime',
    'nfeatures',
    'train_mae',
    'train_mae_var',
    'val_mae',
    'val_mae_var',
    'mae_diff',
]].tail(1)

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
203,2019-05-20 00:43:35.910470,15,2.066791,6e-06,2.070952,5.2e-05,0.004161


In [50]:
# `db` was opened with auto_commit=False, so commit explicitly.
# Presumably this persists the stratified re-run inserted above - confirm against DFDB.
db.commit()

In [51]:
# Passed to EP.process as `trial=`; presumably one record is appended per
# evaluated configuration (the entries are inserted into `db` in a later cell).
mytrial = []


# Tune hyperparameters of the ALS factorization machine ('stratified' k-fold).
def objective(trial):
    """Optuna objective for ``als.FMRegression`` on ``common_columns75``.

    Samples ``n_iter``, ``init_stdev``, ``rank`` and ``random_state`` from the
    trial, evaluates that configuration through ``EP.process`` (3-fold shuffled
    'stratified' k-fold, ``StandardScaler``) and records summary statistics of
    the ``train`` / ``valid`` columns of the returned history frame as user
    attributes: ``val_mae``, ``train_mae``, ``mae_diff``, ``val_mae_var`` and
    ``train_mae_var``.

    Parameters
    ----------
    trial : optuna trial object supplied by ``study.optimize``.

    Returns
    -------
    float
        ``|val_mae - train_mae| * val_mae``; minimised by the study, so both a
        large train/validation gap and a large validation error are penalised.
    """
    # NOTE: suggest_uniform is deprecated in Optuna >= 3 in favour of
    # suggest_float; kept so the cell still runs on the Optuna release this
    # notebook was written against.
    n_iter = trial.suggest_int('n_iter', 10, 100)
    init_stdev = trial.suggest_uniform('init_stdev', 0.00001, .01)
    rank = trial.suggest_int('rank', 2, 16)
    random_state = trial.suggest_int('random_state', 0, 9999)

    args = {
        'algorithm': {
            'cls': 'als.FMRegression',
            'fit': {},
            'init': {
                'n_iter': n_iter,
                'init_stdev': init_stdev,
                'rank': rank,
                'random_state': random_state,
            },
        },
        'columns': common_columns75,
        'kfold': {
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified',
        },
        'scaler': {'cls': 'StandardScaler'},
    }

    df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
        df_train, args, df_test=df_test, trial=mytrial,
        remark='tune 1 by stratified')

    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean - train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    # Previously computed but discarded; record it alongside val_mae_var.
    trial.set_user_attr('train_mae_var', train_mae_var)

    return np.abs(val_mae_mean - train_mae_mean) * val_mae_mean


study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-20 00:44:58,102] Finished a trial resulted in value: 0.006881828234466765. Current best value is 0.006881828234466765 with parameters: {'n_iter': 27, 'init_stdev': 0.0005436483460892114, 'rank': 16, 'random_state': 9203}.
[I 2019-05-20 00:45:07,770] Finished a trial resulted in value: 0.009998656352255659. Current best value is 0.006881828234466765 with parameters: {'n_iter': 27, 'init_stdev': 0.0005436483460892114, 'rank': 16, 'random_state': 9203}.
[I 2019-05-20 00:45:13,152] Finished a trial resulted in value: 0.009147220895245457. Current best value is 0.006881828234466765 with parameters: {'n_iter': 27, 'init_stdev': 0.0005436483460892114, 'rank': 16, 'random_state': 9203}.
[I 2019-05-20 00:45:16,081] Finished a trial resulted in value: 0.006318500076194243. Current best value is 0.006318500076194243 with parameters: {'n_iter': 75, 'init_stdev': 0.008332697242587353, 'rank': 2, 'random_state': 9283}.
[I 2019-05-20 00:45:19,478] Finished a trial resulted in value: 0.0062

[I 2019-05-20 00:47:18,294] Finished a trial resulted in value: 0.004667655001840915. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:47:20,515] Finished a trial resulted in value: 0.005438956120594387. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:47:28,426] Finished a trial resulted in value: 0.009519367505015282. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:47:32,225] Finished a trial resulted in value: 0.006077426072207256. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:47:38,856] Finished a trial resulted in value: 0.00

[I 2019-05-20 00:49:30,153] Finished a trial resulted in value: 0.0046904759599844656. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:49:31,207] Finished a trial resulted in value: 0.00793735272133666. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:49:34,037] Finished a trial resulted in value: 0.006332595963357297. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:49:35,552] Finished a trial resulted in value: 0.0054267483402667994. Current best value is 0.0036893242276301408 with parameters: {'n_iter': 24, 'init_stdev': 0.0015156443054072921, 'rank': 4, 'random_state': 6790}.
[I 2019-05-20 00:49:38,906] Finished a trial resulted in value: 0.0

[I 2019-05-20 00:51:17,127] Finished a trial resulted in value: 0.006831503913863786. Current best value is 0.00273454189435239 with parameters: {'n_iter': 10, 'init_stdev': 0.003618298975699801, 'rank': 2, 'random_state': 362}.
[I 2019-05-20 00:51:19,848] Finished a trial resulted in value: 0.007127361565466361. Current best value is 0.00273454189435239 with parameters: {'n_iter': 10, 'init_stdev': 0.003618298975699801, 'rank': 2, 'random_state': 362}.
[I 2019-05-20 00:51:28,183] Finished a trial resulted in value: 0.009274557559945345. Current best value is 0.00273454189435239 with parameters: {'n_iter': 10, 'init_stdev': 0.003618298975699801, 'rank': 2, 'random_state': 362}.
[I 2019-05-20 00:51:29,071] Finished a trial resulted in value: 0.003295736793190923. Current best value is 0.00273454189435239 with parameters: {'n_iter': 10, 'init_stdev': 0.003618298975699801, 'rank': 2, 'random_state': 362}.
[I 2019-05-20 00:51:31,291] Finished a trial resulted in value: 0.005839149549992836

[I 2019-05-20 00:53:11,377] Finished a trial resulted in value: 0.0088317212029251. Current best value is 0.002097438016032469 with parameters: {'n_iter': 10, 'init_stdev': 0.005956826536658993, 'rank': 3, 'random_state': 943}.
[I 2019-05-20 00:53:13,411] Finished a trial resulted in value: 0.004740811508673701. Current best value is 0.002097438016032469 with parameters: {'n_iter': 10, 'init_stdev': 0.005956826536658993, 'rank': 3, 'random_state': 943}.
[I 2019-05-20 00:53:16,016] Finished a trial resulted in value: 0.007225742853422012. Current best value is 0.002097438016032469 with parameters: {'n_iter': 10, 'init_stdev': 0.005956826536658993, 'rank': 3, 'random_state': 943}.
[I 2019-05-20 00:53:17,793] Finished a trial resulted in value: 0.006157531200334461. Current best value is 0.002097438016032469 with parameters: {'n_iter': 10, 'init_stdev': 0.005956826536658993, 'rank': 3, 'random_state': 943}.
[I 2019-05-20 00:53:22,622] Finished a trial resulted in value: 0.0066272825336079

[I 2019-05-20 00:54:12,215] Finished a trial resulted in value: 0.0039432924313434065. Current best value is 0.0018093616686999535 with parameters: {'n_iter': 11, 'init_stdev': 0.0024080047879254693, 'rank': 2, 'random_state': 1469}.
[I 2019-05-20 00:54:13,919] Finished a trial resulted in value: 0.003844359388746866. Current best value is 0.0018093616686999535 with parameters: {'n_iter': 11, 'init_stdev': 0.0024080047879254693, 'rank': 2, 'random_state': 1469}.
[I 2019-05-20 00:54:19,016] Finished a trial resulted in value: 0.0060031598953831705. Current best value is 0.0018093616686999535 with parameters: {'n_iter': 11, 'init_stdev': 0.0024080047879254693, 'rank': 2, 'random_state': 1469}.
[I 2019-05-20 00:54:22,166] Finished a trial resulted in value: 0.0066559245829975475. Current best value is 0.0018093616686999535 with parameters: {'n_iter': 11, 'init_stdev': 0.0024080047879254693, 'rank': 2, 'random_state': 1469}.
[I 2019-05-20 00:54:25,381] Finished a trial resulted in value: 0

In [52]:
# Store every record collected during the stratified tuning run.
for record in mytrial:
    db.insert(record)

In [53]:
# Reload the trial table, then list the ten 'tune 1 by stratified' trials with
# the lowest validation MAE among those whose train/validation gap is below 0.05.
df_trial = db.select()
(
    df_trial
    .loc[lambda d: (d['remark'] == 'tune 1 by stratified') & (d['mae_diff'] < .05)]
    .sort_values(by=['val_mae'])
    .loc[:, ['datetime', 'remark', 'nfeatures', 'train_mae', 'train_mae_var',
             'val_mae', 'val_mae_var', 'mae_diff']]
    .head(10)
)

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
289,2019-05-20 00:50:08.943796,tune 1 by stratified,15,2.059574,1e-05,2.063472,4.2e-05,0.003898
213,2019-05-20 00:45:54.558134,tune 1 by stratified,15,2.061611,1.5e-05,2.066388,5.2e-05,0.004776
328,2019-05-20 00:52:19.793560,tune 1 by stratified,15,2.063665,1.2e-05,2.067661,5.5e-05,0.003997
205,2019-05-20 00:45:07.768878,tune 1 by stratified,15,2.063036,8e-06,2.067871,6.8e-05,0.004835
303,2019-05-20 00:51:00.548343,tune 1 by stratified,15,2.063842,1.3e-05,2.06803,4.7e-05,0.004188
385,2019-05-20 00:54:38.741970,tune 1 by stratified,15,2.063801,4e-06,2.068442,7.9e-05,0.004641
311,2019-05-20 00:51:28.171870,tune 1 by stratified,15,2.064298,7e-06,2.068781,6.1e-05,0.004483
256,2019-05-20 00:48:25.603815,tune 1 by stratified,15,2.063698,1.4e-05,2.069062,5.4e-05,0.005364
265,2019-05-20 00:49:07.567526,tune 1 by stratified,15,2.0648,1.1e-05,2.069381,6.1e-05,0.004581
257,2019-05-20 00:48:35.570225,tune 1 by stratified,15,2.065514,1e-05,2.069634,0.000114,0.00412


In [54]:
# Re-evaluate the configuration stored in trial row 289 with a 'group' split.
# The stored parameters are deep-copied so the row in df_trial is not modified.
mytrial = []
param = copy.deepcopy(df_trial.loc[289, 'param'])
param['kfold']['type'] = 'group'

(df_his,
 df_feature_importances,
 df_valid_pred,
 df_test_pred) = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
    remark='remodel 289 use group',
)

In [55]:
# Store the record(s) from the group re-run and reload the trial table.
for record in mytrial:
    db.insert(record)

df_trial = db.select()

In [56]:
# Metrics of the most recently stored trial (last row of the table).
df_trial[[
    'datetime',
    'nfeatures',
    'train_mae',
    'train_mae_var',
    'val_mae',
    'val_mae_var',
    'mae_diff',
]].tail(1)

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
404,2019-05-20 01:07:49.204141,15,2.052364,0.003006,2.091492,0.015884,0.039128


In [57]:
# `db` was opened with auto_commit=False, so commit explicitly.
# Presumably this persists the trials inserted since the previous commit - confirm against DFDB.
db.commit()