In [4]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
import lightgbm as lgb
import catboost as cb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, Ridge, Lasso

import optuna

from common import EP
from dfdb import DFDB

import types
import copy

Using TensorFlow backend.


In [5]:
%%HTML
<style>
   /* Override the widths of the notebook page, menubar and toolbar containers
      (presumably so wide DataFrame outputs fit on screen). */
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [25]:
# Do not truncate cell contents when rendering DataFrames (needed to read the
# long `kfold` dicts shown further down). `None` is the supported "no limit"
# value; the legacy `-1` sentinel has been deprecated since pandas 1.0 and is
# rejected by newer releases, so it is kept only as a fallback for old pandas
# versions whose option validator does not accept None.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [6]:
# Load the train and test frames from pickle files (presumably pre-computed
# feature tables, judging by the '../feats/' directory).
# NOTE: unpickling can execute arbitrary code -- only load files you produced yourself.
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [7]:
# Integer class label derived from the target: the truncated integer part of y
# for y < 15; every other value (y >= 15, or NaN since the comparison is False)
# falls into class 15.
df_train['label'] = df_train['y'].apply(lambda x: int(x) if x < 15 else 15)

# Grouping key: the season, with season 17 folded into group 1.
# The remapping is done on a fresh Series instead of writing through the array
# returned by `df_train['season'].values`: that array can be a view of the
# frame's own data (so the write silently altered `season` as well) and is
# read-only when pandas Copy-on-Write is enabled, where the write raises.
group = df_train['season'].replace(17, 1).values
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [8]:
# Feature matrix and target, both keyed by the 'index' column, fed to tsfresh's
# relevance filter. Despite the `test_` prefix these are built from df_train.
test_X = df_train.set_index('index').drop(columns=['y', 'group', 'label'])
test_y = df_train.set_index('index')['y']
tsfresh_columns = list(select_features(test_X, test_y).columns)

In [9]:
# Every column of df_train except the bookkeeping/target ones.
original_columns = df_train.drop(columns=['index', 'y', 'label', 'group']).columns.tolist()

In [10]:
# Hard-coded feature names for the CatBoost model (per the variable name; how
# they were chosen is not shown in this notebook). Concatenated with the other
# per-model lists in the overlap analysis further down.
# NOTE(review): some names (e.g. 'spkt_welch_densitycoeff_2') lack the '__'
# separator seen in 'spkt_welch_density__coeff_3' -- verify that they match the
# column names of df_train exactly.
catboost_columns = ['spkt_welch_density__coeff_3',
 'spkt_welch_densitycoeff_2',
 'abs_q25_5',
 'abs_q75_6',
 'q05_roll_std_1000',
 'abs_q75_7',
 'abs_q95_2',
 'q05_5',
 'abs_q75_2',
 '5000skewness_max_',
 'fft_coefficientcoeff_80__attr_"imag"',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 '5000kurtosis_mean_',
 "number_peaks{'n': 1}",
 '5000smoothness_entropy_',
 'ave10_7',
 'q75_roll_std_1000',
 'FFT_Mag_25q0',
 'fft_coefficientcoeff_20__attr_"abs"']
# Hard-coded feature names for the LightGBM model (per the variable name; how
# they were chosen is not shown in this notebook). Concatenated with the other
# per-model lists in the overlap analysis further down.
lgbm_columns = ['q25_roll_std_100',
 'abs_q25_5',
 'spkt_welch_density__coeff_3',
 'abs_q75_6',
 'abs_q75_7',
 'spkt_welch_densitycoeff_2',
 'median__roll_std',
 'abs_q01_5',
 '5000smoothness_quantile05',
 '5000smoothness_std_',
 'abs_q95_3',
 'FFT_Mag_75q0',
 '5000median_std_',
 'spkt_welch_density__coeff_17']
# Hard-coded feature names for the XGBoost model (presumably, given the `xgbm`
# prefix; how they were chosen is not shown in this notebook). Concatenated with
# the other per-model lists in the overlap analysis further down.
xgbm_columns = ['q25_roll_std_100',
 'abs_q25_5',
 'spkt_welch_density__coeff_3',
 'abs_q75_7',
 'spkt_welch_densitycoeff_2',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'agg_autocorrelationf_agg_"mean"__maxlag_40',
 "number_peaks{'n': 1}",
 '5000peak_peak_amp_max_',
 'abs_q95_3',
 'spkt_welch_density__coeff_89',
 'abs_q05_2']
# Hard-coded feature names for the random-forest model. Named like its siblings
# (`<model>_columns`); the original, inconsistently named variable is kept as an
# alias so existing references to `randomforest_randomforest` keep working.
randomforest_columns = ['abs_q25_5', 'abs_q01_4', 'q25_roll_std_100']
randomforest_randomforest = randomforest_columns
# Hard-coded feature names for the extra-trees model (per the variable name; how
# they were chosen is not shown in this notebook). Concatenated with the other
# per-model lists in the overlap analysis further down.
extratrees_columns = ['q05_2',
 "number_peaks{'n': 1}",
 'abs_q01_6',
 'abs_q95_2',
 '5000smoothness_quantile25',
 '5000std_median_',
 '5000smoothness_median_',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'abs_q75_6',
 "number_peaks{'n': 3}",
 'q75_roll_std_10',
 "number_peaks{'n': 10}",
 '5000min_quantile75',
 '5000smoothness_quantile05',
 "number_peaks{'n': 5}",
 'abs_q01_2',
 '5000smoothness_mean_',
 'min_roll_std_100',
 'abs_q05_2',
 'q01_roll_std_1000']
# Hard-coded feature names for the gradient-boosting model (per the variable
# name; how they were chosen is not shown in this notebook). Concatenated with
# the other per-model lists in the overlap analysis further down.
gradientboosting_columns = ['q05_5',
 'kurt_1',
 'abs_q75_6',
 'abs_q75_7',
 'spkt_welch_density__coeff_28',
 'spkt_welch_density__coeff_99',
 'fft_coefficientcoeff_6__attr_"abs"',
 '5000smoothness_quantile05',
 'q25_roll_std_100',
 'spkt_welch_densitycoeff_2',
 'abs_max_1',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'abs_q25_5',
 'abs_q01_7',
 'q05_8',
 'Hilbert_mean_6',
 'abs_q95_2',
 '5000skewness_max_',
 '5000kurtosis_mean_',
 'spkt_welch_density__coeff_3']

In [11]:
# Overlap analysis of the per-model feature lists: how many lists mention each
# feature, and which features reach given quantiles of that count.
model_column_lists = [
    catboost_columns,
    lgbm_columns,
    xgbm_columns,
    randomforest_randomforest,
    extratrees_columns,
    gradientboosting_columns,
]
all_columns = [col for cols in model_column_lists for col in cols]

# Occurrence count per feature, computed in a single pass. Iterating the dict
# yields features in first-seen order, so `unique_columns` and every
# `common_columns*` list are identical on every kernel start. The previous
# `list(set(...))` order depended on per-process string hashing, which made the
# column order handed to the feature-selection step non-reproducible.
column_counts = {}
for col in all_columns:
    column_counts[col] = column_counts.get(col, 0) + 1
unique_columns = list(column_counts)
count_values = list(column_counts.values())

# A feature is "common to all" when its count equals the number of lists.
N_columns = len(model_column_lists)

# Quantile thresholds are loop-invariant, so compute them once. The fallback
# only matters for empty input, where no column can qualify anyway.
if count_values:
    q50, q75, q95 = np.quantile(count_values, [.5, .75, .95])
else:
    q50 = q75 = q95 = np.inf

common_columns = [col for col in unique_columns if column_counts[col] == N_columns]
common_columns50 = [col for col in unique_columns if column_counts[col] >= q50]
common_columns75 = [col for col in unique_columns if column_counts[col] >= q75]
common_columns95 = [col for col in unique_columns if column_counts[col] >= q95]

print('unique_columns ',len(unique_columns))
print('common_columns50 ',len(common_columns50))
print('common_columns75 ',len(common_columns75))
print('common_columns95 ',len(common_columns95))
print('common_columns ',len(common_columns))

unique_columns  55
common_columns50  55
common_columns75  15
common_columns95  7
common_columns  0


In [12]:
# Open the trial store backed by '../trial/lasso.pkl'. With auto_commit=False,
# inserts are presumably held back until db.commit() is called explicitly, as
# later cells do -- confirm against dfdb.DFDB.
db = DFDB('../trial/lasso.pkl', auto_commit=False)

In [36]:
# Baseline configuration passed to EP.revert_rfe: Lasso estimator settings, a
# StandardScaler, and a shuffled 3-split 'group' k-fold.
param = {
    'algorithm': {
        'cls': 'Lasso',
        'fit': {},
        'init': {
            'alpha': 0.1,
            'fit_intercept': True,
            'normalize': False,
            'precompute': False,
            'copy_X': True,
            'max_iter': 1000,
            'tol': 0.0001,
            'warm_start': False,
            'positive': False,
            'random_state': 42,
            'selection': 'cyclic',
        },
    },
    'columns': common_columns95,
    'kfold': {
        'n_splits': 3,
        'random_state': 1985,
        'shuffle': True,
        'type': 'group',
    },
    'scaler': {'cls': 'StandardScaler', 'init': {}},
}

mytrial = []
columns = common_columns50

# Feature selection over the `columns` pool, starting from common_columns95.
# Judging by the logged trials, the feature count grows from the start set;
# the exact algorithm lives in common.EP. Each evaluated trial is appended to
# `mytrial`.
selected_columns = EP.revert_rfe(
    df_train,
    param,
    columns,
    df_test,
    mytrial,
    start_columns=common_columns95,
    limit=70,
    remark='start from top1 column 2th',
)
print(len(selected_columns))

# Store every collected trial, then reload the table and expose each trial's
# k-fold settings as their own column.
for record in mytrial:
    db.insert(record)
df_trial = db.select()
df_trial['kfold'] = df_trial['param'].map(lambda p: p['kfold'])

31


In [38]:
# Headline metrics of the trials logged by the feature-selection run.
df_trial.loc[
    df_trial['remark'] == 'start from top1 column 2th',
    ['datetime', 'nfeatures', 'train_mae', 'train_mae_var', 'val_mae', 'val_mae_var', 'mae_diff', 'remark'],
]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark
948,2019-05-25 06:02:04.243014,7,2.215767,0.003325,2.220666,0.013378,0.004899,start from top1 column 2th
949,2019-05-25 06:02:04.501806,7,2.215769,0.003325,2.220668,0.013377,0.004898,start from top1 column 2th
950,2019-05-25 06:02:04.788755,8,2.214968,0.003385,2.219898,0.013223,0.004929,start from top1 column 2th
951,2019-05-25 06:02:05.056677,9,2.209632,0.003382,2.214252,0.013389,0.004619,start from top1 column 2th
952,2019-05-25 06:02:05.373460,10,2.209632,0.003382,2.214252,0.013389,0.004619,start from top1 column 2th
953,2019-05-25 06:02:05.609460,10,2.1697,0.004162,2.175681,0.015833,0.005981,start from top1 column 2th
954,2019-05-25 06:02:05.791332,11,2.1697,0.004162,2.175681,0.015833,0.005981,start from top1 column 2th
955,2019-05-25 06:02:05.969592,11,2.169241,0.004137,2.175343,0.015874,0.006102,start from top1 column 2th
956,2019-05-25 06:02:06.162222,12,2.169234,0.004139,2.175576,0.015926,0.006342,start from top1 column 2th
957,2019-05-25 06:02:06.343619,12,2.169241,0.004137,2.175343,0.015874,0.006102,start from top1 column 2th


In [39]:
# Commit the trial rows inserted by the feature-selection cell (db was opened
# with auto_commit=False, so this call is presumably what persists them).
db.commit()

In [40]:
# Collects the trial records produced by EP.process during the study; a later
# cell inserts them into the trial DB.
mytrial = []


# Tune hyperparameters
def objective(trial):
    """Optuna objective: evaluate one sampled Lasso configuration with EP.process.

    Samples ``alpha``, ``tol``, ``max_iter`` and ``random_state`` from ``trial``,
    runs ``EP.process`` on ``df_train`` / ``df_test`` restricted to
    ``selected_columns`` with a StandardScaler and a shuffled 8-split 'group'
    k-fold, and records summary statistics of the run as trial user attributes.
    ``EP.process`` is also handed the module-level ``mytrial`` list.

    Parameters
    ----------
    trial : optuna trial object
        Supplies the sampled hyperparameters and receives the user attributes
        ``val_mae``, ``train_mae``, ``mae_diff``, ``val_mae_var`` and
        ``train_mae_var``.

    Returns
    -------
    float
        ``abs(val_mae - train_mae) * val_mae``, where both terms are the means
        of ``df_his.valid`` / ``df_his.train``. Minimising it favours a low
        validation score together with a small train/validation gap.
    """
    alpha = trial.suggest_uniform('alpha', 0.001, 10)
    tol = trial.suggest_uniform('tol', 0.00001, 0.001)
    max_iter = trial.suggest_int('max_iter', 500, 5000)
    # NOTE(review): scikit-learn documents Lasso's random_state as used only
    # when selection='random'; with 'cyclic' this search dimension is likely
    # inert -- consider fixing it instead of tuning it.
    random_state = trial.suggest_int('random_state', 1, 9999)

    args = {
        'algorithm': {
            'cls': 'Lasso',
            'fit': {},
            'init': {
                'alpha': alpha,
                'fit_intercept': True,
                'normalize': False,
                'precompute': False,
                'copy_X': True,
                'max_iter': max_iter,
                'tol': tol,
                'warm_start': False,
                'positive': False,
                'random_state': random_state,
                'selection': 'cyclic',
            },
        },
        'columns': selected_columns,
        'kfold': {
            'n_splits': 8,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group',
        },
        'scaler': {'cls': 'StandardScaler', 'init': {}},
    }

    # Only the history frame is needed here; the other three return values
    # (feature importances, validation and test predictions) are discarded.
    df_his, _, _, _ = EP.process(df_train, args, df_test=df_test, trial=mytrial, remark='tune 1003')

    # df_his.valid / df_his.train presumably hold one MAE per fold.
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean - train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    # Previously computed but never recorded; stored to mirror val_mae_var.
    trial.set_user_attr('train_mae_var', train_mae_var)

    return np.abs(val_mae_mean - train_mae_mean) * val_mae_mean


study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-25 06:10:44,437] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.046221861852773626 with parameters: {'alpha': 6.963260031138852, 'tol': 0.0006692596951698213, 'max_iter': 4441, 'random_state': 889}.
[I 2019-05-25 06:10:47,677] Finished a trial resulted in value: 0.1064415096267044. Current best value is 0.046221861852773626 with parameters: {'alpha': 6.963260031138852, 'tol': 0.0006692596951698213, 'max_iter': 4441, 'random_state': 889}.
[I 2019-05-25 06:10:48,465] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.046221861852773626 with parameters: {'alpha': 6.963260031138852, 'tol': 0.0006692596951698213, 'max_iter': 4441, 'random_state': 889}.
[I 2019-05-25 06:10:50,962] Finished a trial resulted in value: 0.08993884518538175. Current best value is 0.046221861852773626 with parameters: {'alpha': 6.963260031138852, 'tol': 0.0006692596951698213, 'max_iter': 4441, 'random_state': 889}.
[I 2019-05-25 06:10:51,

[I 2019-05-25 06:11:29,683] Finished a trial resulted in value: 0.07682298298118781. Current best value is 0.043289350224464056 with parameters: {'alpha': 1.6803157115648797, 'tol': 3.908681986316848e-05, 'max_iter': 990, 'random_state': 9864}.
[I 2019-05-25 06:11:30,488] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.043289350224464056 with parameters: {'alpha': 1.6803157115648797, 'tol': 3.908681986316848e-05, 'max_iter': 990, 'random_state': 9864}.
[I 2019-05-25 06:11:31,225] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.043289350224464056 with parameters: {'alpha': 1.6803157115648797, 'tol': 3.908681986316848e-05, 'max_iter': 990, 'random_state': 9864}.
[I 2019-05-25 06:11:32,083] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.043289350224464056 with parameters: {'alpha': 1.6803157115648797, 'tol': 3.908681986316848e-05, 'max_iter': 990, 'random_state': 9864}.
[I 2019-05-25 06:

[I 2019-05-25 06:12:26,937] Finished a trial resulted in value: 0.045423838231959925. Current best value is 0.04267037960104692 with parameters: {'alpha': 1.5909595350839556, 'tol': 0.0006811637371546568, 'max_iter': 2048, 'random_state': 8190}.
[I 2019-05-25 06:12:29,560] Finished a trial resulted in value: 0.043021239334950306. Current best value is 0.04267037960104692 with parameters: {'alpha': 1.5909595350839556, 'tol': 0.0006811637371546568, 'max_iter': 2048, 'random_state': 8190}.
[I 2019-05-25 06:12:30,854] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.04267037960104692 with parameters: {'alpha': 1.5909595350839556, 'tol': 0.0006811637371546568, 'max_iter': 2048, 'random_state': 8190}.
[I 2019-05-25 06:12:33,278] Finished a trial resulted in value: 0.05951624248817959. Current best value is 0.04267037960104692 with parameters: {'alpha': 1.5909595350839556, 'tol': 0.0006811637371546568, 'max_iter': 2048, 'random_state': 8190}.
[I 2019-05-25 06:

[I 2019-05-25 06:13:34,456] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:13:38,202] Finished a trial resulted in value: 0.09381889823532633. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:13:41,256] Finished a trial resulted in value: 0.05407952124900921. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:13:45,340] Finished a trial resulted in value: 0.08292285044942234. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:13

[I 2019-05-25 06:14:45,408] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:14:46,669] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:14:49,327] Finished a trial resulted in value: 0.04605790763989744. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:14:50,283] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:

[I 2019-05-25 06:15:53,946] Finished a trial resulted in value: 0.05923593407520197. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:15:56,018] Finished a trial resulted in value: 0.046763183663873005. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:15:57,052] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:15:58,337] Finished a trial resulted in value: 0.046221861852773626. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:

[I 2019-05-25 06:16:53,453] Finished a trial resulted in value: 0.10938554635778969. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.
[I 2019-05-25 06:16:55,239] Finished a trial resulted in value: 0.04430365733397495. Current best value is 0.042666590917074365 with parameters: {'alpha': 1.591607006926824, 'tol': 0.0009943817228620152, 'max_iter': 4879, 'random_state': 6893}.


In [41]:
# Store every trial record collected during the study in the trial DB.
# Re-running this cell inserts the same records again.
for record in mytrial:
    db.insert(record)

In [42]:
# Reload the trial table and expose each trial's k-fold settings as a column.
df_trial = db.select()
df_trial['kfold'] = df_trial['param'].map(lambda p: p['kfold'])

# Ten best 'tune 1003' trials by validation MAE, keeping only runs whose
# train/validation gap is below 0.1.
(
    df_trial
    .loc[(df_trial['remark'] == 'tune 1003') & (df_trial['mae_diff'] < .1)]
    .sort_values(by=['val_mae'])
    [['datetime', 'remark', 'kfold', 'nfeatures', 'train_mae', 'train_mae_var', 'val_mae', 'val_mae_var', 'mae_diff']]
    .head(10)
)

Unnamed: 0,datetime,remark,kfold,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1176,2019-05-25 06:16:14.598722,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.088054,0.013466,2.144718,0.543831,0.056664
1126,2019-05-25 06:14:27.035895,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.091476,0.013472,2.147124,0.544061,0.055647
1147,2019-05-25 06:15:08.907572,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.091806,0.013475,2.14728,0.54421,0.055474
1083,2019-05-25 06:13:01.200083,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.094425,0.013475,2.148735,0.545319,0.05431
1202,2019-05-25 06:16:53.416355,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.108305,0.013651,2.158971,0.547141,0.050666
1005,2019-05-25 06:10:47.675852,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.117599,0.013666,2.166724,0.549415,0.049126
1044,2019-05-25 06:11:40.303067,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.125695,0.013805,2.172711,0.549413,0.047016
1036,2019-05-25 06:11:27.650460,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.127783,0.013835,2.174058,0.550129,0.046275
1075,2019-05-25 06:12:41.647418,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.134379,0.013968,2.179355,0.550448,0.044976
1180,2019-05-25 06:16:22.753993,tune 1003,"{'n_splits': 8, 'random_state': 1985, 'shuffle': True, 'type': 'group'}",31,2.14399,0.014058,2.186917,0.553298,0.042927


In [43]:
# Commit the 'tune 1003' trial rows inserted above (db was opened with
# auto_commit=False, so this call is presumably what persists them).
db.commit()

In [49]:
# Collects the trial records produced by EP.process during the study; a later
# cell inserts them into the trial DB.
mytrial = []


# Tune hyperparameters
def objective(trial):
    """Optuna objective: evaluate one sampled Lasso configuration with EP.process.

    Samples ``alpha``, ``tol``, ``max_iter`` and ``random_state`` from ``trial``,
    runs ``EP.process`` on ``df_train`` / ``df_test`` restricted to
    ``selected_columns`` with a StandardScaler and a shuffled 3-split
    'stratified' k-fold, and records summary statistics of the run as trial
    user attributes. ``EP.process`` is also handed the module-level ``mytrial``
    list.

    Parameters
    ----------
    trial : optuna trial object
        Supplies the sampled hyperparameters and receives the user attributes
        ``val_mae``, ``train_mae``, ``mae_diff``, ``val_mae_var`` and
        ``train_mae_var``.

    Returns
    -------
    float
        ``abs(val_mae - train_mae) * val_mae``, where both terms are the means
        of ``df_his.valid`` / ``df_his.train``. Minimising it favours a low
        validation score together with a small train/validation gap.
    """
    alpha = trial.suggest_uniform('alpha', 0.001, 10)
    tol = trial.suggest_uniform('tol', 0.00001, 0.001)
    max_iter = trial.suggest_int('max_iter', 500, 5000)
    # NOTE(review): scikit-learn documents Lasso's random_state as used only
    # when selection='random'; with 'cyclic' this search dimension is likely
    # inert -- consider fixing it instead of tuning it.
    random_state = trial.suggest_int('random_state', 1, 9999)

    args = {
        'algorithm': {
            'cls': 'Lasso',
            'fit': {},
            'init': {
                'alpha': alpha,
                'fit_intercept': True,
                'normalize': False,
                'precompute': False,
                'copy_X': True,
                'max_iter': max_iter,
                'tol': tol,
                'warm_start': False,
                'positive': False,
                'random_state': random_state,
                'selection': 'cyclic',
            },
        },
        'columns': selected_columns,
        'kfold': {
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'stratified',
        },
        'scaler': {'cls': 'StandardScaler', 'init': {}},
    }

    # Only the history frame is needed here; the other three return values
    # (feature importances, validation and test predictions) are discarded.
    df_his, _, _, _ = EP.process(df_train, args, df_test=df_test, trial=mytrial, remark='tune 1003 by stratified')

    # df_his.valid / df_his.train presumably hold one MAE per fold.
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean - train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    # Previously computed but never recorded; stored to mirror val_mae_var.
    trial.set_user_attr('train_mae_var', train_mae_var)

    return np.abs(val_mae_mean - train_mae_mean) * val_mae_mean


study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-25 06:27:03,238] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:03,653] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:04,123] Finished a trial resulted in value: 0.0010764663070892814. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:04,394] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-

[I 2019-05-25 06:27:13,908] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:14,209] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:14,434] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:14,675] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-

[I 2019-05-25 06:27:24,044] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:24,286] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:24,592] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:24,854] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-

[I 2019-05-25 06:27:35,515] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:35,918] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:36,207] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:36,507] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-

[I 2019-05-25 06:27:46,996] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:47,255] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:47,504] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:27:47,765] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-

[I 2019-05-25 06:27:59,192] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:28:00,331] Finished a trial resulted in value: 0.0017315166457491644. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:28:00,659] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:28:01,137] Finished a trial resulted in value: 0.001484842389308408. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-0

[I 2019-05-25 06:28:12,045] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.
[I 2019-05-25 06:28:12,361] Finished a trial resulted in value: 2.821098296547482e-05. Current best value is 2.821098296547482e-05 with parameters: {'alpha': 8.78798760157148, 'tol': 0.00012333041222997832, 'max_iter': 4112, 'random_state': 2597}.


In [50]:
# Store every trial record collected during the stratified study in the trial
# DB. Re-running this cell inserts the same records again.
for record in mytrial:
    db.insert(record)

In [51]:
# Reload the trial table and expose each trial's k-fold settings as a column.
df_trial = db.select()
df_trial['kfold'] = df_trial['param'].map(lambda p: p['kfold'])

# Ten best 'tune 1003 by stratified' trials by validation MAE, keeping only
# runs whose train/validation gap is below 0.1.
(
    df_trial
    .loc[(df_trial['remark'] == 'tune 1003 by stratified') & (df_trial['mae_diff'] < .1)]
    .sort_values(by=['val_mae'])
    [['datetime', 'remark', 'kfold', 'nfeatures', 'train_mae', 'train_mae_var', 'val_mae', 'val_mae_var', 'mae_diff']]
    .head(10)
)

Unnamed: 0,datetime,remark,kfold,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
1293,2019-05-25 06:27:32.101174,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.108386,1.649935e-07,2.110364,1.420349e-05,0.001978
1370,2019-05-25 06:28:00.299745,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.149009,1.391426e-07,2.149815,1.89813e-05,0.000805
1215,2019-05-25 06:27:07.319914,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.151843,1.83744e-07,2.152609,1.911218e-05,0.000766
1331,2019-05-25 06:27:45.179259,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.182166,8.453549e-08,2.182933,1.524021e-05,0.000767
1257,2019-05-25 06:27:20.031929,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.204724,2.20508e-07,2.20538,1.629254e-05,0.000656
1291,2019-05-25 06:27:30.569233,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.235206,5.21841e-07,2.235839,1.04419e-05,0.000632
1354,2019-05-25 06:27:53.405199,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.253807,5.538514e-07,2.254494,7.430402e-06,0.000687
1368,2019-05-25 06:27:58.817765,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.274553,7.184045e-07,2.275294,5.168799e-06,0.000741
1375,2019-05-25 06:28:02.649135,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.322177,7.258336e-07,2.322942,9.354687e-07,0.000765
1241,2019-05-25 06:27:15.250226,tune 1003 by stratified,"{'n_splits': 3, 'random_state': 1985, 'shuffle': True, 'type': 'stratified'}",31,2.341391,7.026255e-07,2.342199,2.239867e-07,0.000809


In [52]:
# Commit the 'tune 1003 by stratified' trial rows inserted above (db was opened
# with auto_commit=False, so this call is presumably what persists them).
db.commit()