In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
import lightgbm as lgb
import catboost as cb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from fastFM import als, mcmc, sgd
# from pyfm import pylibfm

import optuna

from common import EP
from dfdb import DFDB

import types
import copy

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
pd.set_option('display.max_colwidth', -1)

In [4]:
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [5]:
df_train['label'] = df_train['y'].apply(lambda x:  int(x) if x<15 else 15)
group = df_train['season'].values
group[np.where(group==17)[0]] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [6]:
test_X = df_train.drop(columns=['y','index','group','label']).copy()
test_X.index = df_train['index']
test_y = df_train['y'].copy()
test_y.index = df_train['index']
tsfresh_columns = select_features(test_X, test_y).columns.tolist()

In [7]:
original_columns = df_train.columns.drop(['index','y','label','group']).tolist()

In [8]:
catboost_columns = ['spkt_welch_density__coeff_3',
 'spkt_welch_densitycoeff_2',
 'abs_q25_5',
 'abs_q75_6',
 'q05_roll_std_1000',
 'abs_q75_7',
 'abs_q95_2',
 'q05_5',
 'abs_q75_2',
 '5000skewness_max_',
 'fft_coefficientcoeff_80__attr_"imag"',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 '5000kurtosis_mean_',
 "number_peaks{'n': 1}",
 '5000smoothness_entropy_',
 'ave10_7',
 'q75_roll_std_1000',
 'FFT_Mag_25q0',
 'fft_coefficientcoeff_20__attr_"abs"']
lgbm_columns = ['q25_roll_std_100',
 'abs_q25_5',
 'spkt_welch_density__coeff_3',
 'abs_q75_6',
 'abs_q75_7',
 'spkt_welch_densitycoeff_2',
 'median__roll_std',
 'abs_q01_5',
 '5000smoothness_quantile05',
 '5000smoothness_std_',
 'abs_q95_3',
 'FFT_Mag_75q0',
 '5000median_std_',
 'spkt_welch_density__coeff_17']
xgbm_columns = ['q25_roll_std_100',
 'abs_q25_5',
 'spkt_welch_density__coeff_3',
 'abs_q75_7',
 'spkt_welch_densitycoeff_2',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'agg_autocorrelationf_agg_"mean"__maxlag_40',
 "number_peaks{'n': 1}",
 '5000peak_peak_amp_max_',
 'abs_q95_3',
 'spkt_welch_density__coeff_89',
 'abs_q05_2']
randomforest_randomforest = ['abs_q25_5', 'abs_q01_4', 'q25_roll_std_100']
extratrees_columns = ['q05_2',
 "number_peaks{'n': 1}",
 'abs_q01_6',
 'abs_q95_2',
 '5000smoothness_quantile25',
 '5000std_median_',
 '5000smoothness_median_',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'abs_q75_6',
 "number_peaks{'n': 3}",
 'q75_roll_std_10',
 "number_peaks{'n': 10}",
 '5000min_quantile75',
 '5000smoothness_quantile05',
 "number_peaks{'n': 5}",
 'abs_q01_2',
 '5000smoothness_mean_',
 'min_roll_std_100',
 'abs_q05_2',
 'q01_roll_std_1000']
gradientboosting_columns = ['q05_5',
 'kurt_1',
 'abs_q75_6',
 'abs_q75_7',
 'spkt_welch_density__coeff_28',
 'spkt_welch_density__coeff_99',
 'fft_coefficientcoeff_6__attr_"abs"',
 '5000smoothness_quantile05',
 'q25_roll_std_100',
 'spkt_welch_densitycoeff_2',
 'abs_max_1',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'abs_q25_5',
 'abs_q01_7',
 'q05_8',
 'Hilbert_mean_6',
 'abs_q95_2',
 '5000skewness_max_',
 '5000kurtosis_mean_',
 'spkt_welch_density__coeff_3']

In [9]:
all_columns = catboost_columns+lgbm_columns+xgbm_columns+randomforest_randomforest+extratrees_columns+gradientboosting_columns
unique_columns = list(set(all_columns))
common_columns = []
common_columns50 = []
common_columns75 = []
common_columns95 = []
N_columns = 6
count_values = [all_columns.count(col) for col in unique_columns]
for col in unique_columns:
    if all_columns.count(col)==N_columns:
        common_columns.append(col)
    if all_columns.count(col)>=np.quantile(count_values, .5):
        common_columns50.append(col)
    if all_columns.count(col)>=np.quantile(count_values, .75):
        common_columns75.append(col)
    if all_columns.count(col)>=np.quantile(count_values, .95):
        common_columns95.append(col)
print('unique_columns ',len(unique_columns))
print('common_columns50 ',len(common_columns50))
print('common_columns75 ',len(common_columns75))
print('common_columns95 ',len(common_columns95))
print('common_columns ',len(common_columns))

unique_columns  55
common_columns50  55
common_columns75  15
common_columns95  7
common_columns  0


In [27]:
common_columns95

['abs_q25_5',
 'q25_roll_std_100',
 'spkt_welch_density__coeff_3',
 'abs_q75_7',
 'abs_q75_6',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_densitycoeff_2']

In [10]:
# mytrial = []
db = DFDB('../trial/knn.pkl', auto_commit=False)

In [12]:
param = {'algorithm': {'cls': 'KNeighborsRegressor',
  'fit': {},
  'init': {'n_neighbors': 477,
   'weights': 'uniform',
   'algorithm': 'ball_tree',
   'leaf_size': 30,
   'p': 2,
   'metric': 'minkowski'}},
 'columns':common_columns95 ,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler', 'init':{}}}


mytrial = []
columns = common_columns50

# param['algorithm']['init'] = {'logging_level': 'Silent', **param['algorithm']['init']}
# param['scaler']['init'] = {}
# param['algorithm']['fit'] = {}

selected_columns = EP.revert_rfe(df_train, param, columns, df_test, mytrial, start_columns=common_columns95, limit=20, remark='start from top1 column')
print(len(selected_columns))
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()
df_trial['kfold'] = df_trial['param'].apply(lambda x: x['kfold'])

15


In [13]:
df_trial[df_trial['remark']=='start from top1 column'][['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','remark']]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark
19,2019-05-22 11:47:17.419196,3,2.054778,0.003355,2.085213,0.013904,0.030435,start from top1 column
20,2019-05-22 11:47:39.852130,4,2.032224,0.002055,2.078742,0.012107,0.046518,start from top1 column
21,2019-05-22 11:48:06.707915,5,2.029920,0.001955,2.077330,0.012034,0.047410,start from top1 column
22,2019-05-22 11:48:43.627287,6,2.045170,0.002783,2.086831,0.013133,0.041661,start from top1 column
23,2019-05-22 11:49:10.483256,5,2.029920,0.001955,2.077330,0.012034,0.047410,start from top1 column
24,2019-05-22 11:49:39.231655,6,2.028500,0.001882,2.079078,0.011952,0.050578,start from top1 column
25,2019-05-22 11:50:05.086891,6,2.033676,0.001958,2.079419,0.011719,0.045743,start from top1 column
26,2019-05-22 11:50:33.751753,6,2.027707,0.001825,2.078595,0.012264,0.050889,start from top1 column
27,2019-05-22 11:51:00.454750,6,2.030642,0.001948,2.077809,0.012141,0.047167,start from top1 column
28,2019-05-22 11:51:28.412728,6,2.031211,0.001940,2.079024,0.012325,0.047813,start from top1 column


In [14]:
db.commit()

In [16]:
mytrial = []
#  tune hypterparameters
def objective(trial):

    n_neighbors = trial.suggest_int('n_neighbors', 100, 1000)
        
    args={'algorithm': {'cls': 'KNeighborsRegressor',
  'fit': {},
  'init': {'n_neighbors': n_neighbors,
   'weights': 'uniform',
   'algorithm': 'ball_tree',
   'leaf_size': 30,
   'p': 2,
   'metric': 'minkowski'}},
 'columns':selected_columns ,
 'kfold': {'n_splits': 8,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler', 'init':{}}}
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 102')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-25 06:31:06,184] Finished a trial resulted in value: 0.19995357426932772. Current best value is 0.19995357426932772 with parameters: {'n_neighbors': 380}.
[I 2019-05-25 06:33:14,629] Finished a trial resulted in value: 0.24879974848044548. Current best value is 0.19995357426932772 with parameters: {'n_neighbors': 380}.
[I 2019-05-25 06:35:21,858] Finished a trial resulted in value: 0.24041871005196755. Current best value is 0.19995357426932772 with parameters: {'n_neighbors': 380}.
[I 2019-05-25 06:39:03,586] Finished a trial resulted in value: 0.15461191687172696. Current best value is 0.15461191687172696 with parameters: {'n_neighbors': 898}.
[I 2019-05-25 06:43:27,910] Finished a trial resulted in value: 0.1577073825399229. Current best value is 0.15461191687172696 with parameters: {'n_neighbors': 898}.
[I 2019-05-25 06:47:34,504] Finished a trial resulted in value: 0.15337767712266728. Current best value is 0.15337767712266728 with parameters: {'n_neighbors': 923}.
[I 20

[I 2019-05-25 08:58:50,571] Finished a trial resulted in value: 0.17194214219040005. Current best value is 0.1502025200890898 with parameters: {'n_neighbors': 998}.
[I 2019-05-25 09:01:49,942] Finished a trial resulted in value: 0.1644814248239749. Current best value is 0.1502025200890898 with parameters: {'n_neighbors': 998}.
[I 2019-05-25 09:04:45,808] Finished a trial resulted in value: 0.16640556142995447. Current best value is 0.1502025200890898 with parameters: {'n_neighbors': 998}.
[I 2019-05-25 09:07:52,772] Finished a trial resulted in value: 0.16134681991255395. Current best value is 0.1502025200890898 with parameters: {'n_neighbors': 998}.
[I 2019-05-25 09:11:19,096] Finished a trial resulted in value: 0.15446714429548894. Current best value is 0.1502025200890898 with parameters: {'n_neighbors': 998}.
[I 2019-05-25 09:13:30,955] Finished a trial resulted in value: 0.20275522048832742. Current best value is 0.1502025200890898 with parameters: {'n_neighbors': 998}.
[I 2019-05-

[I 2019-05-25 12:02:41,054] Finished a trial resulted in value: 0.1515084506838252. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 12:05:41,124] Finished a trial resulted in value: 0.1637332344057581. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 12:08:39,222] Finished a trial resulted in value: 0.1659921463497242. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 12:12:04,352] Finished a trial resulted in value: 0.15522178696776223. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 12:15:49,183] Finished a trial resulted in value: 0.1792975911362291. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 12:18:45,927] Finished a trial resulted in value: 0.21030907652772599. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I

[I 2019-05-25 14:50:03,046] Finished a trial resulted in value: 0.15020819834280522. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 14:53:28,168] Finished a trial resulted in value: 0.1534406228553356. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 14:56:45,017] Finished a trial resulted in value: 0.15663276880410432. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 15:00:08,977] Finished a trial resulted in value: 0.15413259670914314. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 15:03:38,450] Finished a trial resulted in value: 0.15228936777099136. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 15:07:12,486] Finished a trial resulted in value: 0.15102506766385912. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.

[I 2019-05-25 17:29:36,273] Finished a trial resulted in value: 0.171250678848922. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 17:33:04,759] Finished a trial resulted in value: 0.15289375834604124. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 17:35:39,099] Finished a trial resulted in value: 0.1825795630442704. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.
[I 2019-05-25 17:39:12,889] Finished a trial resulted in value: 0.1502025200890898. Current best value is 0.15018462186280732 with parameters: {'n_neighbors': 1000}.


In [17]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [18]:
df_trial[(df_trial['remark']=='tune 102')&(df_trial['mae_diff']<.1)].sort_values(by=['val_mae'])[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','remark']].head()

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark
132,2019-05-25 07:58:22.773360,15,2.0029,0.012259,2.101092,0.521664,0.098192,tune 102
110,2019-05-25 06:49:51.903847,15,2.003115,0.012269,2.101107,0.521757,0.097993,tune 102
248,2019-05-25 14:39:36.408453,15,2.003705,0.012278,2.101261,0.521939,0.097557,tune 102
158,2019-05-25 09:13:30.950639,15,2.005091,0.012291,2.101569,0.522218,0.096478,tune 102
224,2019-05-25 13:18:09.378289,15,2.00582,0.012311,2.101681,0.522534,0.09586,tune 102


In [19]:
db.commit()

In [20]:
mytrial = []
#  tune hypterparameters
def objective(trial):

    n_neighbors = trial.suggest_int('n_neighbors', 100, 1000)
        
    args={'algorithm': {'cls': 'KNeighborsRegressor',
  'fit': {},
  'init': {'n_neighbors': n_neighbors,
   'weights': 'uniform',
   'algorithm': 'ball_tree',
   'leaf_size': 30,
   'p': 2,
   'metric': 'minkowski'}},
 'columns':selected_columns ,
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'stratified'},
 'scaler': {'cls': 'StandardScaler', 'init':{}}}
    
    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 102 by stratified')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)
    
    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)

    return np.abs(val_mae_mean - train_mae_mean)*val_mae_mean

study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-25 17:39:57,210] Finished a trial resulted in value: 0.00952651757551283. Current best value is 0.00952651757551283 with parameters: {'n_neighbors': 322}.
[I 2019-05-25 17:40:59,002] Finished a trial resulted in value: 0.004105136371208019. Current best value is 0.004105136371208019 with parameters: {'n_neighbors': 805}.
[I 2019-05-25 17:41:32,937] Finished a trial resulted in value: 0.014099639826949758. Current best value is 0.004105136371208019 with parameters: {'n_neighbors': 805}.
[I 2019-05-25 17:42:00,656] Finished a trial resulted in value: 0.03019400831682931. Current best value is 0.004105136371208019 with parameters: {'n_neighbors': 805}.
[I 2019-05-25 17:43:06,381] Finished a trial resulted in value: 0.0036511111879391498. Current best value is 0.0036511111879391498 with parameters: {'n_neighbors': 906}.
[I 2019-05-25 17:43:33,290] Finished a trial resulted in value: 0.03562078854340069. Current best value is 0.0036511111879391498 with parameters: {'n_neighbors':

[I 2019-05-25 18:24:49,559] Finished a trial resulted in value: 0.0037627403869805523. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 18:25:57,310] Finished a trial resulted in value: 0.0032208067488667977. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 18:26:28,090] Finished a trial resulted in value: 0.02651465564213282. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 18:27:33,599] Finished a trial resulted in value: 0.003878143365871402. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 18:28:36,050] Finished a trial resulted in value: 0.004554158439028245. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 18:29:27,670] Finished a trial resulted in value: 0.005654340957954571. Current best value is 0.0030955725434523994 with parameters: {'n_nei

[I 2019-05-25 19:13:57,664] Finished a trial resulted in value: 0.008979451588397214. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 19:14:49,996] Finished a trial resulted in value: 0.005240256386736896. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 19:15:51,770] Finished a trial resulted in value: 0.00452788178057214. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 19:16:59,781] Finished a trial resulted in value: 0.003842996115314099. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 19:17:50,149] Finished a trial resulted in value: 0.006424019155858553. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 19:18:58,592] Finished a trial resulted in value: 0.0035440088544211566. Current best value is 0.0030955725434523994 with parameters: {'n_neig

[I 2019-05-25 20:02:10,906] Finished a trial resulted in value: 0.0069939811223753545. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 20:03:21,980] Finished a trial resulted in value: 0.003203004530034747. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 20:04:31,226] Finished a trial resulted in value: 0.0032208067488667977. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 20:05:43,173] Finished a trial resulted in value: 0.003241039978136608. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 20:06:53,148] Finished a trial resulted in value: 0.003357761676852382. Current best value is 0.0030955725434523994 with parameters: {'n_neighbors': 938}.
[I 2019-05-25 20:08:00,616] Finished a trial resulted in value: 0.0037627403869805523. Current best value is 0.0030955725434523994 with parameters: {'n_n

[I 2019-05-25 20:52:59,406] Finished a trial resulted in value: 0.003508475637845144. Current best value is 0.003073548770433164 with parameters: {'n_neighbors': 940}.
[I 2019-05-25 20:53:53,597] Finished a trial resulted in value: 0.005107393628894286. Current best value is 0.003073548770433164 with parameters: {'n_neighbors': 940}.
[I 2019-05-25 20:55:04,339] Finished a trial resulted in value: 0.003283649737474805. Current best value is 0.003073548770433164 with parameters: {'n_neighbors': 940}.
[I 2019-05-25 20:56:11,798] Finished a trial resulted in value: 0.0037627403869805523. Current best value is 0.003073548770433164 with parameters: {'n_neighbors': 940}.
[I 2019-05-25 20:57:20,600] Finished a trial resulted in value: 0.0031825395905928267. Current best value is 0.003073548770433164 with parameters: {'n_neighbors': 940}.
[I 2019-05-25 20:58:23,291] Finished a trial resulted in value: 0.004215257433567653. Current best value is 0.003073548770433164 with parameters: {'n_neighbor

In [21]:
for trial_i in mytrial:
    db.insert(trial_i)
df_trial = db.select()

In [22]:
df_trial[(df_trial['remark']=='tune 102 by stratified')&(df_trial['mae_diff']<.05)].sort_values(by=['val_mae'])[['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff','remark']].head()

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff,remark
309,2019-05-25 17:43:33.288561,15,1.973323,1.4e-05,1.991212,2.9e-05,0.017889,tune 102 by stratified
412,2019-05-25 19:25:35.404389,15,1.978986,1.3e-05,1.994967,3.6e-05,0.015981,tune 102 by stratified
307,2019-05-25 17:42:00.654669,15,1.980976,1.4e-05,1.996103,3.8e-05,0.015126,tune 102 by stratified
354,2019-05-25 18:26:28.085290,15,1.986965,1.5e-05,2.000221,4.8e-05,0.013256,tune 102 by stratified
310,2019-05-25 17:44:03.656241,15,1.993379,1.6e-05,2.004368,6e-05,0.010989,tune 102 by stratified


In [23]:
db.commit()

In [25]:
mytrial=[]
param = df_trial.loc[309]['param']
param['kfold']['type']='group'
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, param, df_test = df_test, trial=mytrial, remark='remodel 309 by group')