In [30]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from dfdb import DFDB

import numpy as np
from sklearn.decomposition import PCA

import types
import copy

In [31]:
%%HTML
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [32]:
# Notebook display options: show up to 2000 rows and never truncate cell text.
pd.set_option('display.max_rows', 2000)
try:
    # pandas >= 1.0 spells "no limit" as None; -1 is deprecated there and
    # rejected by later releases.
    pd.set_option('display.max_colwidth', None)
except (ValueError, TypeError):
    # Older pandas only accepts integers and uses -1 for "no limit".
    pd.set_option('display.max_colwidth', -1)

In [33]:
# Load the precomputed train/test tables from the feature directory.
# NOTE(review): pickle files execute arbitrary code on load — only read files
# produced by this project.
df_train = pd.read_pickle('../feats/df_train.pkl')
df_test = pd.read_pickle('../feats/df_test.pkl')

In [34]:
# Integer label derived from the target: int(y) below 15, everything else is 15.
df_train['label'] = df_train['y'].apply(lambda ttf: int(ttf) if ttf < 15 else 15)

# Build the grouping key from `season`, folding season 17 into group 1.
# Work on a copy: `.values` may expose the frame's own buffer, which is
# read-only under pandas Copy-on-Write and should not be edited behind
# pandas' back in any case.
group = df_train['season'].values.copy()
group[group == 17] = 1
df_train['group'] = group
df_train = df_train.drop(columns=['season'])

In [37]:
# Open the CatBoost trial store and load its rows.
db_catboost = DFDB('../trial/catboost.pkl', auto_commit=False)
df_trial_catboost = db_catboost.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_catboost['kfold'] = df_trial_catboost['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [38]:
# Open the XGBoost trial store and load its rows.
db_xgbm = DFDB('../trial/xgbm.pkl', auto_commit=False)
df_trial_xgbm = db_xgbm.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_xgbm['kfold'] = df_trial_xgbm['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [39]:
# Open the LightGBM trial store and load its rows.
db_lgbm = DFDB('../trial/lgbm.pkl', auto_commit=False)
df_trial_lgbm = db_lgbm.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_lgbm['kfold'] = df_trial_lgbm['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [40]:
# Open the random-forest trial store and load its rows.
db_randomforest = DFDB('../trial/randomforest.pkl', auto_commit=False)
df_trial_randomforest = db_randomforest.select()
# Take the CV settings from this frame's own `param` column. The previous code
# read them from df_trial_lgbm, so the column held another model's settings
# matched up by row label.
df_trial_randomforest['kfold'] = df_trial_randomforest['param'].apply(lambda x: x['kfold'])

In [41]:
# Open the extra-trees trial store and load its rows.
db_extratrees = DFDB('../trial/extratrees.pkl', auto_commit=False)
df_trial_extratrees = db_extratrees.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_extratrees['kfold'] = df_trial_extratrees['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [42]:
# Open the gradient-boosting trial store and load its rows.
db_gradientboosting = DFDB('../trial/gradientboosting.pkl', auto_commit=False)
df_trial_gradientboosting = db_gradientboosting.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_gradientboosting['kfold'] = df_trial_gradientboosting['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [43]:
# Open the k-NN trial store and load its rows.
db_knn = DFDB('../trial/knn.pkl', auto_commit=False)
df_trial_knn = db_knn.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_knn['kfold'] = df_trial_knn['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [44]:
# Open the SVR trial store and load its rows.
db_svr = DFDB('../trial/svr.pkl', auto_commit=False)
df_trial_svr = db_svr.select()
# Take the CV settings from this frame's own `param` column. The previous code
# read them from df_trial_knn, so the column held another model's settings
# matched up by row label.
df_trial_svr['kfold'] = df_trial_svr['param'].apply(lambda x: x['kfold'])

In [45]:
# Open the FM trial store and load its rows.
db_fm = DFDB('../trial/fm.pkl', auto_commit=False)
df_trial_fm = db_fm.select()
# Take the CV settings from this frame's own `param` column. The previous code
# read them from df_trial_knn, so the column held another model's settings
# matched up by row label.
df_trial_fm['kfold'] = df_trial_fm['param'].apply(lambda x: x['kfold'])

In [46]:
# Open the lasso trial store and load its rows.
db_lasso = DFDB('../trial/lasso.pkl', auto_commit=False)
df_trial_lasso = db_lasso.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_lasso['kfold'] = df_trial_lasso['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [47]:
# Open the ridge trial store and load its rows.
db_ridge = DFDB('../trial/ridge.pkl', auto_commit=False)
df_trial_ridge = db_ridge.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_ridge['kfold'] = df_trial_ridge['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [48]:
# Open the Keras MLP trial store and load its rows.
db_kerasmlp = DFDB('../trial/kerasmlp.pkl', auto_commit=False)
df_trial_mlp = db_kerasmlp.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_mlp['kfold'] = df_trial_mlp['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [49]:
# Open the FRGF trial store and load its rows.
db_frgf = DFDB('../trial/frgf.pkl', auto_commit=False)
df_trial_frgf = db_frgf.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial_frgf['kfold'] = df_trial_frgf['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [50]:
# Open the stacknet+PCA trial store and load its rows.
db_stacknet_pca = DFDB('../trial/stacknet_pca.pkl', auto_commit=False)
df_trial_stacknet_pca = db_stacknet_pca.select()
# Take the CV settings from this frame's own `param` column. The previous code
# read them from df_trial_frgf, so the column held another model's settings
# matched up by row label.
df_trial_stacknet_pca['kfold'] = df_trial_stacknet_pca['param'].apply(lambda x: x['kfold'])

In [51]:
# First-level models feeding the stack: (column name, trial table, trial id).
# Keeping the three together in one row avoids the two parallel lists drifting
# out of sync.
_selected_trials = [
    ('cb452', df_trial_catboost, 452),
    ('xgbm1172', df_trial_xgbm, 1172),
    ('lgbm2156', df_trial_lgbm, 2156),
    ('gbm306', df_trial_gradientboosting, 306),
    ('rf297', df_trial_randomforest, 297),
    ('et459', df_trial_extratrees, 459),
    ('knn17', df_trial_knn, 17),
    ('svr7', df_trial_svr, 7),
    ('fm313', df_trial_fm, 313),
    ('lasso8', df_trial_lasso, 8),
    ('ridge15', df_trial_ridge, 15),
    ('mlp13', df_trial_mlp, 13),
    ('frgf200', df_trial_frgf, 200),
]
trial_lst = [trial_table.loc[trial_id] for _, trial_table, trial_id in _selected_trials]
name_lst = [column_name for column_name, _, _ in _selected_trials]

In [52]:
# Build one column of predictions per first-level model:
#   * train side: the trial's validation prediction (`predict`),
#   * test side: the row-wise mean over the trial's test-prediction columns.
# Every column is keyed by the trial's own `index` column, so rows from
# different trials are matched by id rather than by position, and the final
# `index` column no longer depends on which trial happened to run last.
train_oof_cols = {}
test_oof_cols = {}
for df_, name_ in zip(trial_lst, name_lst):
    try:
        df_test_oof_i = df_['df_test_pred']
        df_train_oof_i = df_['df_valid_pred']
        test_col = pd.Series(
            np.mean(df_test_oof_i.drop(columns=['index']).values, axis=1),
            index=df_test_oof_i['index'].values,
        )
        train_col = pd.Series(
            df_train_oof_i['predict'].values,
            index=df_train_oof_i['index'].values,
        )
    except Exception as e:
        # Best effort, as before: report the model and move on. Both columns
        # are computed before either is stored, so a failure never leaves the
        # train and test frames with different model columns.
        print(name_, e.__str__())
        continue
    test_oof_cols[name_] = test_col
    train_oof_cols[name_] = train_col

df_train_oof = pd.DataFrame(train_oof_cols)
df_test_oof = pd.DataFrame(test_oof_cols)

# Expose the id as a regular trailing `index` column for the later merges.
df_train_oof['index'] = df_train_oof.index
df_test_oof['index'] = df_test_oof.index
df_train_oof = df_train_oof.reset_index(drop=True)
df_test_oof = df_test_oof.reset_index(drop=True)

In [53]:
# Attach the first-level model predictions to the feature tables by `index`.
# NOTE: re-running this cell merges the same columns a second time.
df_train = df_train.merge(df_train_oof, on='index')
df_test = df_test.merge(df_test_oof, on='index')

In [54]:
# Load the PCA feature tables for train and test.
# NOTE(review): pickle files execute arbitrary code on load — only read files
# produced by this project.
df_pca1_train = pd.read_pickle('../feats/pca.pkl')
df_pca1_test = pd.read_pickle('../feats/pca_test.pkl')

In [55]:
# Append the PCA columns side by side. concat matches rows on the frames'
# index labels.
# NOTE(review): presumably the PCA tables use the same 0..n-1 row labels and
# order as the merged frames — confirm, otherwise rows are mismatched.
df_train = pd.concat((df_train, df_pca1_train), axis='columns')
df_test = pd.concat((df_test, df_pca1_test), axis='columns')

In [56]:
# Load the `spec_features` tables for train and test.
# NOTE(review): pickle files execute arbitrary code on load — only read files
# produced by this project.
df_spec_train = pd.read_pickle('../feats/spec_features.pkl')
df_spec_test = pd.read_pickle('../feats/spec_features_test.pkl')

In [57]:
# Attach the spec features by `index`.
# NOTE: re-running this cell merges the same columns a second time.
df_train = df_train.merge(df_spec_train, on='index')
df_test = df_test.merge(df_spec_test, on='index')

In [199]:
# Load the t-SNE feature tables for train and test.
# NOTE(review): pickle files execute arbitrary code on load — only read files
# produced by this project.
df_tsne_train = pd.read_pickle('../feats/tsne.pkl')
df_tsne_test = pd.read_pickle('../feats/tsne_test.pkl')

In [200]:
# Attach the t-SNE features by `index`.
# NOTE: re-running this cell merges the same columns a second time.
df_train = df_train.merge(df_tsne_train, on='index')
df_test = df_test.merge(df_tsne_test, on='index')

In [222]:
# Load the VAE feature tables for train and test.
# NOTE(review): pickle files execute arbitrary code on load — only read files
# produced by this project.
df_vae_train = pd.read_pickle('../feats/vae.pkl')
df_vae_test = pd.read_pickle('../feats/vae_test.pkl')

In [223]:
# Attach the VAE features by `index`.
# NOTE: re-running this cell merges the same columns a second time.
df_train = df_train.merge(df_vae_train, on='index')
df_test = df_test.merge(df_vae_test, on='index')

In [29]:
# mytrial =[]
# columns= param['columns']
# df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(df_train[columns+['y','index','group','label']], param, df_test=df_test[columns+['index']], trial=mytrial, is_output_feature_importance=False, remark='rebuild mystacknet_pca_469')

In [57]:
# Tabulate the trials collected in `mytrial`.
# NOTE: `mytrial` must already exist in the kernel; no earlier active cell in
# this notebook creates it.
summary_columns = ['datetime','nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']
pd.DataFrame(mytrial)[summary_columns]

Unnamed: 0,datetime,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
0,2019-05-29 12:35:32.727234,47,1.730152,1.5e-05,1.778465,0.000249,0.048313


In [103]:
# Run configuration with four sections: `columns` (input features; the first
# thirteen are the first-level model columns from `name_lst`), `kfold` (CV
# scheme), `scaler`, and `algorithm` (CatBoost regressor settings).
# NOTE: the feature-addition loop near the end of the notebook rebinds the
# name `param`.
param = {'columns': ['cb452',
  'xgbm1172',
  'lgbm2156',
  'gbm306',
  'rf297',
  'et459',
  'knn17',
  'svr7',
  'fm313',
  'lasso8',
  'ridge15',
  'mlp13',
  'frgf200',
  'spkt_welch_density__coeff_3',
  'abs_q25_5',
  'q25_roll_std_100',
  'abs_q75_6',
  'abs_q75_7',
  'spkt_welch_densitycoeff_2',
  'abs_q01_4',
  'iqr_6',
  'q05_roll_std_100',
  'q05_roll_std_1000',
  'median__roll_std',
  'abs_q01_5',
  "number_peaks{'n': 10}",
  'FFT_Mag_75q0',
  "value_count{'value': 1}",
  'q01_roll_std_100',
  'abs_q95_2',
  'abs_q95_6',
  'MA_1000MA_std_mean_7',
  'q05_roll_std_10',
  'q01_roll_std_1000',
  'abs_max_roll_mean_1000',
  'abs_q75_2',
  'abs_q05_6',
  '5000std_quantile25',
  "number_crossing_m{'m': 1}",
  "autocorrelation{'lag': 5}",
  'q75_roll_std_10',
  'q05_2',
  '5000smoothness_quantile05',
  '5000variance_median__pca6',
  'abs_q05_6_pca1',
  '5000std_quantile75_pca4',
  "range_count{'min': 0, 'max': 1000000000000.0}_pca1"],
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'stratified'},
 'scaler': {'cls': 'StandardScaler', 'init': {}},
 'algorithm': {'cls': 'cb.CatBoostRegressor',
  'init': {'num_trees': 397,
   'depth': 6,
   'learning_rate': 0.04568702913200931,
   'l2_leaf_reg': 42.67420651153853,
   'bagging_temperature': 0.8288030970057758,
   'random_strength': 0.648173282450518,
   'random_state': 9734,
   'logging_level': 'Silent'},
  'fit': {}}}

In [63]:
# Six MFCC feature names; all six also appear in the `lgbm_top200` list.
mfcc_columns = ['mfcc_5_abs_mean',
 'mfcc_13_mean',
 'mfcc_accelerate_8_variance',
 'mfcc_accelerate_1_kurtosis',
 'mfcc_13_quantile25',
 'mfcc_5_mean']

In [32]:
# List of 200 feature names. Judging by the variable name this is presumably
# a LightGBM top-200 ranking — TODO confirm the source and ordering.
# No other cell visible in this notebook references it.
lgbm_top200 = ['spkt_welch_densitycoeff_2',
 'spkt_welch_density__coeff_3',
 'q25_roll_std_100',
 '3th_peak_freq',
 'min_roll_std_100',
 'q05_roll_std_100',
 'iqr_6',
 'abs_max_8',
 'mfcc_5_abs_mean',
 "number_peaks{'n': 10}",
 'ave10_7',
 'mfcc_13_mean',
 "autocorrelation{'lag': 5}",
 'mfcc_accelerate_8_variance',
 'abs_max_7',
 'mfcc_accelerate_1_kurtosis',
 'q05_roll_std_1000',
 'spkt_welch_density__coeff_42',
 'max_to_min_diff_5',
 'mfcc_13_quantile25',
 'fft_coefficientcoeff_80__attr_"imag"',
 'abs_q25_5',
 'mfcc_5_mean',
 'median__roll_std',
 '5000skewness_max_',
 'fft_coefficientcoeff_6__attr_"abs"',
 'partial_autocorrelationlag_5',
 'abs_min_8',
 'spkt_welch_density__coeff_28',
 'ar_coefficientk_10__coeff_3',
 'abs_q75_7',
 'mfcc_accelerate_15_min',
 'abs_max_4',
 'mfcc_10_quantile25',
 "change_quantiles{'ql': 0.2, 'qh': 0.8, 'isabs': False, 'f_agg': 'var'}",
 'spkt_welch_density__coeff_79',
 "value_count{'value': 1}",
 '5000peak_peak_amp_max_',
 'min__roll_std',
 'mfcc_4_rolling_std_mean',
 'q01_roll_std_1000',
 'mfcc_3_abs_q75',
 'mfcc_5_quantile95',
 'fft_coefficientcoeff_16__attr_"imag"',
 'abs_q01_4',
 'mfcc_6_quantile05',
 'agg_autocorrelationf_agg_"mean"__maxlag_40',
 '5000quantile25skewness_',
 'mfcc_0_abs_q25',
 'mfcc_delta_3_quantile01',
 'spkt_welch_density__coeff_99',
 'mfcc_9_mean',
 'mfcc_12_mean',
 'q75_roll_mean_10',
 'max_to_min',
 'mfcc_3_abs_mean',
 'kurt_7',
 'mfcc_14_quantile01',
 'mfcc_10_mean',
 'mfcc_delta_13_min',
 'mfcc_delta_13_abs_max',
 '5000no_zero_crossing_mean_',
 'abs_q75_6',
 'mfcc_12_quantile99',
 'mfcc_accelerate_6_skewness',
 'mfcc_5_max',
 '5000smoothness_entropy_',
 '5000median_skewness_',
 'mfcc_accelerate_8_min',
 'abs_min_3',
 '5000quantile75mean_',
 "number_crossing_m{'m': 1}",
 '5000smoothness_std_',
 'mfcc_9_abs_q25',
 'flac3_1_quantile99',
 'mfcc_5_skewness',
 'fft_coefficientcoeff_56__attr_"angle"',
 'fft_coefficientcoeff_70__attr_"abs"',
 'mfcc_10_abs_q75',
 'fft_coefficientcoeff_24__attr_"angle"',
 'med_7',
 'spkt_welch_density__coeff_73',
 'abs_q99_8',
 'ave10_6',
 'spkt_welch_density__coeff_38',
 'skew_1',
 'mfcc_delta_3_abs_q95',
 "change_quantiles{'ql': 0.6, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}",
 'max_to_min_5',
 'mfcc_delta_4_quantile75',
 'abs_max_1',
 '5000crest_factor_quantile75',
 'partial_autocorrelationlag_1',
 'mfcc_delta_11_quantile01',
 'mfcc_accelerate_7_quantile01',
 'mfcc_8_rolling_std_mean',
 'q95_roll_mean_100',
 'mfcc_delta_6_rolling_std_mean',
 'flac3_0_min',
 'fft_coefficientcoeff_36__attr_"abs"',
 'iqr_8',
 'max_9',
 '5000smoothness_quantile05',
 'mfcc_1_kurtosis',
 'mfcc_7_abs_q95',
 'spkt_welch_density__coeff_66',
 'spkt_welch_density__coeff_64',
 'mfcc_3_abs_q95',
 '5000skewness_mean_',
 'mfcc_10_abs_mean',
 '5000quantile99quantile01',
 'mean_change_rate',
 'mfcc_accelerate_5_rolling_std_mean',
 'mfcc_accelerate_5_quantile05',
 '5000rms_median_',
 'flac3_1_abs_mean',
 '5000rms_quantile05',
 '5000quantile75quantile25',
 'mfcc_accelerate_12_min',
 "value_count{'value': -1}",
 'mfcc_12_abs_max',
 'abs_min_5',
 'mfcc_5_abs_q99',
 'mfcc_delta_5_quantile01',
 'mfcc_12_quantile75',
 'fft_coefficientcoeff_56__attr_"imag"',
 'spkt_welch_densitycoeff_5',
 'mfcc_0_abs_q05',
 'mfcc_13_quantile75',
 'mfcc_delta_5_min',
 'mfcc_5_quantile99',
 'fft_coefficientcoeff_8__attr_"angle"',
 'spkt_welch_density__coeff_30',
 'mfcc_accelerate_4_max',
 'mfcc_14_median',
 "change_quantiles{'ql': 0.2, 'qh': 0.4, 'isabs': False, 'f_agg': 'var'}",
 'mfcc_5_median',
 'mfcc_accelerate_5_abs_q75',
 'spkt_welch_density__coeff_59',
 'mfcc_accelerate_5_kurtosis',
 'mfcc_delta_9_quantile99',
 'mfcc_5_quantile25',
 'spkt_welch_density__coeff_113',
 'mfcc_2_quantile05',
 'spkt_welch_density__coeff_58',
 'mfcc_5_abs_q75',
 'spkt_welch_density__coeff_22',
 'spkt_welch_density__coeff_115',
 'spkt_welch_density__coeff_4',
 'mfcc_5_abs_q05',
 'spkt_welch_density__coeff_25',
 'mfcc_5_abs_max',
 'spkt_welch_density__coeff_27',
 'mfcc_4_quantile75',
 'mfcc_3_rolling_std_mean',
 'mfcc_4_median',
 'mfcc_0_quantile01',
 '5000quantile75rssq_',
 'fft_coefficientcoeff_24__attr_"imag"',
 'mfcc_delta_12_kurtosis',
 '5000rms_quantile25',
 'mfcc_13_quantile05',
 'fft_coefficientcoeff_8__attr_"imag"',
 'mfcc_delta_11_rolling_std_mean',
 'mfcc_delta_12_quantile99',
 'mfcc_accelerate_9_abs_q25',
 'mfcc_11_abs_mean',
 'mfcc_delta_15_abs_max',
 'abs_max_2',
 'mfcc_11_abs_std',
 'abs_max_roll_mean_1000',
 'peak_to_average_power_ratio__roll_mean',
 'mfcc_13_median',
 'fft_coefficientcoeff_62__attr_"abs"',
 'mfcc_11_quantile75',
 "quantile{'q': 0.8}",
 'mfcc_delta_3_rolling_std_mean',
 'abs_q99_7',
 'mfcc_delta_12_max',
 'mfcc_12_min',
 'mfcc_10_quantile99',
 'mfcc_14_abs_q99',
 'mfcc_14_quantile05',
 'mfcc_14_quantile25',
 'kurt_1',
 'mfcc_2_median',
 'q01_2',
 'mfcc_2_abs_q25',
 'kurt_8',
 'mfcc_accelerate_6_abs_max',
 'mfcc_1_quantile75',
 'q05_5',
 'abs_q95_9',
 'q05_roll_mean_100',
 'mfcc_10_abs_q25',
 'q75_9',
 'mfcc_15_mean',
 "change_quantiles{'ql': 0.6, 'qh': 0.8, 'isabs': True, 'f_agg': 'mean'}",
 'flac3_0_abs_std',
 "change_quantiles{'ql': 0.4, 'qh': 1.0, 'isabs': False, 'f_agg': 'mean'}"]

In [111]:
# Trial store for the stacknet + PCA + MFCC experiments. It is opened with
# auto_commit=False; later cells call insert() and then commit() explicitly.
db_stacknet_pca_mfcc = DFDB('../trial/stacknet_pca_mfcc.pkl', auto_commit=False)

In [206]:
def width_frist_rfe(df_train, param, trial, score, df_test=None, remark=None):
    """Greedy backward feature elimination, one full sweep at a time.

    Each sweep evaluates, for every column in the current best configuration,
    the configuration with that single column removed (one ``EP.process`` call
    per candidate). The candidate with the lowest mean validation score
    becomes the new best, and sweeps repeat until none of them beats the
    current best.

    Compared with the previous version:
      * columns keep their original order (set arithmetic reordered them
        differently between runs, which made results non-reproducible);
      * every candidate is a fresh copy, so ``EP.process`` never sees a dict
        that is mutated afterwards;
      * the best configuration and score are returned instead of being lost.

    Parameters
    ----------
    df_train : training frame, forwarded unchanged to ``EP.process``.
    param : dict with at least a ``'columns'`` list; never modified.
    trial : forwarded to ``EP.process`` as ``trial=``.
    score : baseline validation score a candidate must beat (lower is better).
    df_test : optional test frame, forwarded to ``EP.process``.
    remark : optional tag, forwarded to ``EP.process``.

    Returns
    -------
    (best_param, best_score) : best configuration found (a copy of ``param``
        when nothing improved on ``score``) and its score.
    """
    best_param = copy.deepcopy(param)
    best_score = score

    while True:
        current_columns = list(best_param['columns'])
        if len(current_columns) <= 1:
            # Dropping the only remaining column would leave no features.
            break

        sweep_param, sweep_score = None, best_score
        for col in current_columns:
            candidate = copy.deepcopy(best_param)
            candidate['columns'] = [c for c in current_columns if c != col]
            df_his, _, _, _ = EP.process(
                df_train, candidate, df_test=df_test, trial=trial,
                is_output_feature_importance=False, remark=remark)
            val_mae_mean = np.mean(df_his.valid)
            if val_mae_mean < sweep_score:
                sweep_param, sweep_score = candidate, val_mae_mean

        if sweep_param is None:
            # No single removal improved the score: stop.
            break
        best_param, best_score = sweep_param, sweep_score

    return best_param, best_score

In [146]:
# mytrial =[]

# tune hyperparameters
def objective(trial):
    """Optuna objective: evaluate one CatBoost configuration.

    Samples the CatBoost hyper-parameters from ``trial``, runs ``EP.process``
    with a 3-split 'group' k-fold on the columns of trial 452, and returns the
    mean of ``df_his.valid``.

    Relies on notebook-level names that must exist before the study runs:
    ``df_trial``, ``df_train``, ``df_test``, ``mytrial`` and ``EP``.

    Train/validation means, the validation variance, the train variance
    (previously computed but dropped) and the validation-train gap are stored
    on the trial as user attributes.
    """
    # Search space. The order of the suggest_* calls is unchanged.
    num_trees = trial.suggest_int('num_trees', 200, 1000)
    depth = trial.suggest_int('depth', 2, 8)
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 0.4)
    l2_leaf_reg = trial.suggest_uniform('l2_leaf_reg', 0.0001, 1)
    bagging_temperature = trial.suggest_uniform('bagging_temperature', .6, 1)
    random_strength = trial.suggest_uniform('random_strength', .001, 1)
    random_state = trial.suggest_int('random_state', 1, 9999)

    args={
        'columns':df_trial.loc[452]['param']['columns'],
        'kfold':{
            'n_splits': 3,
            'random_state': 1985,
            'shuffle': True,
            'type': 'group'
        },
        'scaler':{
            'cls':'StandardScaler',
            'init':{}
        },
        'algorithm':{
            'cls':'cb.CatBoostRegressor',
            'init':{
                "num_trees":num_trees,
                "depth":depth,
                "learning_rate":learning_rate,
                "l2_leaf_reg":l2_leaf_reg,
                "bagging_temperature":bagging_temperature,
                "random_strength":random_strength,
                "random_state":random_state,
                'logging_level': 'Silent'
            },
            'fit':{
            },
        }
    }

    df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_train, args, df_test = df_test, trial=mytrial, remark='tune 452 by group')
    val_mae_mean = np.mean(df_his.valid)
    val_mae_var = np.var(df_his.valid)
    train_mae_mean = np.mean(df_his.train)
    train_mae_var = np.var(df_his.train)

    trial.set_user_attr('val_mae', val_mae_mean)
    trial.set_user_attr('train_mae', train_mae_mean)
    trial.set_user_attr('mae_diff', val_mae_mean-train_mae_mean)
    trial.set_user_attr('val_mae_var', val_mae_var)
    trial.set_user_attr('train_mae_var', train_mae_var)

    return val_mae_mean

# Run the search: 200 evaluations of `objective`. The log below shows the
# best value decreasing, i.e. the study minimizes the returned validation MAE.
# NOTE(review): no sampler seed is set here, so a re-run presumably explores
# different configurations — confirm if reproducibility matters.
study = optuna.create_study()
study.optimize(objective, n_trials=200)

[I 2019-05-31 01:27:32,449] Finished a trial resulted in value: 1.9927253415708508. Current best value is 1.9927253415708508 with parameters: {'num_trees': 753, 'depth': 3, 'learning_rate': 0.18975859665925005, 'l2_leaf_reg': 0.629284386126229, 'bagging_temperature': 0.8126638320528758, 'random_strength': 0.3352169078030277, 'random_state': 9775}.
[I 2019-05-31 01:27:55,467] Finished a trial resulted in value: 2.1126433127864366. Current best value is 1.9927253415708508 with parameters: {'num_trees': 753, 'depth': 3, 'learning_rate': 0.18975859665925005, 'l2_leaf_reg': 0.629284386126229, 'bagging_temperature': 0.8126638320528758, 'random_strength': 0.3352169078030277, 'random_state': 9775}.
[I 2019-05-31 01:28:07,647] Finished a trial resulted in value: 2.0004374867891506. Current best value is 1.9927253415708508 with parameters: {'num_trees': 753, 'depth': 3, 'learning_rate': 0.18975859665925005, 'l2_leaf_reg': 0.629284386126229, 'bagging_temperature': 0.8126638320528758, 'random_stre

[I 2019-05-31 01:35:01,225] Finished a trial resulted in value: 1.953291470329008. Current best value is 1.9101504930320103 with parameters: {'num_trees': 888, 'depth': 2, 'learning_rate': 0.02184100431440172, 'l2_leaf_reg': 0.012463959428532062, 'bagging_temperature': 0.875013579069416, 'random_strength': 0.6472400827340041, 'random_state': 4967}.
[I 2019-05-31 01:35:35,164] Finished a trial resulted in value: 1.9529957019654498. Current best value is 1.9101504930320103 with parameters: {'num_trees': 888, 'depth': 2, 'learning_rate': 0.02184100431440172, 'l2_leaf_reg': 0.012463959428532062, 'bagging_temperature': 0.875013579069416, 'random_strength': 0.6472400827340041, 'random_state': 4967}.
[I 2019-05-31 01:36:14,644] Finished a trial resulted in value: 2.0672655475491326. Current best value is 1.9101504930320103 with parameters: {'num_trees': 888, 'depth': 2, 'learning_rate': 0.02184100431440172, 'l2_leaf_reg': 0.012463959428532062, 'bagging_temperature': 0.875013579069416, 'random

[I 2019-05-31 01:44:35,336] Finished a trial resulted in value: 2.0332889234855798. Current best value is 1.9101504930320103 with parameters: {'num_trees': 888, 'depth': 2, 'learning_rate': 0.02184100431440172, 'l2_leaf_reg': 0.012463959428532062, 'bagging_temperature': 0.875013579069416, 'random_strength': 0.6472400827340041, 'random_state': 4967}.
[I 2019-05-31 01:44:44,674] Finished a trial resulted in value: 1.9391776383263777. Current best value is 1.9101504930320103 with parameters: {'num_trees': 888, 'depth': 2, 'learning_rate': 0.02184100431440172, 'l2_leaf_reg': 0.012463959428532062, 'bagging_temperature': 0.875013579069416, 'random_strength': 0.6472400827340041, 'random_state': 4967}.
[I 2019-05-31 01:45:09,963] Finished a trial resulted in value: 2.0267720730885377. Current best value is 1.9101504930320103 with parameters: {'num_trees': 888, 'depth': 2, 'learning_rate': 0.02184100431440172, 'l2_leaf_reg': 0.012463959428532062, 'bagging_temperature': 0.875013579069416, 'rando

[I 2019-05-31 01:52:23,173] Finished a trial resulted in value: 1.9109100508839154. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_strength': 0.6703206367929997, 'random_state': 3468}.
[I 2019-05-31 01:52:31,741] Finished a trial resulted in value: 1.909994024688517. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_strength': 0.6703206367929997, 'random_state': 3468}.
[I 2019-05-31 01:52:41,722] Finished a trial resulted in value: 1.9145367131426996. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_st

[I 2019-05-31 01:56:45,957] Finished a trial resulted in value: 1.924504883615091. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_strength': 0.6703206367929997, 'random_state': 3468}.
[I 2019-05-31 01:56:56,206] Finished a trial resulted in value: 1.9794360691612898. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_strength': 0.6703206367929997, 'random_state': 3468}.
[I 2019-05-31 01:57:02,743] Finished a trial resulted in value: 1.9555497664943564. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_st

[I 2019-05-31 02:02:52,069] Finished a trial resulted in value: 1.9238407314204418. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_strength': 0.6703206367929997, 'random_state': 3468}.
[I 2019-05-31 02:03:05,760] Finished a trial resulted in value: 1.9213262723118978. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_strength': 0.6703206367929997, 'random_state': 3468}.
[I 2019-05-31 02:03:20,741] Finished a trial resulted in value: 1.9700008203452013. Current best value is 1.9048569542318443 with parameters: {'num_trees': 364, 'depth': 2, 'learning_rate': 0.06382284819310886, 'l2_leaf_reg': 0.7367808693363442, 'bagging_temperature': 0.8710709999600172, 'random_s

[I 2019-05-31 02:08:33,200] Finished a trial resulted in value: 2.040279048918727. Current best value is 1.903360997426227 with parameters: {'num_trees': 507, 'depth': 2, 'learning_rate': 0.04524074282960549, 'l2_leaf_reg': 0.8989954843923115, 'bagging_temperature': 0.8916335954006406, 'random_strength': 0.6932550777889269, 'random_state': 3993}.
[I 2019-05-31 02:08:41,244] Finished a trial resulted in value: 1.921429156359472. Current best value is 1.903360997426227 with parameters: {'num_trees': 507, 'depth': 2, 'learning_rate': 0.04524074282960549, 'l2_leaf_reg': 0.8989954843923115, 'bagging_temperature': 0.8916335954006406, 'random_strength': 0.6932550777889269, 'random_state': 3993}.
[I 2019-05-31 02:08:54,284] Finished a trial resulted in value: 1.9599858100076333. Current best value is 1.903360997426227 with parameters: {'num_trees': 507, 'depth': 2, 'learning_rate': 0.04524074282960549, 'l2_leaf_reg': 0.8989954843923115, 'bagging_temperature': 0.8916335954006406, 'random_streng

[I 2019-05-31 02:12:01,017] Finished a trial resulted in value: 1.9144079116830799. Current best value is 1.9022444364602877 with parameters: {'num_trees': 259, 'depth': 2, 'learning_rate': 0.05938404836844259, 'l2_leaf_reg': 0.9067478584716402, 'bagging_temperature': 0.8468389941418171, 'random_strength': 0.022972223654142365, 'random_state': 3136}.
[I 2019-05-31 02:12:07,486] Finished a trial resulted in value: 1.9168490011970913. Current best value is 1.9022444364602877 with parameters: {'num_trees': 259, 'depth': 2, 'learning_rate': 0.05938404836844259, 'l2_leaf_reg': 0.9067478584716402, 'bagging_temperature': 0.8468389941418171, 'random_strength': 0.022972223654142365, 'random_state': 3136}.
[I 2019-05-31 02:12:20,091] Finished a trial resulted in value: 1.9247864839637903. Current best value is 1.9022444364602877 with parameters: {'num_trees': 259, 'depth': 2, 'learning_rate': 0.05938404836844259, 'l2_leaf_reg': 0.9067478584716402, 'bagging_temperature': 0.8468389941418171, 'rand

[I 2019-05-31 02:16:03,908] Finished a trial resulted in value: 1.9036803436419907. Current best value is 1.9022444364602877 with parameters: {'num_trees': 259, 'depth': 2, 'learning_rate': 0.05938404836844259, 'l2_leaf_reg': 0.9067478584716402, 'bagging_temperature': 0.8468389941418171, 'random_strength': 0.022972223654142365, 'random_state': 3136}.
[I 2019-05-31 02:16:15,507] Finished a trial resulted in value: 1.9102430587300188. Current best value is 1.9022444364602877 with parameters: {'num_trees': 259, 'depth': 2, 'learning_rate': 0.05938404836844259, 'l2_leaf_reg': 0.9067478584716402, 'bagging_temperature': 0.8468389941418171, 'random_strength': 0.022972223654142365, 'random_state': 3136}.
[I 2019-05-31 02:16:23,899] Finished a trial resulted in value: 1.93443425372426. Current best value is 1.9022444364602877 with parameters: {'num_trees': 259, 'depth': 2, 'learning_rate': 0.05938404836844259, 'l2_leaf_reg': 0.9067478584716402, 'bagging_temperature': 0.8468389941418171, 'random

In [152]:
# Queue every collected trial in the store, then reload the full table.
# NOTE: re-running this cell inserts the same trials a second time.
for trial_i in mytrial:
    db_stacknet_pca_mfcc.insert(trial_i)
df_trial = db_stacknet_pca_mfcc.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial['kfold'] = df_trial['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [195]:
# Explicit commit: the store was opened with auto_commit=False.
db_stacknet_pca_mfcc.commit()

In [154]:
# Trials from the 'tune 452 by group' search, best validation MAE first.
report_columns = ['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']
is_group_tuning = df_trial['remark'] == 'tune 452 by group'
df_trial[is_group_tuning].sort_values(by=['val_mae'])[report_columns]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
676,2019-05-31 02:11:54.832121,tune 452 by group,26,1.780355,0.000401,1.902244,0.002737,0.121889
641,2019-05-31 02:05:26.875061,tune 452 by group,26,1.763515,0.000355,1.903361,0.003263,0.139846
700,2019-05-31 02:16:03.884126,tune 452 by group,26,1.796836,0.000487,1.90368,0.002783,0.106845
583,2019-05-31 01:52:06.574691,tune 452 by group,26,1.763654,0.00033,1.904857,0.003301,0.141203
642,2019-05-31 02:05:36.766968,tune 452 by group,26,1.735895,0.000307,1.904895,0.002488,0.168999
674,2019-05-31 02:11:43.319159,tune 452 by group,26,1.810265,0.00048,1.906792,0.002413,0.096527
690,2019-05-31 02:13:55.745555,tune 452 by group,26,1.792992,0.000374,1.907058,0.002641,0.114065
661,2019-05-31 02:09:47.581098,tune 452 by group,26,1.793223,0.000365,1.908826,0.00247,0.115603
713,2019-05-31 02:18:42.643689,tune 452 by group,26,1.78213,0.000389,1.908862,0.003732,0.126732
699,2019-05-31 02:15:54.084428,tune 452 by group,26,1.796263,0.000375,1.90962,0.003105,0.113356


In [242]:
# Base configuration for the feature-addition experiment below: 26 input
# columns, 3-split 'group' k-fold, StandardScaler, CatBoost regressor.
# NOTE(review): the CatBoost init values equal the best parameters in the
# Optuna log above (value 1.9022...), which the results table lists under
# row 676 rather than 674 — confirm the variable name.
param674 = {'columns': ['mfcc_5_abs_mean',
  'mlp13',
  "number_crossing_m{'m': 1}",
  '5000variance_median__pca6',
  'xgbm1172',
  'ridge15',
  'spkt_welch_density__coeff_3',
  'lgbm2156',
  'lasso8',
  'mfcc_accelerate_8_variance',
  'abs_q05_6_pca1',
  'abs_q75_7',
  'abs_q01_5',
  'frgf200',
  'median__roll_std',
  'abs_max_roll_mean_1000',
  'knn17',
  "range_count{'min': 0, 'max': 1000000000000.0}_pca1",
  'gbm306',
  'q01_roll_std_1000',
  'spkt_welch_densitycoeff_2',
  'mfcc_accelerate_1_kurtosis',
  'svr7',
  "value_count{'value': 1}",
  'abs_q95_2',
  'cb452'],
 'kfold': {'n_splits': 3,
  'random_state': 1985,
  'shuffle': True,
  'type': 'group'},
 'scaler': {'cls': 'StandardScaler', 'init': {}},
 'algorithm': {'cls': 'cb.CatBoostRegressor',
  'init': {'num_trees': 259,
   'depth': 2,
   'learning_rate': 0.05938404836844259,
   'l2_leaf_reg': 0.9067478584716402,
   'bagging_temperature': 0.8468389941418171,
   'random_strength': 0.022972223654142365,
   'random_state': 3136,
   'logging_level': 'Silent'},
  'fit': {}}}

In [249]:
# Write the submission for one stored trial: the prediction is the row-wise
# mean over that trial's test-prediction columns.
idx = 722
df_test_pred = df_trial.loc[idx]['df_test_pred']
# Build both columns from plain arrays so they are paired by position.
# Assigning the `index` Series onto a fresh frame would align on row labels
# and yield NaN or shifted seg_ids whenever df_test_pred is not labelled
# 0..n-1.
df_submit = pd.DataFrame(
    {
        'time_to_failure': np.mean(df_test_pred.drop(columns=['index']).values, axis=1),
        'seg_id': df_test_pred['index'].values,
    },
    columns=['time_to_failure', 'seg_id'],
)
df_submit.to_csv('submission_mystacknet_pca_mfcc_{}.csv'.format(idx), index=False)

In [210]:
# t-SNE feature names (presumably the columns of ../feats/tsne.pkl — confirm).
# The feature-addition loop below spells the same names out again instead of
# using this list.
tsne_columns = ['tsne_1', 'tsne_2', 'tsne_3']

In [213]:
# VAE feature names (presumably the columns of ../feats/vae.pkl — confirm).
# The feature-addition loop below spells the same names out again instead of
# using this list.
vae_columns = ['kle', 'loss', 'mse', 'reconstructione', 'z_0', 'z_1']

In [244]:
# Try each t-SNE / VAE feature on its own on top of the param674 columns.
# NOTE: this rebinds the notebook-level names `param` and `mytrial`.
mytrial = []
extra_feature_candidates = ['tsne_1','tsne_2','tsne_3','kle','loss','mse','reconstructione', 'z_0','z_1']
for col in extra_feature_candidates:
    # Fresh deep copy per candidate so param674 itself is never modified.
    param = copy.deepcopy(param674)
    param['columns'] = [*param['columns'], col]
    df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
        df_train, param, df_test=df_test, trial=mytrial, remark='try add tsne, vae')

In [246]:
# Queue every collected trial in the store, then reload the full table.
# NOTE: re-running this cell inserts the same trials a second time.
for trial_i in mytrial:
    db_stacknet_pca_mfcc.insert(trial_i)
df_trial = db_stacknet_pca_mfcc.select()
# Lift the CV settings out of each trial's `param` dict into their own column.
df_trial['kfold'] = df_trial['param'].apply(
    lambda trial_param: trial_param['kfold']
)

In [247]:
# Trials from the 'try add tsne, vae' experiment, best validation MAE first.
report_columns = ['datetime','remark', 'nfeatures', 'train_mae','train_mae_var','val_mae','val_mae_var','mae_diff']
is_feature_trial = df_trial['remark'] == 'try add tsne, vae'
df_trial[is_feature_trial].sort_values(by=['val_mae'])[report_columns]

Unnamed: 0,datetime,remark,nfeatures,train_mae,train_mae_var,val_mae,val_mae_var,mae_diff
722,2019-05-31 14:16:44.465849,"try add tsne, vae",27,1.780584,0.000391,1.897044,0.003055,0.11646
724,2019-05-31 14:17:10.325755,"try add tsne, vae",27,1.781385,0.000408,1.902547,0.002945,0.121162
723,2019-05-31 14:16:57.541839,"try add tsne, vae",27,1.780882,0.000396,1.903822,0.002902,0.12294
725,2019-05-31 14:17:23.283208,"try add tsne, vae",27,1.780876,0.000396,1.903833,0.002901,0.122957
721,2019-05-31 14:16:31.421177,"try add tsne, vae",27,1.778064,0.000293,1.907047,0.003166,0.128983
727,2019-05-31 14:17:49.128471,"try add tsne, vae",27,1.780191,0.000413,1.907112,0.003039,0.126921
719,2019-05-31 14:16:05.978858,"try add tsne, vae",27,1.776197,0.00053,1.915775,0.004944,0.139578
720,2019-05-31 14:16:18.657369,"try add tsne, vae",27,1.775702,0.000259,1.92114,0.002807,0.145438
726,2019-05-31 14:17:36.204076,"try add tsne, vae",27,1.771279,0.000431,1.964283,0.003302,0.193004


In [248]:
# Explicit commit: the store was opened with auto_commit=False.
db_stacknet_pca_mfcc.commit()