In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold, KFold, GroupKFold, TimeSeriesSplit

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb

import tensorflow as tf
import keras
from keras.wrappers.scikit_learn import KerasRegressor
from keras import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional, CuDNNGRU, CuDNNLSTM, RepeatVector, RepeatVector, concatenate,ConvLSTM2D
from keras.layers import BatchNormalization, Conv2D, MaxPooling2D, Flatten, Convolution1D,TimeDistributed,Lambda, Activation, GlobalAveragePooling1D, GlobalMaxPooling1D
from keras.engine.topology import Layer
from keras.initializers import Ones, Zeros

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from models import *
from dfdb import DFDB

import types
import os
import copy
import gc

import seaborn as sns
import matplotlib.pyplot as plt

# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
# os.environ["CUDA_VISIBLE_DEVICES"]="3";  

Using TensorFlow backend.


In [2]:
# Show full cell contents (e.g. the nested param dicts displayed further down)
# instead of truncating them. `None` is the supported "no limit" value since
# pandas 1.0; older releases only accept the legacy -1 sentinel, which newer
# releases reject, so try the modern spelling first.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [3]:
%%HTML
<!-- Override the default widths of the notebook container, menubar and main toolbar. -->
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [4]:
def m_lineplot(dflist, plot_features=None, n_col=3):
    """Draw one line chart per DataFrame on a grid of subplots.

    Parameters
    ----------
    dflist : list of pandas.DataFrame
        Frames to plot; each one gets its own subplot, filled row by row.
        The frame index is used as the x axis.
    plot_features : list of str, optional
        Columns to draw in every subplot. When omitted, each frame is
        plotted with all of its own columns.
    n_col : int, default 3
        Number of subplot columns in the grid.

    Returns
    -------
    None
        The figure is created through ``plt.figure`` and left to the active
        matplotlib backend to display.
    """
    n_chart = len(dflist)
    if n_chart == 0:
        # Nothing to draw; avoids asking matplotlib for a zero-height figure.
        return

    # Ceiling division: a partially filled last row still needs a grid row.
    n_row = (n_chart + n_col - 1) // n_col

    fig = plt.figure(figsize=(5*n_col, 3*n_row))
    for i, df in enumerate(dflist):
        ax = fig.add_subplot(n_row, n_col, i+1)
        # Resolve the columns per frame so that a default derived from one
        # frame is never applied to the next one.
        features = df.columns.tolist() if plot_features is None else plot_features
        for feat in features:
            sns.lineplot(x=df.index, y=df[feat], ax=ax)
    return

In [5]:
# Table of training windows; its 'index' column holds each window's start
# offset into the raw signal.
df_X_train = pd.read_pickle('../feats/sample.pkl')

# Raw recording, read as float32.
df_wav = pd.read_csv(
    '../data/train.csv',
    dtype={'acoustic_data': np.float32, 'time_to_failure': np.float32},
)

# Global standardisation statistics. They stay in the namespace because the
# test-set cell reuses them.
wav_mean = df_wav['acoustic_data'].mean()
wav_std = df_wav['acoustic_data'].std()

ttf = df_wav['time_to_failure'].values
wav = (df_wav['acoustic_data'].values - wav_mean) / wav_std

# Attach to every row its standardised 150_000-sample slice with a trailing
# axis of size 1 added.
df_X_train['X'] = df_X_train['index'].apply(
    lambda start: np.expand_dims(wav[start:start + 150_000], 1)
)

# Drop the large intermediates; only df_X_train, wav_mean and wav_std survive.
del df_wav, wav, ttf

In [19]:
# Force a full garbage-collection pass; the value displayed below the cell is
# the number of unreachable objects the collector found.
gc.collect()

427

In [6]:
# Discretise the target: truncate to int and cap at 15, so every y >= 15
# shares one bucket (presumably the label for stratified CV — confirm).
df_X_train['label'] = df_X_train['y'].apply(lambda x:  int(x) if x<15 else 15)

# Work on a private copy: the array returned by `.values` may alias the
# frame's own buffer (and is read-only under pandas copy-on-write), so it
# must not be edited in place.
group = df_X_train['season'].values.copy()
# Fold season 17 into group 1.
group[group == 17] = 1
df_X_train['group'] = group

# 'season' is now represented by 'group'. Note this makes the cell
# non-rerunnable without reloading df_X_train.
df_X_train = df_X_train.drop(columns=['season'])

In [7]:
test_file_path = '../data/test/'
X_test = []
for f in sorted(os.listdir(test_file_path)):
    seg_id, ext = os.path.splitext(f)
    if ext != '.csv':
        # Skip stray directory entries (hidden files, notebook checkpoints)
        # that would otherwise be handed to read_csv.
        continue
    df_test_i = pd.read_csv(os.path.join(test_file_path, f))
    # Standardise with the statistics computed on the training signal.
    # NOTE(review): the training windows get a trailing axis via
    # np.expand_dims while these arrays stay 1-D — confirm the consumer
    # reshapes them as needed.
    X_test.append({
        'index': seg_id,
        'X': (df_test_i['acoustic_data'].values - wav_mean) / wav_std,
    })
df_X_test = pd.DataFrame(X_test)
del X_test

In [8]:
# Open the trial store backed by a pickle file.
# NOTE(review): with auto_commit=False, inserts are presumably only persisted
# by the explicit db.commit() at the end of the notebook — confirm against
# dfdb.DFDB.
db = DFDB('../trial2/kerascnnwav.pkl', auto_commit=False)
# Display the shape of the stored trial table.
db.select().shape

(0, 0)

In [9]:
# try:
#     from dataloader import TokenList, pad_to_longest
#     # for transformer
# except: pass

# class LayerNormalization(Layer):
#     def __init__(self, eps=1e-6, **kwargs):
#         self.eps = eps
#         super(LayerNormalization, self).__init__(**kwargs)
#     def build(self, input_shape):
#         self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
#                                      initializer=Ones(), trainable=True)
#         self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
#                                     initializer=Zeros(), trainable=True)
#         super(LayerNormalization, self).build(input_shape)
#     def call(self, x):
#         mean = K.mean(x, axis=-1, keepdims=True)
#         std = K.std(x, axis=-1, keepdims=True)
#         return self.gamma * (x - mean) / (std + self.eps) + self.beta
#     def compute_output_shape(self, input_shape):
#         return input_shape

# class ScaledDotProductAttention():
#     def __init__(self, d_model, attn_dropout=0.1):
#         self.temper = np.sqrt(d_model)
#         self.dropout = Dropout(attn_dropout)
#     def __call__(self, q, k, v, mask):
#         attn = Lambda(lambda x:K.batch_dot(x[0],x[1],axes=[2,2])/self.temper)([q, k])
#         if mask is not None:
#             mmask = Lambda(lambda x:(-1e+10)*(1-x))(mask)
#             attn = Add()([attn, mmask])
#         attn = Activation('softmax')(attn)
#         attn = self.dropout(attn)
#         output = Lambda(lambda x:K.batch_dot(x[0], x[1]))([attn, v])
#         return output, attn

# class MultiHeadAttention():
#     # mode 0 - big matrices, faster; mode 1 - clearer implementation
#     def __init__(self, n_head, d_model, d_k, d_v, dropout, mode=0, use_norm=True):
#         self.mode = mode
#         self.n_head = n_head
#         self.d_k = d_k
#         self.d_v = d_v
#         self.dropout = dropout
#         if mode == 0:
#             self.qs_layer = Dense(n_head*d_k, use_bias=False)
#             self.ks_layer = Dense(n_head*d_k, use_bias=False)
#             self.vs_layer = Dense(n_head*d_v, use_bias=False)
#         elif mode == 1:
#             self.qs_layers = []
#             self.ks_layers = []
#             self.vs_layers = []
#             for _ in range(n_head):
#                 self.qs_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
#                 self.ks_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
#                 self.vs_layers.append(TimeDistributed(Dense(d_v, use_bias=False)))
#         self.attention = ScaledDotProductAttention(d_model)
#         self.layer_norm = LayerNormalization() if use_norm else None
#         self.w_o = TimeDistributed(Dense(d_model))

#     def __call__(self, q, k, v, mask=None):
#         d_k, d_v = self.d_k, self.d_v
#         n_head = self.n_head

#         if self.mode == 0:
#             qs = self.qs_layer(q)  # [batch_size, len_q, n_head*d_k]
#             ks = self.ks_layer(k)
#             vs = self.vs_layer(v)

#             def reshape1(x):
#                 s = tf.shape(x)   # [batch_size, len_q, n_head * d_k]
#                 x = tf.reshape(x, [s[0], s[1], n_head, d_k])
#                 x = tf.transpose(x, [2, 0, 1, 3])  
#                 x = tf.reshape(x, [-1, s[1], d_k])  # [n_head * batch_size, len_q, d_k]
#                 return x
#             qs = Lambda(reshape1)(qs)
#             ks = Lambda(reshape1)(ks)
#             vs = Lambda(reshape1)(vs)

#             if mask is not None:
#                 mask = Lambda(lambda x:K.repeat_elements(x, n_head, 0))(mask)
#             head, attn = self.attention(qs, ks, vs, mask=mask)  
                
#             def reshape2(x):
#                 s = tf.shape(x)   # [n_head * batch_size, len_v, d_v]
#                 x = tf.reshape(x, [n_head, -1, s[1], s[2]]) 
#                 x = tf.transpose(x, [1, 2, 0, 3])
#                 x = tf.reshape(x, [-1, s[1], n_head*d_v])  # [batch_size, len_v, n_head * d_v]
#                 return x
#             head = Lambda(reshape2)(head)
#         elif self.mode == 1:
#             heads = []; attns = []
#             for i in range(n_head):
#                 qs = self.qs_layers[i](q)   
#                 ks = self.ks_layers[i](k) 
#                 vs = self.vs_layers[i](v) 
#                 head, attn = self.attention(qs, ks, vs, mask)
#                 heads.append(head); attns.append(attn)
#             head = Concatenate()(heads) if n_head > 1 else heads[0]
#             attn = Concatenate()(attns) if n_head > 1 else attns[0]

#         outputs = self.w_o(head)
#         outputs = Dropout(self.dropout)(outputs)
#         if not self.layer_norm: return outputs, attn
#         # outputs = Add()([outputs, q]) # sl: fix
#         return self.layer_norm(outputs), attn
    
# class ReshapeStandardScaler(object):
    
#     def  __init__(self, shape, mean, std):
        
#         assert shape[-1] == len(std.shape), 'the shape is not matched'
#         assert shape[-1] == len(mean.shape), 'the shape is not matched'
#         self.shape = shape
#         self.std = std
#         self.mean = mean
#         return
    
#     def fit(self, *args, **kwargs):
#         return
    
#     def transform(self, X):
#         original_shape = X.shape
#         X = (X.reshape(self.shape) - self.mean)/self.std
#         return X.reshape(original_shape)
    
# def create_path(base_dir, param):
#     if base_dir == None:
#         return None
#     fold_path = base_dir + '/' + ','.join("{!s}={!r}".format(key,val) for (key,val) in param.items())
#     if not os.path.exists(fold_path):
#         os.makedirs(fold_path)
#     return fold_path

# class ReshapeStandardScaler(object):
    
#     def  __init__(self, shape, mean, std):
        
#         assert shape[-1] == len(std.shape), 'the shape is not matched'
#         assert shape[-1] == len(mean.shape), 'the shape is not matched'
#         self.shape = shape
#         self.std = std
#         self.mean = mean
#         return
    
#     def fit(self, **kwargs):
#         return
    
#     def transform(self, X):
#         original_shape = X.shape
#         X = (X.reshape(self.shape) - self.mean)/self.std
#         return X.reshape(original_shape)

# class Keras1DCnnRegressor(object):
    
#     def __init__(self, batch, timesteps, input_dim, cnn_layer_sizes, cnn_kernel_size, cnn_strides, cnn_activation, 
#                     fc_layer_sizes, fc_activation, dropout, solver, metric, lr, sgd_momentum, sgd_decay, base_save_dir, alias, 
#                  attention_n_head=5, attention_d_model=256, attention_d_k=64, attention_d_v=64, bilstm_layer_sizes=[]):
        
#         self.batch = batch
#         self.timesteps = timesteps
#         self.input_dim = input_dim
#         self.cnn_layer_sizes = cnn_layer_sizes
#         self.cnn_kernel_size = cnn_kernel_size
#         self.cnn_strides = cnn_strides
#         self.cnn_activation = cnn_activation
#         self.fc_layer_sizes = fc_layer_sizes
#         self.fc_activation = fc_activation
#         self.dropout = dropout
#         self.solver = solver
#         self.metric = metric
#         self.lr = lr
#         self.sgd_momentum = sgd_momentum
#         self.sgd_decay = sgd_decay
        
#         self.regressor = self.build_graph(timesteps, input_dim, cnn_layer_sizes, cnn_kernel_size, cnn_strides, cnn_activation, 
#                     fc_layer_sizes, fc_activation, attention_n_head, attention_d_model, attention_d_k, attention_d_v, bilstm_layer_sizes, dropout)
#         self.compile_graph(self.regressor, solver, metric, lr, sgd_momentum, sgd_decay)
        
#         self.alias = alias
#         self.base_save_dir = base_save_dir
#         if (self.alias==None) & (self.base_save_dir==None):
#             self.chkpt = None
#         else:
#             self.chkpt = os.path.join(base_save_dir,'{}.hdf5'.format(alias))

#         return
    
#     def build_graph(self, timesteps, input_dim, cnn_layer_sizes, cnn_kernel_size, cnn_strides, cnn_activation, 
#                     fc_layer_sizes, fc_activation, attention_n_head, attention_d_model, attention_d_k, attention_d_v, bilstm_layer_sizes, dropout):
        
#         i = Input(shape = (timesteps, input_dim))
#         x = Convolution1D( cnn_layer_sizes[0], kernel_size = cnn_kernel_size, strides = cnn_strides, activation=cnn_activation)(i)
#         x = BatchNormalization()(x)
#         x = Dropout(dropout)(x)
#         for units in cnn_layer_sizes[1:]:
#             x = Convolution1D(units, kernel_size = cnn_kernel_size, strides = cnn_strides, activation=cnn_activation)(x)
#             x = BatchNormalization()(x)
#             x = Dropout(dropout)(x)
#         for units in bilstm_layer_sizes:
#             x = Bidirectional(CuDNNLSTM(units, return_sequences=True))(x)
#         x, slf_attn = MultiHeadAttention(n_head=attention_n_head, d_model=attention_d_model, d_k=attention_d_k, d_v=attention_d_v, dropout=dropout)(x, x, x)
#         avg_pool = GlobalAveragePooling1D()(x)
#         max_pool = GlobalMaxPooling1D()(x)
#         x = concatenate([avg_pool, max_pool])
#         for units in fc_layer_sizes[:-1]:
#             x = Dense(units, activation=fc_activation)(x)
#             x = BatchNormalization()(x)
#             x = Dropout(dropout)(x)
#         x = Dense(fc_layer_sizes[-1], activation=fc_activation)(x)
#         x = BatchNormalization()(x)
#         y = Dense(1)(x)
#         regressor = Model(inputs = [i], outputs = [y])
#         return regressor
    
#     def compile_graph(self, model, solver, metric, lr, momentum, decay):
#         if solver=='adam':
#             optimizer = optimizers.adam(lr=lr)
#         elif solver=='sgd':
#             optimizer = optimizers.SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)
#         model.compile(optimizer=optimizer, loss=metric)
#         return
    
#     def fit_generator(self, train_gen, eval_set, verbose=1, epochs=200):
        
        
#         df_train_his = pd.DataFrame()
# #         prev_val_loss = 999999
#         for i in np.arange(epochs):
#             if type(eval_set)==type(None):
#                 validation_data = None
#             else:
#                 validation_data = eval_set[0]
#             his_train = self.regressor.fit_generator( generator =  train_gen,  epochs = 1,  verbose = 0,  validation_data = validation_data, callbacks = [])
#             df_train_his_i = pd.DataFrame(his_train.history)
#             df_train_his_i['epochs'] = i
#             df_train_his = pd.concat([df_train_his, df_train_his_i], axis=0)
            
#             if verbose > 0:
#                 if validation_data == None:
#                     print(df_train_his_i.epochs.values, df_train_his_i.loss.values)
#                 else:
#                     print(df_train_his_i.epochs.values, df_train_his_i.loss.values, df_train_his_i.val_loss.values)
                
# #             if (df_train_his_i.val_loss.values[0] < prev_val_loss) & (self.chkpt!=None) :
# #                 prev_val_loss = df_train_his_i.val_loss.values[0]
# #                 self.regressor.save_weights(self.chkpt)
        
#         df_train_his.to_csv(self.base_save_dir + '/train_his.csv', index=True)
#         return
    
#     def fit(self, X_train, y_train, eval_set, verbose=1, epochs=200):
              
#         df_train_his = pd.DataFrame()
# #         prev_val_loss = 999999
#         for i in np.arange(epochs):
#             if type(eval_set)==type(None):
#                 validation_data = None
#             else:
#                 validation_data = eval_set[0]
#                 assert type(eval_set[0])==tuple, 'validation_data[0] is not a tuple'
#             his_train = self.regressor.fit( X_train, y_train, epochs = 1,  verbose = 0,  batch_size = self.batch,  validation_data = validation_data,  callbacks = [])
#             df_train_his_i = pd.DataFrame(his_train.history)
#             df_train_his_i['epochs'] = i
#             df_train_his = pd.concat([df_train_his, df_train_his_i], axis=0)
            
#             if verbose > 0:
#                 if validation_data == None:
#                     print(df_train_his_i.epochs.values, df_train_his_i.loss.values)
#                 else:
#                     print(df_train_his_i.epochs.values, df_train_his_i.loss.values, df_train_his_i.val_loss.values)
                
# #             if (df_train_his_i.val_loss.values[0] < prev_val_loss) & (self.chkpt!=None) :
# #                 prev_val_loss = df_train_his_i.val_loss.values[0]
# #                 self.regressor.save_weights(self.chkpt)
                
#         df_train_his.to_csv(self.base_save_dir + '/train_his.csv', index=True)
            
#         return df_train_his
    
#     def predict(self, X):
#         return self.regressor.predict(X)[:,0]
    

In [10]:
# class Generator(keras.utils.Sequence):

#     def __init__(self, x, y, x_mean, x_std, start_indexes, ts_length, batch_size, steps_per_epoch, shaking=True, shuffle=True):
#         self.x = x
#         self.y = y
#         self.start_indexes = start_indexes
#         self.ts_length = ts_length
#         self.batch_size = batch_size
#         self.steps_per_epoch = steps_per_epoch
#         self.x_mean = x_mean
#         self.x_std = x_std
#         self.shaking = shaking
#         self.shuffle = shuffle
#         self.point = 0
        
#     def __len__(self):
#         return self.steps_per_epoch

#     def __getitem__(self, idx):
        
#         if self.shuffle:
#             start_indexes_epoch = np.random.choice(self.start_indexes, size=self.batch_size)
#         else:
#             start_indexes_epoch = self.start_indexes[self.point:self.point+self.batch_size]
#             self.point += self.batch_size
#             if self.point > len(self.start_indexes) - self.batch_size:
#                 self.point = 0
            
#         if self.shaking:
#             shifts = np.random.randint(0, int(self.ts_length*.2), size=self.batch_size) - int(self.ts_length*.1)
#         else:
#             shifts = np.zeros(self.batch_size)
            
#         x_batch = np.empty((self.batch_size, self.ts_length))
#         y_batch = np.empty(self.batch_size, )

#         for i, start_idx in enumerate(start_indexes_epoch):
#             end = start_idx + shifts[i] + self.ts_length
#             if end < self.ts_length:
#                 end = self.ts_length
#             if end >= self.x.shape[0]:
#                 end = self.x.shape[0]
#             x_i = self.x[int(end-self.ts_length):int(end)]
#             x_batch[i, :] = x_i
#             y_batch[i] = self.y[int(end - 1)]
            
#         x_batch = (x_batch - self.x_mean)/self.x_std

#         return np.expand_dims(x_batch, axis=2), y_batch

In [11]:
# def processG(df_index, x, y, param, df_test=None, trial=None, remark=None):

#         assert 'y' in df_index.columns.tolist(), 'y is not in df_index'
#         assert 'index' in df_index.columns.tolist(), 'index is not in df_index'
#         assert 'index' not in param['columns'], 'index is in features'
#         assert 'y' not in param['columns'], 'y is in features'
#         assert 'label' not in param['columns'], 'label is in features'
#         assert 'group' not in param['columns'], 'group is in features'
#         assert EP.check_param(param), 'param format is not right '
#         assert (type(trial) == list) | (trial == None), 'trial is neither list nor none'


#         df_test_pred = None
#         if type(df_test) == pd.DataFrame:
#             assert 'index' in df_test.columns.tolist(), 'index is not in df_test'
#             df_test_pred = pd.concat([df_test_pred, df_test[['index']]], axis=1)

#         history = []
#         df_valid_pred = pd.DataFrame()

#         # stratified,group,timeseries
#         if 'splits' in param['kfold']:
#             splits = param['kfold']['splits']
#         else:
#             if param['kfold']['type'] == 'stratified':
#                 assert 'label' in df_index.columns.tolist(), 'label is not in df_index'
#                 folds = StratifiedKFold(n_splits=param['kfold']['n_splits'], shuffle=param['kfold']['shuffle'],
#                                         random_state=param['kfold']['random_state'])
#                 splits = list(folds.split(df_index, df_index['label']))
#             elif param['kfold']['type'] == 'group':
#                 assert 'group' in df_index.columns.tolist(), 'group is not in df_index'
#                 folds = GroupKFold(n_splits=param['kfold']['n_splits'])
#                 splits = list(folds.split(df_index, groups=df_index['group']))
#             elif param['kfold']['type'] == 'timeseries':
#                 folds = TimeSeriesSplit(n_splits=param['kfold']['n_splits'])
#                 splits = list(folds.split(df_index))
#             else:
#                 folds = KFold(n_splits=param['kfold']['n_splits'], shuffle=param['kfold']['shuffle'],
#                               random_state=param['kfold']['random_state'])
#                 splits = list(folds.split(df_index))


#         regressor_cls = EP.str2class(param['algorithm']['cls'])
        
#         x_mean = np.mean(x)
#         x_std = np.std(x)
#         epochs = param['algorithm']['fit']['epochs']
#         batch_size = param['algorithm']['init']['batch']
        
#         for fold_n, (train_index, valid_index) in enumerate(splits):
            
#             start_indexes_train = df_index['index'].values[train_index]
#             start_indexes_valid = df_index['index'].values[valid_index]
#             train_gen = Generator(x=x, y=y, x_mean=x_mean, x_std=x_std, start_indexes=start_indexes_train, ts_length=150000, batch_size=batch_size, steps_per_epoch=epochs)
#             valid_gen = Generator(x=x, y=y, x_mean=x_mean, x_std=x_std, start_indexes=start_indexes_valid, ts_length=150000, batch_size=len(start_indexes_valid), steps_per_epoch=1, shaking=False, shuffle=False)
            
#             fit_param = param['algorithm']['fit'].copy()
#             if 'eval_set' in fit_param:
#                 fit_param['eval_set'] = [valid_gen]
            
#             algorithm_init_param = param['algorithm']['init'].copy()
#             if 'alias' in list(algorithm_init_param.keys()):
#                 algorithm_init_param['alias'] = algorithm_init_param['alias'] + '_{}'.format(fold_n)
#             model = regressor_cls(**algorithm_init_param)
            
#             model.fit_generator(train_gen, **fit_param)
#             y_valid_pred = model.predict_generator(valid_gen)
#             y_train_pred = model.predict_generator(train_gen)

#             original_index = df_index['index'].values[valid_index]
#             df_valid_pred_i = pd.DataFrame({'index': original_index, 'predict': y_valid_pred, 'fold_n': np.zeros(y_valid_pred.shape[0]) + fold_n})
#             df_valid_pred = pd.concat([df_valid_pred, df_valid_pred_i], axis=0)

#             if type(df_test) == pd.DataFrame:
                
#                 X_test = np.array(df_test['X'].values.tolist())
#                 X_test =  (X_test - x_mean)/x_std
#                 y_test_pred = model.predict(X_test)
#                 df_test_pred_i = pd.DataFrame({fold_n: y_test_pred})
#                 df_test_pred = pd.concat([df_test_pred, df_test_pred_i], axis=1)

#             history.append({'fold_n': fold_n, 'train': mean_absolute_error(y_train, y_train_pred), 'valid': mean_absolute_error(y_valid, y_valid_pred)})

#         df_his = pd.DataFrame(history)
#         df_valid_pred = df_valid_pred.sort_values(by=['index'])
#         df_valid_pred = df_valid_pred.reset_index(drop=True)

#         if type(df_test) == pd.DataFrame:
#             df_test_pred = df_test_pred.sort_values(by=['index'])
#             df_test_pred = df_test_pred.reset_index(drop=True)

#         if type(trial) == list:
#             pid_ = os.getpid()
#             datetime_ = datetime.datetime.now()
#             connection_file = os.path.basename(kernel.get_connection_file())
#             val_mae_mean = np.mean(df_his.valid)
#             val_mae_var = np.var(df_his.valid)
#             train_mae_mean = np.mean(df_his.train)
#             train_mae_var = np.var(df_his.train)

#             trial.append({'datetime': datetime_, 'kernel': connection_file, 'remark': remark, 'val_mae': val_mae_mean,
#                           'train_mae': train_mae_mean, 'val_mae_var': val_mae_var, 'train_mae_var': train_mae_var,
#                           'mae_diff': val_mae_mean - train_mae_mean,
#                           'df_his': df_his, 
#                           'df_valid_pred': df_valid_pred, 'df_test_pred': df_test_pred, 'param': param.copy(),
#                           'nfeatures': len(columns)})

#         return df_his, _, df_valid_pred, df_test_pred

In [12]:
# from sklearn.model_selection import StratifiedKFold, KFold, GroupKFold, TimeSeriesSplit

# wav = df_wav['acoustic_data'].values
# ttf = df_wav['time_to_failure'].values
# wav_mean = df_wav['acoustic_data'].mean()
# wav_std = df_wav['acoustic_data'].std()

# model = Keras1DCnnRegressor(**param['algorithm']['init'])

# folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
# splits = list(folds.split(df_X_train, df_X_train['label']))
# for n_fold, (train_index, valid_index) in enumerate(splits):
#     train_gen = Generator(x=wav, y=ttf, x_mean=wav_mean, x_std=wav_std, start_indexes=df_X_train['index'].values[train_index], ts_length=150000, batch_size=128, steps_per_epoch=1, shaking=True)
# #     valid_gen = Generator(x=wav, y=ttf, x_mean=wav_mean, x_std=wav_std, start_indexes=df_X_train['index'].values[valid_index], ts_length=150000, batch_size=128, steps_per_epoch=100, shaking=False)

# model.fit_generator(train_gen, epochs=1, eval_set=None)

In [16]:
# Experiment configuration for one cross-validated run on the 'X' column.

# Only referenced by the commented-out scaler configuration at the bottom of
# `param`.
scaler_shape = (-1,1)
# Architecture hyper-parameters. They are used twice: passed to create_path to
# obtain the save directory, and merged into the model's init kwargs below.
# NOTE(review): if create_path builds the directory name from the items in
# order (as the commented-out copy in this notebook does), reordering these
# keys changes the path — confirm before reformatting.
path_param={
    # Matches the 150_000-sample windows cut in the data-loading cell.
    'timesteps':150_000, 
    'input_dim':1, 
    'cnn_layer_sizes':[16,64], 
    'cnn_kernel_size':10, 
    'cnn_strides':10, 
    'cnn_activation':'relu',
    'fc_layer_sizes':[1024,16],
    'fc_activation':'relu', 
    'bilstm_layer_sizes':[],
    'dropout':.3,
}
# NOTE(review): create_path and Keras1DCnnRegressor are not defined in any
# active cell of this notebook; presumably they come from `from models import *`
# — confirm.
base_save_dir = create_path('Keras1DCnnRegressor', path_param)
param={
    'algorithm': {
        # Class is referenced by name (a string), not by object.
        'cls': 'Keras1DCnnRegressor',
        # Keyword arguments for the fit step.
        'fit': {
            'verbose':1, 
            'epochs':50, 
            # Placeholder; presumably replaced per fold by EP.process — TODO confirm.
            'eval_set':()
        },
        # Keyword arguments for the model constructor (training settings plus
        # the architecture settings from path_param).
        'init': {
            'batch':16, 
            'solver':'adam', 
            'metric':'mean_absolute_error', 
            'lr':.0001, 
            'sgd_momentum':.9, 
            'sgd_decay':0.0001,
            'base_save_dir':base_save_dir, 
            'alias':'1dcnn_wav',
            **path_param
        }
    },
    # Feature columns of df_X_train fed to the model.
    'columns': ['X'],
    # Cross-validation scheme; 'group' relies on the 'group' column built in
    # the label/group cell.
    'kfold': {
        'n_splits': 3,
        'random_state': 1985,
        'shuffle': True,
        'type': 'group'#stratified
    },
    # No scaler: the signal is already standardised when the windows are built.
    'scaler': None
#     {
#         'cls': None,
#         'init':{
#             'shape':scaler_shape,
#             'mean':np.array([wav_mean]),
#             'std':np.array([wav_std]),
#         }
#     }
}

In [17]:
# mytrial = []
# df_his,  df_feature_importances, df_valid_pred, df_test_pred =  processG(df_X_train, wav, ttf, param, df_test = df_X_test, trial=mytrial)

In [18]:
# run one try
# `mytrial` is handed to EP.process via `trial=`; it is presumably filled with
# one result record per run (mytrial[0] is read right after) — confirm against
# common.EP.
mytrial = []
df_his,  df_feature_importances, df_valid_pred, df_test_pred =  EP.process(df_X_train, param, df_test = df_X_test, trial=mytrial)
# Add the trial record to the store, then reload the trial table for display
# in the next cell.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt, so
# these two lines did not run in the saved session.
db.insert(mytrial[0])
df_trial = db.select()

[0] [5.25204812] [7.97254419]
[1] [4.05684487] [4.68224865]


KeyboardInterrupt: 

In [None]:
# Pull the CV scheme and the model-init kwargs out of each trial's nested
# `param` dict so they can be shown as ordinary columns.
df_trial['kfold-type'] = [p['kfold']['type'] for p in df_trial['param']]
df_trial['algorithm-init'] = [p['algorithm']['init'] for p in df_trial['param']]

# Summary view of the stored trials (last expression, so it is displayed).
df_trial[[
    'datetime',
    'nfeatures',
    'kfold-type',
    'algorithm-init',
    'train_mae',
    'train_mae_var',
    'val_mae',
    'val_mae_var',
    'mae_diff',
]]

In [None]:
# Read one training-history CSV per CV fold from the run's save directory;
# files are named '<alias>_<fold>_train_his.csv'.
df_his_list = [
    pd.read_csv(
        param['algorithm']['init']['base_save_dir']
        + '/{}_{}_train_his.csv'.format(param['algorithm']['init']['alias'], fold_n),
        index_col=0,
    )
    for fold_n in range(param['kfold']['n_splits'])
]

In [None]:
# Plot the per-fold training histories, one subplot per fold.
m_lineplot(df_his_list)

In [82]:
# Explicit commit; the store was opened with auto_commit=False, so inserts are
# presumably not persisted until this call — confirm against dfdb.DFDB.
db.commit()