In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import lightgbm as lgb
import catboost as cb
from keras.wrappers.scikit_learn import KerasRegressor
from keras import Sequential
from keras.layers import Dense

from tsfresh.examples import load_robot_execution_failures
from tsfresh import extract_features, select_features
import optuna

from common import EP
from models import *

import types
import os

Using TensorFlow backend.


In [2]:
%%HTML
<style>
   /* Override the widths of the notebook, menubar and main-toolbar container divs. */
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [3]:
# Load the pre-computed train / test feature frames.
# NOTE(review): pickle files execute arbitrary code on load; only read files you produced yourself.
df_train, df_test = (
    pd.read_pickle(pkl_path)
    for pkl_path in ('../feats/df_train.pkl', '../feats/df_test.pkl')
)

In [4]:
# Derive two helper columns and retire 'season':
#   label - the target 'y' truncated to an int, with everything that is not < 15 mapped to 15
#   group - the 'season' value, with season 17 remapped to 1
#
# `group` is built as a fresh array with np.where instead of writing into
# `df_train['season'].values`: that array can be a view of the frame (so the write would
# silently alter 'season' too) and is read-only when pandas Copy-on-Write is enabled.
season = df_train['season'].values
group = np.where(season == 17, 1, season)

df_train = (
    df_train
    .assign(
        label=df_train['y'].apply(lambda x: int(x) if x < 15 else 15),
        group=group,
    )
    .drop(columns=['season'])
)

In [5]:
# Build the inputs for tsfresh's select_features: a feature matrix and the target 'y',
# both indexed by the values of the 'index' column. Despite the `test_` prefix, both are
# derived from df_train.
_train_by_index = df_train.set_index('index')
test_X = _train_by_index.drop(columns=['y', 'group', 'label'])
test_y = _train_by_index['y'].copy()

# Keep only the names of the columns that select_features retains.
_selected = select_features(test_X, test_y)
tsfresh_columns = _selected.columns.tolist()

In [6]:
# Number of columns kept by the tsfresh selection; the model's 'input_dim' has to equal this count.
len(tsfresh_columns)

1071

In [7]:
# Every column of df_train except the bookkeeping / target columns.
_bookkeeping_cols = ['index', 'y', 'label', 'group']
original_columns = df_train.drop(columns=_bookkeeping_cols).columns.tolist()

In [8]:
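# NOTE(review): everything in this cell is commented out, so none of it is executed here.
# The `create_path` and `KerasMLPRegressor` names used by later cells presumably come from
# `from models import *` — TODO confirm that this copy still matches that module.
# def create_path(base_dir, param):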
#     if base_dir == None:
#         return None
#     fold_path = base_dir + '/' + ','.join("{!s}={!r}".format(key,val) for (key,val) in param.items())
#     if not os.path.exists(fold_path):
#         os.makedirs(fold_path)
#     return fold_path

# class KerasMLPRegressor(object):
    
#     def __init__(self, batch, input_dim, hidden_layer_sizes, activation, dropout, solver, metric, lr, sgd_momentum, sgd_decay, base_save_dir, alias):
        
#         self.batch = batch
#         self.input_dim = input_dim
#         self.hidden_layer_sizes = hidden_layer_sizes
#         self.activation = activation
#         self.solver = solver
#         self.metric = metric
#         self.dropout = dropout
#         self.lr = lr
#         self.sgd_momentum = sgd_momentum
#         self.sgd_decay = sgd_decay
        
#         self.regressor = self.build_graph(input_dim, hidden_layer_sizes, activation, dropout)
#         self.compile_graph(self.regressor, solver, metric, lr, sgd_momentum, sgd_decay)
        
#         self.alias = alias
#         self.base_save_dir = base_save_dir
#         if (self.alias==None) & (self.base_save_dir==None):
#             self.chkpt = None
#         else:
#             self.chkpt = os.path.join(base_save_dir,'{}.hdf5'.format(alias))

#         return
    
#     def build_graph(self, input_dim, hidden_layer_sizes, activation, dropout):
    
#         print(input_dim,hidden_layer_sizes,activation,dropout)
#         i = Input(shape = (input_dim,))
#         x = Dense(hidden_layer_sizes[0], activation=activation)(i)
#         x = BatchNormalization()(x)
#         x = Dropout(dropout)(x)
#         for units in hidden_layer_sizes[1:-1]:
#             x = Dense(units, activation=activation)(x)
#             x = BatchNormalization()(x)
#             x = Dropout(dropout)(x)
#         x = Dense(units, activation=activation)(x)
#         x = BatchNormalization()(x)
#         y = Dense(1)(x)
#         regressor = Model(inputs = [i], outputs = [y])
#         return regressor
    
#     def compile_graph(self, model, solver, metric, lr, momentum, decay):
#         if solver=='adam':
#             optimizer = optimizers.adam(lr=lr)
#         elif solver=='sgd':
#             optimizer = optimizers.SGD(lr=lr, decay=decay, momentum=momentum, nesterov=True)
#         model.compile(optimizer=optimizer, loss=metric)
#         return
    
#     def fit(self, X_train, y_train, eval_set, versbose=1, epochs=200, early_stopping_rounds=20):
        
# #         reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=early_stopping_rounds//4, min_lr=self.lr*1e-2)
#         es_cb = EarlyStopping(monitor='val_loss', patience=early_stopping_rounds, verbose=1, mode='auto')
#         cp_cb = ModelCheckpoint(filepath = self.chkpt, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

# #         his_train = self.regressor.fit_generator( generator =  train_gen, epochs = epochs,  verbose = 1,  validation_data = validation, callbacks = [cp_cb])
#         his_train = self.regressor.fit( X_train, y_train, epochs = epochs,  verbose = 1,  validation_data = eval_set[0], callbacks = [cp_cb,es_cb])
#         df_train_his = pd.DataFrame(his_train.history)
        
# #         df_train_his = pd.DataFrame()
# #         prev_val_loss = 999999
# #         for i in np.arange(epochs):
# #             his_train = self.regressor.fit( X_train, y_train, epochs = 1,  verbose = versbose,  batch_size = self.batch,  validation_data = validation,  callbacks = [])
# #             df_train_his_i = pd.DataFrame(his_train.history)
# #             df_train_his_i['epochs'] = i+1
# #             df_train_his = pd.concat([df_train_his, df_train_his_i], axis=0)
# #             if (df_train_his_i.val_loss.values[0] < prev_val_loss) & (self.chkpt!=None):
# #                 prev_val_loss = df_train_his_i.val_loss.values[0]
# #                 self.regressor.save_weights(self.chkpt)
                
#         df_train_his.to_csv(self.base_save_dir + '/train_his.csv', index=True)
            
#         return df_train_his
    
#     def predict(self, X):
#         return self.regressor.predict(X)[:,0]
    

In [9]:
# Architecture settings. They are passed to create_path to obtain base_save_dir and are
# also spread into the model's 'init' kwargs below. Key order is kept as-is because the
# save path is presumably built from the items in order (see the commented-out
# create_path reference earlier in this notebook) — TODO confirm.
path_param = {
    # Derived from the selected feature list instead of a hard-coded 1071, so it always
    # agrees with the columns passed under param['columns'].
    'input_dim': len(tsfresh_columns),
    'hidden_layer_sizes': [4096, 4096, 4096, 4096, 2048, 256, 32],
    'activation': 'relu',
    'dropout': .3,
}
base_save_dir = create_path('KerasMLPRegressor', path_param)

# Full experiment specification handed to EP.process.
param = {
    'algorithm': {
        'cls': 'KerasMLPRegressor',
        'fit': {
            # NOTE(review): 'versbose' is misspelled, but it presumably has to match the
            # `versbose` parameter name of KerasMLPRegressor.fit — do not rename it here alone.
            'versbose': 10,
            'epochs': 100,
            'early_stopping_rounds': 20,
        },
        'init': {
            'batch': 32,
            'solver': 'adam',
            'metric': 'mean_absolute_error',
            'lr': .0001,
            'sgd_momentum': .9,
            'sgd_decay': 0.00001,
            'base_save_dir': base_save_dir,
            'alias': 'kerasmlp',
            **path_param
        }
    },
    'columns': tsfresh_columns,
    'feature_importance': {
        'is_output': False,
        'permutation_feature_importance': False,
        'permutation_random_state': 1
    },
    'kfold': {
        'n_splits': 8,
        'random_state': 1985,
        'shuffle': True,
        'type': 'stratified'
    },
    'scaler': {
        'cls': 'StandardScaler'
    }
}

In [10]:
# Empty list handed to EP.process through its `trial` argument.
mytrial = list()

In [None]:
# Run a single trial with the configuration in `param`.
# EP.process returns four objects, unpacked here as the training history, feature
# importances, validation predictions and test predictions (meaning inferred from the
# variable names — TODO confirm against EP).
df_his, df_feature_importances, df_valid_pred, df_test_pred = EP.process(
    df_train,
    param,
    df_test=df_test,
    trial=mytrial,
)


Epoch 00001: val_loss improved from inf to 2.97339, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00002: val_loss improved from 2.97339 to 2.01919, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00003: val_loss improved from 2.01919 to 1.93348, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00004: val_loss improved from 1.93348 to 1.90592, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00005: val_loss improved from 1.90592 to 1.85838, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],act

Epoch 00068: val_loss improved from 0.85808 to 0.77683, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00069: val_loss did not improve from 0.77683

Epoch 00070: val_loss did not improve from 0.77683

Epoch 00071: val_loss did not improve from 0.77683

Epoch 00072: val_loss did not improve from 0.77683

Epoch 00073: val_loss did not improve from 0.77683

Epoch 00074: val_loss did not improve from 0.77683

Epoch 00075: val_loss did not improve from 0.77683

Epoch 00076: val_loss did not improve from 0.77683

Epoch 00077: val_loss did not improve from 0.77683

Epoch 00078: val_loss did not improve from 0.77683

Epoch 00079: val_loss did not improve from 0.77683

Epoch 00080: val_loss improved from 0.77683 to 0.77226, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 0008

Epoch 00044: val_loss improved from 1.13060 to 1.04020, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00045: val_loss did not improve from 1.04020

Epoch 00046: val_loss did not improve from 1.04020

Epoch 00047: val_loss did not improve from 1.04020

Epoch 00048: val_loss improved from 1.04020 to 1.03674, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00049: val_loss did not improve from 1.03674

Epoch 00050: val_loss did not improve from 1.03674

Epoch 00051: val_loss did not improve from 1.03674

Epoch 00052: val_loss improved from 1.03674 to 1.02846, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00053: val_loss improved from 1.02846 to 0.98803, savin

Epoch 00017: val_loss did not improve from 1.49144

Epoch 00018: val_loss improved from 1.49144 to 1.42384, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00019: val_loss improved from 1.42384 to 1.39736, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00020: val_loss did not improve from 1.39736

Epoch 00021: val_loss did not improve from 1.39736

Epoch 00022: val_loss improved from 1.39736 to 1.38703, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00023: val_loss improved from 1.38703 to 1.38338, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00

Epoch 00002: val_loss improved from 2.63274 to 2.12129, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00003: val_loss improved from 2.12129 to 2.00867, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00004: val_loss improved from 2.00867 to 1.94645, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00005: val_loss improved from 1.94645 to 1.93632, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],activation='relu',dropout=0.3/kerasmlp.hdf5

Epoch 00006: val_loss improved from 1.93632 to 1.89515, saving model to KerasMLPRegressor/input_dim=1071,hidden_layer_sizes=[4096, 4096, 4096, 4096, 2048, 256, 32],