# Import libraries

In [1]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import pandas as pd
import numpy as np
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Activation, BatchNormalization, Dropout, Input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import keras
from datetime import datetime
from sklearn import preprocessing
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import plot_model
from sklearn.model_selection import GridSearchCV
from sklearn.utils import shuffle
import pickle
import glob
from keras.utils import to_categorical

Using TensorFlow backend.


# Load processed dataset
The data we load here are already shuffled and normalized.

In [2]:
# Location of the pre-processed (shuffled + normalized) train/test splits.
data_directory = 'dataset/3. normalized/'


def _load_pickle(file_name):
    """Unpickle `file_name` from `data_directory`, closing the file afterwards.

    NOTE(review): pickle.load can execute arbitrary code while deserializing;
    only point `data_directory` at files produced by a trusted pipeline.
    """
    with open(data_directory + file_name, "rb") as handle:
        return pickle.load(handle)


x_train = _load_pickle("x_train.p")
y_train = _load_pickle("y_train.p")
x_test = _load_pickle("x_test.p")
y_test = _load_pickle("y_test.p")

# Report the shape of every split as a quick sanity check.
print('x_train: ' + str(x_train.shape))
print('y_train: ' + str(y_train.shape))
print('x_test: ' + str(x_test.shape))
print('y_test: ' + str(y_test.shape))

x_train: (966, 26)
y_train: (966, 2)
x_test: (242, 26)
y_test: (242, 2)


# Convert column 24 to one-hot (categorical) columns

In [3]:
def col_to_categorical(arr, col, num_classes=None):
    """Replace one column of a 2-D array with its one-hot encoding.

    The column at index `col` is removed and its one-hot expansion is appended
    to the right of the remaining columns, so the encoded columns always end up
    last, regardless of where `col` was.

    Args:
        arr: 2-D NumPy array whose columns are features.
        col: Index of the column that holds the class ids.
        num_classes: Optional number of one-hot columns to produce. When None
            (the default, matching the previous behaviour) `to_categorical`
            infers it from the largest class id present in `arr`, which means
            two arrays with different maxima get different widths. Pass the
            same value for train and test data to keep their widths aligned.

    Returns:
        A new array; `arr` itself is not modified.
    """
    remaining = np.delete(arr, col, axis=1)
    # to_categorical casts the ids to int, so callers should pass whole numbers.
    one_hot = to_categorical(arr[:, col], num_classes=num_classes)
    return np.hstack([remaining, one_hot])

# Column that holds the normalized class id, and the factor that undoes its
# normalization. NOTE(review): presumably the column was divided by 12 during
# normalization -- confirm against the notebook that produced the dataset.
CATEGORICAL_COL = 24
CATEGORICAL_SCALE = 12

# De-normalize the column back to whole-number class ids. The product is a
# float, and to_categorical truncates when it casts to int, so a value such as
# 6.9999999 would land in class 6; round to the nearest integer first.
x_train[:, CATEGORICAL_COL] = np.rint(x_train[:, CATEGORICAL_COL] * CATEGORICAL_SCALE)
x_test[:, CATEGORICAL_COL] = np.rint(x_test[:, CATEGORICAL_COL] * CATEGORICAL_SCALE)

# Convert the de-normalized column to one-hot columns (appended on the right).
x_train = col_to_categorical(x_train, CATEGORICAL_COL)
x_test = col_to_categorical(x_test, CATEGORICAL_COL)

# The one-hot width is inferred separately for each array, so a class missing
# from one split would silently give train and test different feature counts.
assert x_train.shape[1] == x_test.shape[1], (
    'one-hot width differs between splits: x_train has %d columns, x_test has %d'
    % (x_train.shape[1], x_test.shape[1])
)
pd.DataFrame(x_train).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,28,29,30,31,32,33,34,35,36,37
0,0.226135,0.165461,0.218373,0.136944,0.218364,0.326976,0.467346,0.250757,0.120083,0.321016,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0794,0.089118,0.061998,0.030564,0.062079,0.089922,0.191393,0.04328,0.024083,0.080073,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.103458,0.103848,0.100653,0.06595,0.10069,0.158392,0.283503,0.082511,0.046162,0.196872,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.146945,0.220966,0.091491,0.064243,0.091571,0.334515,0.488109,0.191163,0.10505,0.330666,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.130953,0.136021,0.060241,0.02997,0.06028,0.179025,0.251038,0.107355,0.057036,0.181617,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Tunable parameters

In [4]:
# Training schedule handed to the Keras scikit-learn wrapper.
epochs, batch_size = 2000, 20

# Adam optimizer settings used when the model is compiled.
learning_rate = 1e-4
decay = 1e-5

# Dropout rate for hidden layers 2 and 3; the first hidden layer uses half of it.
dropout = 0.5

# Model for classification

In [5]:
# Timestamp that names the output folder of the current run; it is refreshed
# by KerasClassifierTensorBoard.fit before every training run.
date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

# Output locations for this experiment.
root_directory = 'results/results-classify-sae-03/'
model_directory = ''.join([root_directory, "model/"])
tensorboard_directory = ''.join([root_directory, "tensorboard/"])

# Folder holding the trained second-stage autoencoder runs.
autoencoder_2_directory = 'results/results-autoencoder-02/model/'

# Paths of the autoencoders in use; filled in by the grid-search cell and
# recorded in params.csv by create_model.
autoencoder_1_path = autoencoder_2_path = ''
def create_model(layers):
    """Build and compile the dense softmax classifier for one layer layout.

    Besides building the network, this records the run on disk: it creates
    `model_directory + date + '/'` and writes `params.csv` (hyper-parameters),
    `summary.txt` (Keras layer summary, appended) and `model.png` (diagram)
    into it. The module-level `date`, `dropout`, `learning_rate`, `decay`,
    `batch_size`, `epochs` and autoencoder paths are read, never modified.

    Args:
        layers: Dict with integer unit counts under 'layer_1', 'layer_2' and
            'layer_3'. A 'layer_2' value <= 0 omits hidden layers 2 and 3;
            a 'layer_3' value <= 0 omits only hidden layer 3.

    Returns:
        The compiled Keras `Model`, taking `x_train.shape[1]` input features
        and emitting `y_train.shape[1]` softmax outputs.
    """
    inputs = Input(shape=(x_train.shape[1],))

    # First hidden layer uses half the configured dropout rate.
    hidden = Dense(units=layers['layer_1'], activation='relu')(inputs)
    hidden = Dropout(dropout / 2)(hidden)

    # Layer 3 is only considered when layer 2 is present.
    if layers['layer_2'] > 0:
        hidden = Dense(units=layers['layer_2'], activation='relu')(hidden)
        hidden = Dropout(dropout)(hidden)
        if layers['layer_3'] > 0:
            hidden = Dense(units=layers['layer_3'], activation='relu')(hidden)
            hidden = Dropout(dropout)(hidden)

    outputs = Dense(units=y_train.shape[1], activation='softmax')(hidden)
    model = Model(inputs, outputs)

    # Create the per-run output directory
    directory = model_directory + date + '/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Write model hyper-parameters
    params = pd.DataFrame({
        'Layer 1': layers['layer_1'],
        'Layer 2': layers['layer_2'],
        'Layer 3': layers['layer_3'],
        'Dropout 1': dropout / 2,
        'Dropout 2 and 3': dropout,
        'Learning Rate': learning_rate,
        'Decay': decay,
        'Batch size': batch_size,
        'Autoencoder 1': autoencoder_1_path,
        'Autoencoder 2': autoencoder_2_path,
        'Epochs': epochs
    }, index=[0])
    params.to_csv(directory + 'params.csv')

    # Write model summary; the context manager closes the file even if
    # summary() raises part-way through.
    with open(directory + "summary.txt", "a") as summary_file:
        model.summary(print_fn=lambda line: summary_file.write(line + '\n'))

    # Write model diagram
    plot_model(model, to_file=directory + 'model.png', show_shapes=True, show_layer_names=False)

    # Compile the model
    model.compile(
        optimizer=keras.optimizers.Adam(lr=learning_rate, decay=decay),
        loss=keras.losses.categorical_crossentropy,
        metrics=['accuracy'],
    )

    return model

# Prepare callbacks

In [6]:
class KerasClassifierTensorBoard(KerasClassifier):
    """KerasClassifier that logs every training run into a timestamped folder."""

    def fit(self, x, y, **kwargs):
        """Train the wrapped model with TensorBoard, CSV and checkpoint logging.

        Refreshes the module-level `date` so that this run gets its own output
        folders, then delegates to `KerasClassifier.fit`. `create_model` reads
        the same `date` to create the model folder and write its files there.

        Args:
            x: Training features, forwarded unchanged.
            y: Training labels, forwarded unchanged.
            **kwargs: Extra fit arguments. Any `callbacks` supplied here are
                appended after the logging callbacks instead of clashing with
                them; `verbose` defaults to 0 when not supplied.

        Returns:
            Whatever `KerasClassifier.fit` returns.
        """
        global date
        # NOTE(review): second resolution -- two fits started within the same
        # second would share one folder.
        date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        run_model_directory = model_directory + date + '/'

        logging_callbacks = [
            keras.callbacks.TensorBoard(log_dir=tensorboard_directory + date + '/'),
            keras.callbacks.CSVLogger(run_model_directory + 'epochs.csv'),
            keras.callbacks.ModelCheckpoint(run_model_directory + 'weights.hdf5', save_best_only=True),
        ]

        # Pull caller-supplied values out of kwargs so they are not passed
        # twice (which would raise "got multiple values for keyword argument").
        callbacks = logging_callbacks + list(kwargs.pop('callbacks', None) or [])
        kwargs.setdefault('verbose', 0)
        return super(KerasClassifierTensorBoard, self).fit(x, y, callbacks=callbacks, **kwargs)

# Grid search

In [7]:
# Grid search over classifier layer layouts, trained on the original features
# stacked with their reconstruction by the two chained autoencoders.
#
# Disabled loop over every trained autoencoder #2 run; a single run is
# hard-coded below instead.
# autoencoder_2_directories = glob.glob(autoencoder_2_directory + '*')
# for autoencoder_2_path in autoencoder_2_directories:

# NOTE(review): this path starts with 'Results/' while autoencoder_2_directory
# uses 'results/'; the two differ on a case-sensitive filesystem -- confirm.
autoencoder_2_path = 'Results/results-autoencoder-02/model/2018-03-05-03-12-40'
weights2 = load_model(autoencoder_2_path + '/weights.hdf5')
# The params file of autoencoder #2 records which autoencoder #1 it was built on.
params2 = pd.read_csv(autoencoder_2_path + '/params.csv')
autoencoder_1_path =  params2.iloc[0]['Autoencoder 1']
weights1 = load_model(autoencoder_1_path)

# NOTE(review): the recorded output of this cell is
#   ValueError: ... expected input_5 to have shape (None, 26) but got array with shape (966, 38)
# x_train was loaded with 26 columns and has 38 after the one-hot conversion
# cell, so a loaded model (most likely autoencoder #1 in the predict call
# below) still expects the 26 pre-conversion features. Confirm whether the
# autoencoders should be fed the pre-conversion arrays or be retrained.

# Pass input through auto encoder #1
x_train_output = weights1.predict(x_train)
x_test_output = weights1.predict(x_test)

# Pass auto encoder #1 output through auto encoder #2
x_train_output = weights2.predict(x_train_output)
x_test_output = weights2.predict(x_test_output)

# Wrap create_model so scikit-learn can drive training; fit() adds the logging callbacks.
model = KerasClassifierTensorBoard(build_fn=create_model, epochs=epochs, batch_size=batch_size)

# Candidate hidden-layer layouts; a 0 means that layer is omitted (see create_model).
layers = [
    {'layer_1': 10, 'layer_2': 6, 'layer_3': 4},
    {'layer_1': 4, 'layer_2': 8, 'layer_3': 16},
    {'layer_1': 4, 'layer_2': 8, 'layer_3': 0},
    {'layer_1': 16, 'layer_2': 8, 'layer_3': 4},
    {'layer_1': 32, 'layer_2': 16, 'layer_3': 8},
    {'layer_1': 32, 'layer_2': 16, 'layer_3': 0}
]
param_grid = dict(
    layers=layers
)

# cv is a single split whose train and test indices both select every row, so
# each candidate is fitted and scored on the same data; the held-out figures
# come from the validation_data passed through to Keras.
# NOTE(review): `fit_params` as a GridSearchCV constructor argument was
# deprecated in scikit-learn 0.19 and removed in 0.21; on newer versions it has
# to be passed to grid.fit(...) instead -- confirm the installed version.
grid = GridSearchCV(estimator = model, param_grid = param_grid, error_score=0, verbose=2, cv=[(slice(None), slice(None))], n_jobs=1, fit_params=dict(validation_data=(x_test_output, y_test)))
# Training set = reconstructed features stacked on the original features, with
# the labels repeated once for each half.
grid_result = grid.fit(np.vstack([x_train_output, x_train]), np.vstack([y_train, y_train]))

ValueError: Error when checking : expected input_5 to have shape (None, 26) but got array with shape (966, 38)