# Import libraries

In [7]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import pandas as pd
import numpy as np
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Activation, BatchNormalization, Dropout, Input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import keras
from datetime import datetime
from sklearn import preprocessing
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import plot_model
from sklearn.model_selection import GridSearchCV
from sklearn.utils import shuffle
import pickle
import glob
from keras.utils import to_categorical
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load processed dataset
The data we load here are already shuffled and normalized.

In [2]:
data_directory = 'dataset/3. normalized/'


def _load_pickle(file_name):
    """Unpickle and return the object stored at ``data_directory + file_name``.

    The file is opened inside a ``with`` block so the handle is closed even
    when unpickling raises.
    """
    # NOTE: pickle.load can execute arbitrary code while deserializing; only
    # point this at files produced by a trusted preprocessing step.
    with open(data_directory + file_name, "rb") as handle:
        return pickle.load(handle)


x_train = _load_pickle("x_train.p")
y_train = _load_pickle("y_train.p")
x_test = _load_pickle("x_test.p")
y_test = _load_pickle("y_test.p")

# Quick sanity check of the loaded array shapes.
print('x_train: ' + str(x_train.shape))
print('y_train: ' + str(y_train.shape))
print('x_test: ' + str(x_test.shape))
print('y_test: ' + str(y_test.shape))

x_train: (966, 26)
y_train: (966, 2)
x_test: (242, 26)
y_test: (242, 2)


# Tunable parameters

In [3]:
# Training schedule. Within this notebook these two values are only recorded
# in params.csv by create_model.
epochs = 2000
batch_size = 20

# Adam optimizer settings, passed as `lr` and `decay` in create_model.
learning_rate = 1e-4
decay = 1e-5

# Dropout rate: create_model applies dropout/2 after the first hidden layer
# and the full rate after the second and third hidden layers.
dropout = 0.5

# Model for classification

In [4]:
# Output locations for this experiment.
root_directory = 'results/results-classify-sae-04/'
model_directory = root_directory + "model/"
tensorboard_directory = root_directory + "tensorboard/"
autoencoder_2_directory = 'results/results-autoencoder-02/model/'

# Placeholders for the autoencoder weight files; create_model records whatever
# these hold at call time in params.csv.
autoencoder_1_path = autoencoder_2_path = ''

# Timestamp taken once when this cell runs; create_model uses it to name the
# per-run folder under model_directory.
date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
def create_model(layers):
    """Build the classification network, record its settings on disk, and compile it.

    The network takes an input of width ``x_train.shape[1]``, passes it through
    one to three ReLU hidden layers (each followed by dropout) and ends in a
    softmax layer with ``y_train.shape[1]`` units.

    Side effects: creates ``model_directory + date + '/'`` when it does not
    exist and writes ``params.csv``, ``summary.txt`` (opened in append mode)
    and ``model.png`` into it.

    Args:
        layers: mapping with the keys ``'layer_1'``, ``'layer_2'`` and
            ``'layer_3'`` holding the unit count of each hidden layer.
            ``layer_1`` is always used. ``layer_2`` is added only when its
            value is > 0, and ``layer_3`` only when both ``layer_2`` and
            ``layer_3`` are > 0.

    Returns:
        The compiled Keras ``Model`` (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # date, autoencoder_*_path and the tunable parameters are module-level
    # names that are only read here, so no `global` declaration is needed.
    x = Input(shape=(x_train.shape[1],))

    # The first hidden layer uses half the configured dropout rate.
    y = Dense(units=layers['layer_1'], activation='relu')(x)
    y = Dropout(dropout / 2)(y)

    if layers['layer_2'] > 0:
        y = Dense(units=layers['layer_2'], activation='relu')(y)
        y = Dropout(dropout)(y)
        # A third layer is only considered when the second one exists.
        if layers['layer_3'] > 0:
            y = Dense(units=layers['layer_3'], activation='relu')(y)
            y = Dropout(dropout)(y)

    y = Dense(units=y_train.shape[1], activation='softmax')(y)
    model = Model(x, y)

    # Create the per-run output directory
    directory = model_directory + date + '/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Write model hyper-parameters
    df = pd.DataFrame({
        'Layer 1': layers['layer_1'],
        'Layer 2': layers['layer_2'],
        'Layer 3': layers['layer_3'],
        'Dropout 1': dropout / 2,
        'Dropout 2 and 3': dropout,
        'Learning Rate': learning_rate,
        'Decay': decay,
        'Batch size': batch_size,
        'Autoencoder 1': autoencoder_1_path,
        'Autoencoder 2': autoencoder_2_path,
        'Epochs': epochs
    }, index=[0])
    df.to_csv(directory + 'params.csv')

    # Write model summary; the `with` block closes the file even if
    # model.summary raises part-way through.
    with open(directory + "summary.txt", "a") as summary_file:
        model.summary(print_fn=lambda line: summary_file.write(line + '\n'))

    # Write model diagram
    plot_model(model, to_file=directory + 'model.png', show_shapes=True, show_layer_names=False)

    # Compile the model
    model.compile(
        optimizer=keras.optimizers.Adam(lr=learning_rate, decay=decay),
        loss=keras.losses.categorical_crossentropy,
        metrics=['accuracy'],
    )

    return model

# Pass input through autoencoders

In [5]:
# Saved files of the two previously trained autoencoders.
autoencoder_1_path = 'results/results-autoencoder-01-02/model/2018-03-05-18-34-05/weights.hdf5'
autoencoder_2_path = 'results/results-autoencoder-02-02/model/2018-03-05-20-49-19/weights.hdf5'

# Despite the variable names, the loaded objects are used as models below
# (predict is called on them).
weights1 = load_model(autoencoder_1_path)
weights2 = load_model(autoencoder_2_path)

# Feed each split through auto encoder #1, then feed that result through
# auto encoder #2.
x_train_output = weights2.predict(weights1.predict(x_train))
x_test_output = weights2.predict(weights1.predict(x_test))

# K-nearest-neighbours classification on the autoencoder output

In [12]:
# Fit a KNN classifier with default settings on the autoencoder output of the
# training split (fit returns the estimator itself).
knn = KNeighborsClassifier().fit(x_train_output, y_train)

# Predict the test split and show the accuracy as the cell output.
y_pred = knn.predict(x_test_output)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.66115702479338845

In [13]:
# NOTE(review): this cell originally fitted on `x_train_final` / `y_train_final`,
# which no cell in this notebook defines, so it raised NameError on a fresh
# kernel (Restart & Run All). Its recorded accuracy equals the score of the
# KNN cell above, so it now fits on the arrays that are actually defined.
# TODO confirm against the notebook history, or delete this cell as a duplicate.
knn = KNeighborsClassifier()
knn.fit(x_train_output, y_train)
y_pred = knn.predict(x_test_output)
accuracy_score(y_test, y_pred)

0.66115702479338845