# Import libraries

In [1]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import pandas as pd
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, BatchNormalization, Dropout, Input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import keras
from datetime import datetime
from sklearn import preprocessing
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import plot_model
from sklearn.model_selection import GridSearchCV
from sklearn.utils import shuffle

Using TensorFlow backend.


# Load training dataset

In [7]:
# Read the training CSV into a DataFrame; the path is relative to the
# notebook's working directory.
train_csv_path = 'dataset/1. istanbul/train_data.csv'
train_dataframe = pd.read_csv(train_csv_path)

# Visualize some training data

In [8]:
# Preview the first five rows (5 is also the default for DataFrame.head).
train_dataframe.head(n=5)

Unnamed: 0,Subject ID,Jitter (local),"Jitter (local, absolute)",Jitter (rap),Jitter (ppq5),Jitter (ddp),Shimmer (local),"Shimmer (local, dB)",Shimmer (apq3),Shimmer (apq5),...,Maximum pitch,Number of pulses,Number of periods,Mean period,Standard deviation of period,Fraction of locally unvoiced frames,Number of voice breaks,Degree of voice breaks,UPDRS,Class information
0,1,1.488,9e-05,0.9,0.794,2.699,8.334,0.779,4.517,4.609,...,187.576,160,159,0.006065,0.000416,0.0,0,0.0,23,1
1,1,0.728,3.8e-05,0.353,0.376,1.059,5.864,0.642,2.058,3.18,...,234.505,170,169,0.005181,0.000403,2.247,0,0.0,23,1
2,1,1.22,7.4e-05,0.732,0.67,2.196,8.719,0.875,4.347,5.166,...,211.442,1431,1427,0.006071,0.000474,10.656,1,0.178,23,1
3,1,2.502,0.000123,1.156,1.634,3.469,13.513,1.273,5.263,8.771,...,220.23,94,92,0.00491,0.00032,0.0,0,0.0,23,1
4,1,3.509,0.000167,1.715,1.539,5.145,9.112,1.04,3.102,4.927,...,225.162,117,114,0.004757,0.00038,18.182,1,13.318,23,1


# Prepare training data

In [9]:
# Feature matrix: every column except the subject identifier, the
# 'Class information' column (used as the label elsewhere in this notebook)
# and the 'UPDRS' column.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0; it returns the same ndarray.
non_feature_columns = ['Subject ID', 'Class information', 'UPDRS']
train_x = train_dataframe.drop(non_feature_columns, axis=1).values
print(train_x.shape)

(1040, 26)


# Prepare training labels

In [10]:
# Label vector taken from the 'Class information' column.
# `.values` replaces `Series.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
train_y = train_dataframe['Class information'].values
print(train_y.shape)
# One-hot encode the labels so each row has one column per class.
train_y = np_utils.to_categorical(train_y)
print(train_y.shape)

(1040,)
(1040, 2)


# Prepare test data

In [11]:
test_dataframe = pd.read_csv('dataset/1. istanbul/test_data.csv')
# Only the identifier and label columns are dropped here (this cell does not
# drop a 'UPDRS' column, unlike the training-data cell).
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
test_x = test_dataframe.drop(['Subject ID', 'Class information'], axis=1).values
print(test_x.shape)

(168, 26)


# Prepare test labels

In [12]:
# Label vector taken from the 'Class information' column of the test frame.
# `.values` replaces `Series.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
test_y = test_dataframe['Class information'].values
print(test_y.shape)
# One-hot encode the labels so each row has one column per class.
test_y = np_utils.to_categorical(test_y)
print(test_y.shape)

(168,)
(168, 2)


# Combine train and test set

In [13]:
# Append the test rows beneath the training rows, for features and labels
# alike. From here on train_x / train_y hold both sets.
# NOTE: the names are rebound in place, so re-running this cell on its own
# appends the test rows a second time.
train_x = np.concatenate((train_x, test_x), axis=0)
train_y = np.concatenate((train_y, test_y), axis=0)
print(train_x.shape)
print(train_y.shape)

(1208, 26)
(1208, 2)


# Normalize data

In [14]:
# Min-max scale every feature column using the per-column minimum and
# maximum of the array currently held in train_x.
mins = np.min(train_x, axis=0)
maxs = np.max(train_x, axis=0)
rng = maxs - mins
# A constant column has a zero range; dividing by it would produce NaN.
# Substitute 1.0 for those columns so they scale to a finite constant (1.0).
safe_rng = np.where(rng == 0, 1.0, rng)
# 1 - (max - x) / range is the usual (x - min) / range written from the top.
train_x = 1.0 - (maxs - train_x) / safe_rng
# test_x is scaled with the same statistics so both arrays share one scale.
test_x = 1.0 - (maxs - test_x) / safe_rng
pd.DataFrame(train_x).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,0.096023,0.110551,0.106928,0.054303,0.106923,0.186514,0.266893,0.162585,0.056981,0.142044,...,0.034343,0.193353,0.204351,0.107383,0.106783,0.378729,0.062423,0.0,0.0,0.0
1,0.042716,0.042501,0.038278,0.023294,0.038318,0.12526,0.215176,0.065922,0.037237,0.150903,...,0.049169,0.238354,0.295334,0.114094,0.113499,0.295624,0.060411,0.025488,0.0,0.0
2,0.077225,0.089595,0.085843,0.045104,0.085882,0.196062,0.303133,0.155902,0.064677,0.158903,...,0.043064,0.204329,0.250621,0.960403,0.958361,0.379296,0.071557,0.120874,0.083333,0.002575
3,0.167146,0.152809,0.139056,0.116617,0.139134,0.314949,0.453379,0.19191,0.114487,0.367528,...,0.035815,0.298746,0.267658,0.063087,0.061786,0.270109,0.047233,0.0,0.0,0.0
4,0.237778,0.209959,0.209212,0.10957,0.209245,0.205808,0.365421,0.106962,0.061375,0.278121,...,0.038016,0.299027,0.27722,0.078523,0.076561,0.255738,0.056711,0.206243,0.083333,0.192688


# Shuffle data

In [16]:
# Shuffle features and labels together (rows stay aligned).
# A fixed seed keeps the row order identical across runs; Keras'
# validation_split holds out the trailing rows, so without a seed the
# validation set would change on every run.
SHUFFLE_SEED = 42
train_x, train_y = shuffle(train_x, train_y, random_state=SHUFFLE_SEED)
pd.DataFrame(train_y).head()

Unnamed: 0,0,1
0,0.0,1.0
1,0.0,1.0
2,1.0,0.0
3,0.0,1.0
4,1.0,0.0


# Tuning parameters

In [17]:
# Fraction of the rows passed to fit() that Keras holds out for validation.
validation_split = 0.172
# Number of training epochs for every grid-search candidate.
epochs = 5

# Create model

In [18]:
# Timestamp used to name the per-run output folders.
date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# Output locations, all relative to the notebook's working directory.
root_directory = 'results/results-dropout/'
model_directory = ''.join([root_directory, "model/"])
tensorboard_directory = ''.join([root_directory, "tensorboard/"])
def create_model(param, dropout):
    """Build, record and compile a dense classifier with dropout.

    The network has three ReLU hidden layers (an optional fourth when
    ``param['layer_4'] > 0``), each followed by a Dropout layer, and a
    softmax output with one unit per column of the global ``train_y``.

    Side effects: creates ``model_directory + date + '/'`` if needed and
    writes ``params.txt``, ``summary.txt`` and ``model.png`` into it.
    ``date`` is read from the module-level variable of that name.

    Args:
        param: dict with keys ``'layer_1'`` .. ``'layer_4'`` (unit counts),
            ``'optimizer'`` (passed to ``model.compile``) and
            ``'optimizer_text'`` (label written to ``params.txt``).
        dropout: rate passed to every Dropout layer.

    Returns:
        The compiled Keras ``Model``.
    """
    inputs = Input(shape=(train_x.shape[1],))

    # Three mandatory hidden blocks: Dense(relu) followed by Dropout.
    hidden = inputs
    for size_key in ('layer_1', 'layer_2', 'layer_3'):
        hidden = Dense(units=param[size_key], activation='relu')(hidden)
        hidden = Dropout(dropout)(hidden)

    # A size of 0 (or less) for layer 4 means "no fourth hidden block".
    if param['layer_4'] > 0:
        hidden = Dense(units=param['layer_4'], activation='relu')(hidden)
        hidden = Dropout(dropout)(hidden)

    outputs = Dense(units=train_y.shape[1], activation='softmax')(hidden)
    model = Model(inputs, outputs)

    # Create directory
    directory = model_directory + date + '/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Write model hyper-parameters; `with` closes the file even if the
    # write raises (e.g. a missing key in `param`).
    with open(directory + "params.txt", "a") as params_file:
        params_file.write("optimizer: %s, layer 1: %d, layer 2: %d, layer 3: %d, layer 4: %d, dropout: %f" % (param['optimizer_text'], param['layer_1'], param['layer_2'], param['layer_3'], param['layer_4'], dropout))

    # Write model summary, one summary line per file line.
    with open(directory + "summary.txt", "a") as summary_file:
        model.summary(print_fn=lambda line: summary_file.write(line + '\n'))

    # Write model diagram
    plot_model(model, to_file=directory + 'model.png', show_shapes=True, show_layer_names=False)

    # The grid search hands the same optimizer object to several candidates.
    # Rebuild it from its config so each model starts from a fresh optimizer
    # (own iteration counter / decay schedule) instead of inheriting state
    # from a previously trained model.
    optimizer = param['optimizer']
    if isinstance(optimizer, keras.optimizers.Optimizer):
        optimizer = type(optimizer).from_config(optimizer.get_config())

    # Compile the model
    model.compile(optimizer=optimizer, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

    return model

# Prepare callbacks

In [19]:
class KerasClassifierTensorBoard(KerasClassifier):
    """KerasClassifier that attaches TensorBoard and CSV logging to each fit.

    Every call to ``fit`` refreshes the module-level ``date`` timestamp and
    logs to folders named after it under ``tensorboard_directory`` and
    ``model_directory``.
    """

    def fit(self, x, y, **kwargs):
        """Fit the wrapped model with per-run logging callbacks.

        Args:
            x: training features, forwarded to ``KerasClassifier.fit``.
            y: training labels, forwarded to ``KerasClassifier.fit``.
            **kwargs: extra fit arguments. A ``callbacks`` entry, if given,
                is appended after the two logging callbacks instead of
                colliding with them.

        Returns:
            Whatever ``KerasClassifier.fit`` returns.
        """
        # Rebinding the global lets code that reads `date` afterwards use the
        # same timestamp as the log paths built below.
        global date
        date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        run_callbacks = [
            keras.callbacks.TensorBoard(log_dir=tensorboard_directory + date + '/'),
            keras.callbacks.CSVLogger(model_directory + date + '/epochs.csv'),
        ]
        # Previously a caller-supplied `callbacks` kwarg raised TypeError
        # (keyword given twice); merge it into the list instead.
        extra_callbacks = kwargs.pop('callbacks', None)
        if extra_callbacks:
            run_callbacks.extend(extra_callbacks)
        return super(KerasClassifierTensorBoard, self).fit(x, y, callbacks=run_callbacks, **kwargs)

# Grid search hyperparameters

In [20]:
model = KerasClassifierTensorBoard(build_fn=create_model, epochs=epochs, batch_size=20)

# One row per candidate: (optimizer instance, label for that optimizer,
# unit counts for layers 1-4). A layer-4 size of 0 makes create_model skip
# the fourth hidden layer.
_candidates = [
    (keras.optimizers.Adam(lr=0.0001, decay=0.0001), 'Adam, lr=0.0001, decay=0.0001', (4, 8, 16, 0)),
    (keras.optimizers.Adam(lr=0.000001, decay=0.0001), 'Adam, lr=0.000001, decay=0.0001', (4, 8, 16, 0)),
    (keras.optimizers.RMSprop(lr=0.0001, decay=0.0001), 'RMSprop, lr=0.0001, decay=0.0001', (4, 8, 16, 0)),
    (keras.optimizers.Adam(lr=0.0001, decay=0.0001), 'Adam, lr=0.0001, decay=0.0001', (4, 64, 8, 0)),
    (keras.optimizers.Adam(lr=0.0001, decay=0.0001), 'Adam, lr=0.0001, decay=0.0001', (6, 6, 32, 0)),
    (keras.optimizers.Adam(lr=0.0001, decay=0.0001), 'Adam, lr=0.0001, decay=0.0001', (6, 6, 32, 8)),
]
param = [
    {
        'optimizer': optimizer,
        'optimizer_text': label,
        'layer_1': units[0],
        'layer_2': units[1],
        'layer_3': units[2],
        'layer_4': units[3],
    }
    for optimizer, label, units in _candidates
]
dropouts = [0.5, 0.4, 0.6]
# Every architecture/optimizer candidate is crossed with every dropout rate.
param_grid = {'param': param, 'dropout': dropouts}

# cv is a single "fold" whose train and test indices are both slice(None),
# i.e. the whole array: no cross-validation is performed and the grid score
# is computed on the same rows the model was fitted on. The hold-out used
# during training comes from Keras' validation_split instead.
# NOTE(review): passing fit_params to the GridSearchCV constructor depends on
# the installed scikit-learn version; newer releases expect these as keyword
# arguments to grid.fit(...) - confirm against the pinned environment.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    error_score=0,
    verbose=2,
    cv=[(slice(None), slice(None))],
    n_jobs=1,
    fit_params=dict(validation_split=validation_split),
)
grid_result = grid.fit(train_x, train_y)

Fitting 1 folds for each of 18 candidates, totalling 18 fits
[CV] param={'layer_2': 8, 'optimizer': <keras.optimizers.Adam object at 0x000002387A4958D0>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 16, 'layer_4': 0, 'layer_1': 4}, dropout=0.5 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 8, 'optimizer': <keras.optimizers.Adam object at 0x000002387A50CC50>, 'optimizer_text': 'Adam, lr=0.000001, decay=0.0001', 'layer_3': 16, 'layer_4': 0, 'layer_1': 4}, dropout=0.5 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.6s remaining:    0.0s


Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 8, 'optimizer': <keras.optimizers.RMSprop object at 0x000002387A52AC18>, 'optimizer_text': 'RMSprop, lr=0.0001, decay=0.0001', 'layer_3': 16, 'layer_4': 0, 'layer_1': 4}, dropout=0.5 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 64, 'optimizer': <keras.optimizers.Adam object at 0x000002387A543828>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 8, 'layer_4': 0, 'layer_1': 4}, dropout=0.5 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 6, 'optimizer': <keras.optimizers.Adam object at 0x000002387A4E9588>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 32, 'layer_4': 0, 'layer_1': 6}, dropout=0.5 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV

Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 64, 'optimizer': <keras.optimizers.Adam object at 0x000002387A543828>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 8, 'layer_4': 0, 'layer_1': 4}, dropout=0.4 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 6, 'optimizer': <keras.optimizers.Adam object at 0x000002387A4E9588>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 32, 'layer_4': 0, 'layer_1': 6}, dropout=0.4 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 6, 'optimizer': <keras.optimizers.Adam object at 0x000002387A59CAC8>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 32, 'layer_4': 8, 'layer_1': 6}, dropout=0.4 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 8, 'optimizer': <keras.optimizers.Adam object at 0x00

Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 6, 'optimizer': <keras.optimizers.Adam object at 0x000002387A4E9588>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 32, 'layer_4': 0, 'layer_1': 6}, dropout=0.6 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[CV] param={'layer_2': 6, 'optimizer': <keras.optimizers.Adam object at 0x000002387A59CAC8>, 'optimizer_text': 'Adam, lr=0.0001, decay=0.0001', 'layer_3': 32, 'layer_4': 8, 'layer_1': 6}, dropout=0.6 
Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:  3.0min finished


Train on 1000 samples, validate on 208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Print results

In [None]:
# Report the best candidate, then the score of every candidate in grid order.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# The loop variable is deliberately not called `param`: that name holds the
# candidate list built in the grid-search cell and must not be rebound here.
for mean, stdev, candidate in zip(means, stds, params):
    print("Mean %f, Std %f with: %r" % (mean, stdev, candidate))

# The End