In [162]:
import sys
from pathlib import Path
from datetime import datetime
from dateutil.tz import gettz

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras import utils
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import GridSearchCV

np.random.seed(757566)

In [163]:
fname = 'GunPoint' # private_dog0_correct_plus
log_to_file = True

tensorboard_dir = '../logs/tensorboard'
logs_dir = '../logs'
timestamp = '{:%Y-%m-%dT%H:%M}'.format(datetime.now(gettz("Europe/London")))
logs_dir = logs_dir +'/' + timestamp
tensorboard_dir = tensorboard_dir +'/' + timestamp
if 'private' in fname:
    fdir = '../data/private_data/private_events_dev2' 
else:
    fdir = '../data' 

In [164]:
def readucr(filename):
    ''' Load a dataset from a file in UCR format
    space delimited, class labels in the first column.
    Returns
    X : DNN input data
    Y : class labels
    '''
    data = np.loadtxt(Path(filename))
    Y = data[:,0]
    X = data[:,1:]
    return X, Y

In [165]:
# Hyperparameter grid search adapted from Machine Learning Mastery
# https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
# Use scikit-learn to grid search the batch size and epochs

# Function to create model, required for KerasClassifier
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])   
    return model


def create_functional_model():
    # create model
    drop = 0.2
    num = 64
    l2 = 0.1
    nb_classes = 2
    x = Input(shape=(X_SHAPE))
    y = Dropout(drop,name='Drop010')(x)
    y = Dense(num, kernel_regularizer=regularizers.l2(l2), activation='relu', name='Dense010')(y)
    y = Dropout(drop,name='Drop081')(y)
    out = Dense(nb_classes-1, activation='sigmoid', name='Dense080')(y)
    model = Model(x, out)
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model


def create_fcn(input_shape=(150,1), num_features=100, filter_size=10, pooling_size=3, dropout=0.5):
    ''' Create FCN model '''
    num_features0 = num_features
    num_features1 = math.floor(1.5 * num_features)
    nb_classes = 2
    x = Input(shape=(input_shape))
    conv_x = keras.layers.Conv1D(num_features0, filter_size, activation='relu')(x)
    conv_x = keras.layers.Conv1D(num_features0, filter_size, activation='relu')(conv_x)
    conv_x = keras.layers.MaxPooling1D(pooling_size)(conv_x)
    conv_x = keras.layers.Conv1D(num_features1, filter_size, activation='relu')(conv_x)
    conv_x = keras.layers.Conv1D(num_features1, filter_size, activation='relu')(conv_x)
    full = keras.layers.GlobalAveragePooling1D()(conv_x)
    y = Dropout(dropout,name='Dropout')(full)
    out = Dense(nb_classes, activation='sigmoid')(full)
    model = Model(x, out)
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model


def create_fcn_simple():
    ''' Create FCN model '''
    input_shape=(150,1)
    num_features=100
    filter_size=10
    pooling_size=3
    dropout=0.5
    num_features0 = num_features
    num_features1 = math.floor(1.5 * num_features)
    nb_classes = 2
    x = Input(shape=(input_shape))
    conv_x = keras.layers.Conv1D(num_features0, filter_size, activation='relu')(x)
    conv_x = keras.layers.Conv1D(num_features0, filter_size, activation='relu')(conv_x)
    conv_x = keras.layers.MaxPooling1D(pooling_size)(conv_x)
    conv_x = keras.layers.Conv1D(num_features1, filter_size, activation='relu')(conv_x)
    conv_x = keras.layers.Conv1D(num_features1, filter_size, activation='relu')(conv_x)
    full = keras.layers.GlobalAveragePooling1D()(conv_x)
    y = Dropout(dropout,name='Dropout')(full)
    out = Dense(nb_classes, activation='sigmoid')(full)
    model = Model(x, out)
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model

In [166]:
def prepare_data(y):
    nb_classes = 2
    y = (y - y.min())/(y.max()-y.min())*(nb_classes-1)
    Y = utils.to_categorical(y, nb_classes)
    return Y

In [167]:
# load dataset
#dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
#X = dataset[:,0:8]
#Y = dataset[:,8]

x_train, y_train = readucr(fdir+'/'+fname+'/'+fname+'_TRAIN.txt')
x_test, y_test = readucr(fdir+'/'+fname+'/'+fname+'_TEST.txt')
X = np.concatenate((x_train, x_test), axis=0)
Y = np.concatenate((y_train, y_test), axis=0)
X = X.reshape(X.shape + (1,))
input_shape = X.shape[1:]
print(input_shape)
Y = prepare_data(Y)

# Add callbacks
callbacks = []
tb_dir = tensorboard_dir+'/'+fname
Path(tb_dir).mkdir(parents=True, exist_ok=True) 
callbacks.append(keras.callbacks.TensorBoard(log_dir=tb_dir, histogram_freq=0))



# create model
#model = KerasClassifier(build_fn=create_model, verbose=0)
#model = KerasClassifier(build_fn=create_functional_model, verbose=1,
#                        batch_size=32
#                        callbacks=callbacks)

# define the grid search parameters
batch_size = 32
epochs = 10
num_features = [32] # [32, 64]
filter_size = [4] # [4, 16]
pooling_size = [4] # [4, 8]
dropout = [0.5]
#param_grid = dict(num_features=num_features, filter_size=filter_size, pooling_size=pooling_size, dropout=dropout)
param_grid = dict(pooling_size=pooling_size)

mode = 1
if mode == 0:
    model = KerasClassifier(build_fn=create_fcn, 
                            input_shape=input_shape,
                            num_features=32, filter_size=4,
                            dropout=0.5,
                            batch_size=batch_size, epochs=epochs,
                           verbose=1)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, error_score=0) #fit_params={'callbacks': callbacks})
    grid_result = grid.fit(X, Y)
elif mode == 1:
    param_grid = dict(batch_size=[32, 64])
    model = KerasClassifier(build_fn=create_fcn_simple, 
                               epochs=epochs,
                               verbose=1)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, error_score=0) #fit_params={'callbacks': callbacks})
    grid_result = grid.fit(X, Y)
else:
    model = create_fcn(input_shape=input_shape,
                        num_features=32, filter_size=4, pooling_size=4,
                        dropout=0.5)
    model.fit(X, Y, batch_size=batch_size, epochs=epochs, verbose=1)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
     print("%f (%f) with: %r" % (mean, stdev, param))

(150, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Best: 0.815000 using {'batch_size': 32}
0.815000 (0.131048) with: {'batch_size': 32}
0.572500 (0.077178) with: {'batch_size': 64}


In [168]:
grid_result.cv_results_
cv = pd.DataFrame(grid_result.cv_results_)
cv

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_batch_size,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,20.831505,5.597483,0.815,0.837158,32,{'batch_size': 32},1,0.873134,0.842105,0.634328,0.695489,0.939394,0.973881,0.289463,0.065676,0.131048,0.113707
1,21.255438,5.996495,0.5725,0.622685,64,{'batch_size': 64},2,0.679105,0.650376,0.537313,0.669173,0.5,0.548507,0.18191,0.142516,0.077178,0.05301
