# Training_NN Notebook

In [1]:
# This Notebook is used to build, train, and evaluate a simple Backpropagation NN
import os

import cv2
import numpy as np
from keras.callbacks import EarlyStopping
from keras.layers.advanced_activations import PReLU
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.regularizers import l2
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
# Read the arrays written by the preprocessing step:
# the training set first, then the held-out test set
features = np.load("./PP_Data/features.npy")
labels = np.load("./PP_Data/labels.npy")

test_features = np.load("./PP_Data/test_features.npy")
test_labels = np.load("./PP_Data/test_labels.npy")

# Width of the feature matrix -> input nodes of the first layer;
# width of the label matrix -> output nodes of the last layer
n_input, n_class = features.shape[1], labels.shape[1]

# Batch size for training: one tenth of the training samples. Per the original
# author, this equals the number of original training images, i.e. the current
# sample count divided by the number of epochs used when generating new images.
n_samples = features.shape[0]
batch_size = n_samples // 10

# Display the number of input features, classes, and the batch size
(n_input, n_class, batch_size)

(225, 4, 68)

In [3]:
# This is the base function used to create a Keras NN using sklearn's RandomizedCV optimization
# based off of http://ethen8181.github.io/machine-learning/keras/nn_keras_hyperparameter_tuning.html
def build_keras_base(hidden_layers = [50], dropout_rate = 0, 
                     l2_penalty = 0.1, optimizer = 'adam',
                     n_input = n_input, n_class = n_class):
    """
    Build and compile a Keras multi-layer (fully connected) neural network.

    Every hidden block is: Dense (with l2 kernel regularization)
    -> BatchNormalization -> PReLU -> optional Dropout.
    The output layer is a Dense layer with n_class units followed by softmax.

    Parameters
    ----------
    hidden_layers: list or tuple of int
        one entry per hidden layer, giving the number of units in that layer.
        The default list is only iterated, never mutated.

    dropout_rate: float 0 ~ 1
        if bigger than 0, a dropout layer with this rate follows
        every hidden block

    l2_penalty: float
        l2 regularization factor applied to the kernel of each hidden
        Dense layer

    optimizer: string or keras optimizer
        method to train the network, forwarded to model.compile

    n_input: int
        number of input features (input_dim of the first Dense layer)

    n_class: int
        number of units in the output layer

    Returns
    -------
    model :
        a compiled keras Sequential model that tracks the 'accuracy' metric

    Reference
    ---------
    https://keras.io/scikit-learn-api/
    """
    model = Sequential() # model type
    # Dynamically make hidden layers based on parameters
    for index, n_units in enumerate(hidden_layers):
        if index == 0:
            # only the first layer needs input_dim (the number of features)
            model.add(Dense(n_units, input_dim = n_input, kernel_regularizer = l2(l2_penalty)))
        else:
            model.add(Dense(n_units, kernel_regularizer = l2(l2_penalty)))

        # BatchNorm goes immediately after the fully connected layer
        # and before the activation layer
        model.add(BatchNormalization())
        model.add(PReLU())
        if dropout_rate:
            # Keras 2 names this argument `rate`; `p` is the deprecated Keras 1 spelling
            model.add(Dropout(rate = dropout_rate))

    model.add(Dense(n_class))
    model.add(Activation('softmax'))

    # the loss for binary and multi-class classification is different
    loss = 'binary_crossentropy'
    if n_class > 2:
        loss = 'categorical_crossentropy'

    model.compile(loss = loss, optimizer = optimizer, metrics = ['accuracy'])
    return model

In [4]:
# Wrap the builder so scikit-learn can treat the network as an estimator;
# the fixed parameters n_input and n_class are passed along for cross-validation
model_keras = KerasClassifier(
    build_fn = build_keras_base,
    n_input = n_input,
    n_class = n_class,
)

# The following is also based on http://ethen8181.github.io/machine-learning/keras/nn_keras_hyperparameter_tuning.html
# random search's parameter:
# specify the options and store them inside the dictionary
# batch size and training method can also be hyperparameters, 
# but they are fixed here
early_stop = EarlyStopping(
    monitor = 'val_loss', min_delta = 0.1, patience = 5, verbose = 0)

# Callbacks can be used when fitting the data to allow the process to stop when certain criteria are met
callbacks = [early_stop]

# Set the parameters for fitting the model
# validation_data must be an (inputs, targets) tuple for Keras' fit();
# the previous {'input': ..., 'output': ...} dict form is not accepted by Sequential models
keras_fit_params = {   
    'callbacks': callbacks,
    'epochs': 100,
    'batch_size': batch_size,
    'validation_data': (test_features, test_labels),
    'verbose': 0
}

# All lists of parameters will be used to determine the optimal model during cross validation
l2_penalty_opts = [0.01, 0.1, 0.5]
dropout_rate_opts  = [0, 0.2, 0.5]
hidden_layers_opts = []

# For me this was the most important choice, since it seemed as though most of the 
# other parameters are typically used in object recognition
# For the number of nodes and hidden layers, create all possible combinations of single and
# double hidden layers with 1 to 999 nodes in each (range(1, 1000) excludes 1000)
for i in range(1, 1000):
    hidden_layers_opts.append((i,))
    hidden_layers_opts.extend((i, j) for j in range(1, 1000))

# set parameters that will be optimized
keras_param_options = {
    'hidden_layers': hidden_layers_opts,
    'dropout_rate': dropout_rate_opts,  
    'l2_penalty': l2_penalty_opts,
}

In [5]:
# Create RandomizedSearchCV object containing previously created parameters
# based on http://ethen8181.github.io/machine-learning/keras/nn_keras_hyperparameter_tuning.html
# random_state fixes which hyperparameter candidates are sampled, so a
# Restart & Run All evaluates the same candidates every time
# (network weight initialisation is still unseeded)
rs_keras = RandomizedSearchCV( 
    model_keras, 
    param_distributions = keras_param_options,
    scoring = 'neg_log_loss',
    cv = 5,
    n_jobs = -1,
    verbose = 10,
    random_state = 42
)

# Fit the data
# This will run cross validation on the data set and determine the optimal
# parameters for a neural network based off the options provided
# The optimal model can then be immediately accessed and saved
# The extra keyword arguments are forwarded to the Keras model's fit()
# NOTE(review): the test set is used as validation_data for early stopping here
# and is evaluated again in a later cell, so that later score is not a fully
# independent estimate - consider a separate validation split
rs_keras.fit(features, labels,
             validation_data = (test_features, test_labels),
             callbacks = callbacks,
             epochs = 100,
             batch_size = batch_size,
             verbose = 1)

# Output optimal parameters and the corresponding score (negative log loss)
print('Best score obtained: {0}'.format(rs_keras.best_score_))
print('Parameters:')
for param, value in rs_keras.best_params_.items():
    print('\t{}: {}'.format(param, value))

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   32.4s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   59.8s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done  41 out of  50 | elapsed:  3.9min remaining:   50.8s
[Parallel(n_jobs=-1)]: Done  47 out of  50 | elapsed:  4.7min remaining:   17.9s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  4.7min finished



Train on 680 samples, validate on 23 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Best score obtained: -0.13653539193666586
Parameters:
	l2_penalty: 0.01
	hidden_layers: (31, 956)
	dropout_rate: 0.2


In [6]:
# Score the best model from the search once more on the validation data so the
# result is reported as an easy to read loss / accuracy pair
# based on https://www.pyimagesearch.com/2016/09/26/a-simple-neural-network-with-python-and-keras/
best_model = rs_keras.best_estimator_.model
loss, accuracy = best_model.evaluate(
    test_features, test_labels, batch_size=5, verbose=1)

accuracy_percent = accuracy * 100
print("loss={:.4f}, accuracy: {:.4f}%".format(loss, accuracy_percent))

# announce that the network architecture and weights are about to be written to file
print("dumping architecture and weights to file...")

loss=0.2865, accuracy: 100.0000%
dumping architecture and weights to file...


In [7]:
# model.save() cannot create missing parent directories, so make sure the
# output folder exists before writing the HDF5 file
os.makedirs("./Models", exist_ok=True)
rs_keras.best_estimator_.model.save("./Models/model.hdf5") # Save the model 

In [8]:
# If the parameters are already known from previous CV tests, they can be passed manually to the base Keras
# model function and the resulting model fitted directly
# This should be faster than running cross-validation, although a better set of parameters could be missed
# if the input data has changed by a significant amount

In [9]:
# Example values based on my results
# l2_pen = 0.01
# hl = [159, 993]
# dr = 0.2
# l2_pen,hl,dr

In [10]:
# model = build_keras_base(hidden_layers=hl,dropout_rate=dr,l2_penalty=l2_pen)

In [11]:
# based on https://www.pyimagesearch.com/2016/09/26/a-simple-neural-network-with-python-and-keras/
# model.fit(features, labels, epochs=100, batch_size=batch_size,
#     verbose=1)

# # show the accuracy on the testing set
# (loss, accuracy) = model.evaluate(test_features, test_labels,
#                                   batch_size=5, verbose=1)

# print("loss={:.4f}, accuracy: {:.4f}%".format(loss, accuracy * 100))

# model.save("./diff_model.hdf5")