## Mount Google Drive in Colab

In [0]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read files stored there.
# force_remount=True asks Colab to remount even if the drive is already mounted.
drive.mount('/content/drive', force_remount=True)

## Perform imports

In [0]:
import numpy as np
%tensorflow_version 1.x
import keras
from keras.layers import Input, Dense, Conv2D, Flatten, Conv2DTranspose
from keras.models import Model
from keras.optimizers import SGD

TensorFlow 1.x selected.


Using TensorFlow backend.


## Load balanced dataset

In [0]:
# Load the saved dataset array from the mounted Drive folder (needs the Drive mount at /content/drive).
data = np.load('/content/drive/My Drive/Final Capstone/Balanced Data.npy')

## Separate image data from label data

In [0]:
# every column except the last is image data (X); the last column is the label (Y)
X, Y = data[:, :-1], data[:, -1:]
# the combined array is not needed under this name any more
del data

## Reshape image and label data and create one-hot encoding for label data

In [0]:
# image data: one flat row per sample -> (samples, 50, 50, 3), filled in row-major ('C') order
X = X.reshape((X.shape[0], 50, 50, 3), order='C')
# label data: collapse to a flat vector with one entry per sample
Y = Y.reshape(Y.shape[0])
# one-hot encode the labels into 2 float32 columns
Y = keras.utils.to_categorical(Y, num_classes=2, dtype='float32')

## Create, compile, and fit autoencoder

In [0]:
# input takes the per-sample shape of X (everything after the sample axis)
input_layer = Input(shape=X.shape[1:4])

# encoder: two 3x3 convolutions; no padding argument is given, so Keras' default padding applies
encode_layer1 = Conv2D(
    filters=32,
    kernel_size=(3, 3),
    activation='relu',
    data_format='channels_last',
)(input_layer)
encode_layer2 = Conv2D(
    filters=32,
    kernel_size=(3, 3),
    activation='relu',
    data_format='channels_last',
)(encode_layer1)

# decoder: two 3x3 transposed convolutions mirroring the encoder; the last one maps back to 3 channels
decode_layer1 = Conv2DTranspose(
    filters=32,
    kernel_size=(3, 3),
    activation='relu',
    data_format='channels_last',
)(encode_layer2)
# NOTE(review): the sigmoid output is bounded to (0, 1), so reconstructing X with MSE only makes
# sense if X is scaled to that range -- confirm how the .npy file was prepared.
decode_layer2 = Conv2DTranspose(
    filters=3,
    kernel_size=(3, 3),
    activation='sigmoid',
    data_format='channels_last',
)(decode_layer1)

model = Model(inputs=input_layer, outputs=decode_layer2)

# the autoencoder is trained to reproduce its own input, hence X is both input and target
model.compile(loss='mse', optimizer='sgd')
model.fit(x=X, y=X, validation_split=0.2, epochs=30, verbose=1)

## Extract encoder from autoencoder

In [0]:
# model.layers[0] is the input layer, so layers[2] is the second Conv2D; the encoder is the
# autoencoder truncated after that layer and reuses the same (already trained) layer objects.
encoder = Model(model.input, model.layers[2].output)

## Create model that uses encoder output as input to dense layers

In [0]:
# flatten the encoder's feature maps so they can feed dense layers
flatten_layer = Flatten(data_format='channels_last')(encoder.output)
# hidden dense layer (despite its name, this is not the network's output)
output_layer = Dense(units=64, activation='relu')(flatten_layer)
# 2-unit softmax: one score per class, matching the 2-column one-hot labels
output_layer1 = Dense(units=2, activation='softmax')(output_layer)

# end-to-end classifier; it is built on the encoder's own layers and nothing freezes them,
# so training this model also updates the encoder weights
overall_model = Model(encoder.input, output_layer1)

## Train and evaluate model

The model is trained and evaluated similarly to the other architectures.

In [0]:
# function used to train models, same as function of same name in 'Run NN Architectures -- #4.ipynb'
def optimize_model(model, X, Y, val_split, initial_lr):
  """Train `model` one epoch at a time with SGD, halving the learning rate whenever the loss rises.

  Each attempt compiles the model (binary cross-entropy, accuracy metric) and fits a
  single epoch. If the epoch's training loss is higher than the loss of the last
  accepted epoch, the weights from before the attempt are restored, the learning
  rate is halved and the epoch is retried. Otherwise the epoch is accepted and its
  metrics are appended to the running history.

  Training stops once at least 5 epochs were accepted and either 20 epochs were
  reached or the latest validation accuracy is lower than the previous epoch's.

  Args:
    model: Keras model to train (modified in place).
    X: training inputs.
    Y: training targets.
    val_split: fraction passed to `fit` as `validation_split`; presumably must be
      > 0 so that 'val_acc' is recorded -- the stop test reads that key.
    initial_lr: learning rate of the first SGD optimizer.

  Returns:
    The history dict of the accepted epochs (one list entry per epoch and metric).

  NOTE(review): there is no lower bound on the learning rate, so the retry loop
  does not terminate if the loss never drops back to the last accepted value.
  """
  epoch_count = 0
  min_epochs = 5
  max_epochs = 20
  history_dict = None
  optimizer = SGD(lr=initial_lr)
  # read the rate back from the optimizer so later halvings start from the value it actually stores
  current_lr = optimizer.get_config()['lr']

  # outer loop: one accepted epoch per iteration, ended by the validation-accuracy / epoch-cap test
  while True:
    # inner loop: retry the same epoch with a smaller learning rate until the loss does not rise
    while True:
      model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
      weights_before = model.get_weights()
      epoch_history = model.fit(X, Y, epochs=1, validation_split=val_split, verbose=1)

      # the first epoch is always accepted and seeds the history
      if history_dict is None:
        history_dict = epoch_history.history
        break

      last_loss = history_dict['loss'][-1]
      curr_loss = epoch_history.history['loss'][0]
      if curr_loss <= last_loss:
        for key, values in history_dict.items():
          values.append(epoch_history.history[key][0])
        break

      # loss went up: undo the epoch and try again with half the learning rate
      optimizer = SGD(lr=current_lr/2)
      current_lr = optimizer.get_config()['lr']
      model.set_weights(weights_before)

    epoch_count += 1

    val_acc = history_dict['val_acc']
    if epoch_count >= min_epochs and (
        epoch_count == max_epochs
        or val_acc[epoch_count - 1] < val_acc[epoch_count - 2]):
      break
  return history_dict

In [0]:
# First training run: used to obtain the per-epoch history (the call returns the history dict),
# holding out the last 20% of the samples for validation.
results_dict = optimize_model(overall_model, X, Y, val_split=0.2, initial_lr=0.01)

In [0]:
val_acc_list = results_dict['val_acc']

if not val_acc_list:
  raise ValueError('results_dict contains no validation accuracy values')

# Pick the number of epochs to retrain for: start at the final epoch and step back over
# every trailing epoch whose validation accuracy fell below the epoch before it.
# This single rule covers both ways the first training run can end:
#   * stopped on a validation drop -> the dropped epoch(s) are discarded;
#   * stopped at the epoch cap without a drop -> the final epoch is kept
#     (the previous `len(...) - 1` shortcut discarded it unconditionally).
# The `begin_index > 0` guard keeps the comparison from wrapping around to
# val_acc_list[-1] when the first epoch is reached.
begin_index = len(val_acc_list) - 1
while begin_index > 0 and val_acc_list[begin_index] < val_acc_list[begin_index - 1]:
  begin_index -= 1

# epochs are counted from 1, list positions from 0
max_epoch = begin_index + 1

In [0]:
def optimize_model(model, X, Y, val_split, initial_lr, epoch_to_stop):
  """Train `model` for a fixed number of accepted epochs, halving the learning rate whenever the loss rises.

  This definition replaces the earlier `optimize_model` of the same name: it takes
  an explicit epoch budget instead of stopping on validation accuracy, and it
  returns the trained model rather than the history.

  Each attempt compiles the model (binary cross-entropy, accuracy metric) and fits a
  single epoch. If the epoch's training loss is higher than the loss of the last
  accepted epoch, the weights from before the attempt are restored, the learning
  rate is halved and the epoch is retried; otherwise the epoch is accepted.

  Args:
    model: Keras model to train (modified in place).
    X: training inputs.
    Y: training targets.
    val_split: fraction passed to `fit` as `validation_split`.
    initial_lr: learning rate of the first SGD optimizer.
    epoch_to_stop: number of accepted epochs after which training ends; the check
      runs after an epoch completes, so at least one epoch is always trained.

  Returns:
    The same `model` object, after training.

  NOTE(review): there is no lower bound on the learning rate, so the retry loop
  does not terminate if the loss never drops back to the last accepted value.
  """
  epoch_count = 0
  history_dict = None
  optimizer = SGD(lr=initial_lr)
  # read the rate back from the optimizer so later halvings start from the value it actually stores
  current_lr = optimizer.get_config()['lr']

  # outer loop: one accepted epoch per iteration, until the epoch budget is used up
  while True:
    # inner loop: retry the same epoch with a smaller learning rate until the loss does not rise
    while True:
      model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
      weights_before = model.get_weights()
      epoch_history = model.fit(X, Y, epochs=1, validation_split=val_split, verbose=1)

      # the first epoch is always accepted and seeds the history
      if history_dict is None:
        history_dict = epoch_history.history
        break

      last_loss = history_dict['loss'][-1]
      curr_loss = epoch_history.history['loss'][0]
      if curr_loss <= last_loss:
        for key, values in history_dict.items():
          values.append(epoch_history.history[key][0])
        break

      # loss went up: undo the epoch and try again with half the learning rate
      optimizer = SGD(lr=current_lr/2)
      current_lr = optimizer.get_config()['lr']
      model.set_weights(weights_before)

    epoch_count += 1

    if epoch_count >= epoch_to_stop:
      break
  return model

In [0]:
def evaluate_model(X_val, Y_val, model):
  """Print sensitivity, specificity and accuracy of `model` on a validation set.

  Args:
    X_val: inputs, passed unchanged to `model.predict`.
    Y_val: true labels, one row per sample; a sample's class is the argmax of its
      row. Class 1 is counted as positive, class 0 as negative; any other class
      is ignored.
    model: object whose `predict(X_val)` returns one row of class scores per
      sample; the predicted class is the argmax of that row.

  Returns:
    None. The three metrics are printed as percentages. A metric whose
    denominator is zero (for example sensitivity when there are no positive
    samples) is printed as nan instead of raising ZeroDivisionError.
  """
  Y_pred = model.predict(X_val)
  n_samples = Y_pred.shape[0]

  # per-sample class indices, taken row by row so only the first n_samples labels are read
  true_cls = np.array([np.argmax(Y_val[i]) for i in range(n_samples)], dtype=int)
  pred_cls = np.array([np.argmax(Y_pred[i]) for i in range(n_samples)], dtype=int)

  is_pos = true_cls == 1
  is_neg = true_cls == 0
  is_correct = true_cls == pred_cls

  # plain Python ints keep the printed float formatting independent of NumPy scalar types
  pos = int(is_pos.sum())
  neg = int(is_neg.sum())
  pos_correct = int((is_pos & is_correct).sum())
  neg_correct = int((is_neg & is_correct).sum())

  def percent(hits, total):
    # an empty class has no defined rate; report nan rather than dividing by zero
    return (hits / total) * 100 if total else float('nan')

  sensitivity = percent(pos_correct, pos)
  specificity = percent(neg_correct, neg)
  accuracy = percent(pos_correct + neg_correct, pos + neg)

  print('Sensitivity:  {}%\nSpecificity:  {}%\nAccuracy:  {}%'.format(sensitivity, specificity, accuracy))

In [0]:
from math import ceil  # no longer used by this cell; import kept in case other cells rely on it
# Recreate the hold-out set that `fit(..., validation_split=0.2)` used: Keras takes the LAST
# fraction of the samples and finds the split position by truncation,
# int(n * (1 - validation_split)). Rounding up with ceil instead can start the slice one
# sample too late and leave out the first validation sample.
val_start = int(X.shape[0] * (1 - 0.2))
X_val = X[val_start:]
Y_val = Y[val_start:]

In [0]:
# Rebuild the classification head with newly created Dense layers on top of the same encoder.
flatten_layer = Flatten(data_format='channels_last')(encoder.output)
output_layer = Dense(units=64, activation='relu')(flatten_layer)
output_layer1 = Dense(units=2, activation='softmax')(output_layer)

overall_model = Model(encoder.input, output_layer1)

# NOTE(review): the encoder layers are the same objects that the earlier optimize_model run
# trained, so this retraining starts from the weights left by that run rather than from the
# autoencoder-only weights -- confirm this is intended.
# optimize_model here is the later definition (the one taking epoch_to_stop); it returns the trained model.
trained_model = optimize_model(overall_model, X, Y, val_split=0.2, initial_lr=0.01, epoch_to_stop=max_epoch)

In [0]:
# Report sensitivity, specificity and accuracy of the retrained model on the held-out samples.
evaluate_model(X_val, Y_val, trained_model)

Sensitivity:  79.07081063761615%
Specificity:  89.02024902527984%
Accuracy:  84.09242390579871%


The model that uses an autoencoder for feature extraction achieves roughly the same accuracy as the model selected from the other architectures, but shows a larger gap between sensitivity and specificity.  Therefore, the model chosen from the architectures that do not use an autoencoder is selected for the hyperparameter grid search.