## Mount Google Drive in Colab

In [0]:
from google.colab import drive
# Mount Google Drive at '/content/drive'; later cells read the dataset from and
# write JSON logs to folders under '/content/drive/My Drive/'.
# NOTE(review): force_remount=True presumably re-mounts even when Drive is
# already mounted, so this cell can be re-run -- confirm against the Colab docs.
drive.mount('/content/drive', force_remount=True)

## Perform imports

In [0]:
import numpy as np
%tensorflow_version 1.x
import keras
from keras.layers import Input, Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from keras.models import Model
from keras.optimizers import SGD
import json
from os import listdir
from math import ceil
import pandas as pd

## Load balanced dataset

In [0]:
# location of the balanced dataset on the mounted Drive
balanced_data_path = '/content/drive/My Drive/Final Capstone/Balanced Data.npy'
# a single 2-D array; the next cell splits it column-wise into images and labels
data = np.load(balanced_data_path)

## Separate image data from label data

In [0]:
# every column except the last is image data; the last column is the label
X, Y = data[:, :-1], data[:, -1:]
# the combined array is not referenced again, so drop its name
del data

## Reshape image and label data and create one-hot encoding for label data

In [0]:
# image data: turn each flat row into a 50 x 50 x 3 array (C ordering)
X = X.reshape((X.shape[0], 50, 50, 3), order='C')
# label data: flatten the single label column to 1-D, then one-hot encode it
# into 2 classes stored as float32
Y = keras.utils.to_categorical(Y.reshape(Y.shape[0]), num_classes=2, dtype='float32')

## Set parameters over which to grid search

In [0]:
# ---- fixed settings (not searched over) ----
# share of the dataset Keras holds out for validation; by default Keras takes it
# from the bottom of the dataset
val_split = 0.2
# pool size of the MaxPooling2D layers
pool_size = (2, 2)
# learning rate the optimizer starts from
initial_lr = 0.01

# ---- settings the grid search iterates over ----
# filter count of the first Conv2D layer
filters_list = [16, 32, 64]
# second Conv2D layer's filter count, expressed as a multiple of the first layer's
filters2_mult_list = [0.5, 1, 2]
# kernel size used by the Conv2D layers
kernel_size_list = [(3, 3), (5, 5)]
# rate of the Dropout layer
dropout_rate_list = [0.25, 0.5]
# width of the hidden dense layer that precedes the prediction dense layer
hidden_dense_size_list = [32, 64, 128]

## Train, optimize, and evaluate models using the same procedure as the previous training, optimization, and evaluation of models

In [0]:
def optimize_model(model, X, Y, val_split, initial_lr):
  """Train `model` one epoch at a time with SGD and return the per-epoch history.

  An epoch is kept only if its training loss is not higher than the previously
  kept epoch's training loss.  Otherwise the weights from before that epoch are
  restored, the learning rate is halved, and the epoch is retried.  Training
  runs for at least 5 and at most 20 kept epochs; from the 5th kept epoch on it
  stops as soon as validation accuracy drops relative to the previous epoch.

  Args:
    model: Keras model to train; it is (re)compiled here and trained in place.
    X: training inputs passed to `model.fit`.
    Y: training targets passed to `model.fit`.
    val_split: value passed to `model.fit` as `validation_split`.
    initial_lr: learning rate of the first SGD optimizer.

  Returns:
    dict mapping each history key reported by `model.fit` (this function reads
    'loss' and 'val_acc') to a list of built-in floats, one per kept epoch.
  """
  min_epochs = 5
  max_epochs = 20
  epoch_count = 0
  history_dict = None
  # The learning rate is tracked in a plain variable instead of being read back
  # with sgd.get_config()['lr']: the config key is 'lr' in older Keras releases
  # but 'learning_rate' in Keras >= 2.3, where the lookup raises KeyError.
  current_lr = initial_lr
  sgd = SGD(lr=current_lr)
  # while loop based on the epoch limits and validation accuracy
  while True:
    # while loop based on learning rate reduction
    # NOTE(review): there is no lower bound on the learning rate, so this loop
    # keeps halving for as long as the training loss keeps getting worse.
    while True:
      # The metric is requested as 'acc' so the history keys are 'acc' and
      # 'val_acc' on every Keras version (Keras >= 2.3 reports metrics under the
      # requested name, so 'accuracy' would yield 'val_accuracy' there).
      model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['acc'])
      old_weights = model.get_weights()
      history = model.fit(X, Y, epochs=1, validation_split=val_split, verbose=1)
      # one epoch was run, so every history list has exactly one entry; coerce
      # to built-in float so the returned dict can always be passed to json.dump
      epoch_log = {key: float(values[0]) for key, values in history.history.items()}

      if history_dict is None:
        # first epoch: nothing to compare against, always kept
        history_dict = {key: [value] for key, value in epoch_log.items()}
        break
      if epoch_log['loss'] <= history_dict['loss'][-1]:
        for key in history_dict:
          history_dict[key].append(epoch_log[key])
        break
      # training loss got worse: undo the epoch and retry at half the rate
      current_lr /= 2
      sgd = SGD(lr=current_lr)
      model.set_weights(old_weights)

    epoch_count += 1

    if epoch_count >= min_epochs:
      if epoch_count == max_epochs:
        break
      # exactly one entry is appended per kept epoch, so [-1] / [-2] are the
      # current and the previous epoch
      if history_dict['val_acc'][-1] < history_dict['val_acc'][-2]:
        break
  return history_dict

In [0]:
# Conv2D, Conv2D, MaxPooling, Flatten, Dropout, Dense, Dense
#
# Grid search: build and train one model per combination of the searched
# settings and write its training history plus settings to '<count>.json'.

log_dir = '/content/drive/My Drive/Final Capstone/Model Logs -- Selected Architecture/'

count = 0
for filters in filters_list:
  for filters2_mult in filters2_mult_list:
    for kernel_size in kernel_size_list:
      for dropout_rate in dropout_rate_list:
        for hidden_dense_size in hidden_dense_size_list:
          # Many models are built in this loop; without clearing, Keras keeps the
          # graph/session state of every earlier model alive, so memory use grows
          # with each combination.  The previous model is no longer needed here.
          keras.backend.clear_session()

          input_layer = Input(shape=X.shape[1:4])
          output_layer1 = Conv2D(filters=filters, kernel_size=kernel_size, strides=(1, 1), data_format='channels_last', activation='relu')(input_layer)
          # second Conv2D layer: filter count is a multiple of the first layer's
          output_layer2 = Conv2D(filters=int(filters * filters2_mult), kernel_size=kernel_size, strides=(1, 1), data_format='channels_last', activation='relu')(output_layer1)
          # NOTE(review): strides=(1, 1) differs from Keras' default (stride equal
          # to pool_size), so the pooling windows overlap -- confirm this is intended.
          output_layer3 = MaxPooling2D(pool_size=pool_size, strides=(1, 1), data_format='channels_last')(output_layer2)
          output_layer4 = Flatten(data_format='channels_last')(output_layer3)
          output_layer5 = Dropout(rate=dropout_rate)(output_layer4)
          output_layer6 = Dense(units=hidden_dense_size, activation='relu')(output_layer5)
          output_layer7 = Dense(units=2, activation='softmax')(output_layer6)
          model = Model(input_layer, output_layer7)

          # optimize_model returns the training history dict; the settings of this
          # combination are stored alongside it so each log file is self-describing
          output_dict = optimize_model(model, X, Y, val_split, initial_lr)
          output_dict['filters'] = filters
          output_dict['filters2_mult'] = filters2_mult
          output_dict['kernel_size'] = kernel_size
          output_dict['dropout_rate'] = dropout_rate
          output_dict['hidden_dense_size'] = hidden_dense_size

          # log files are numbered 1, 2, ... in grid order
          count += 1

          with open(log_dir + str(count) + '.json', 'w') as f:
            json.dump(output_dict, f)

In [0]:
path = '/content/drive/My Drive/Final Capstone/Model Logs -- Selected Architecture/'

# one entry per grid-search log file: the model settings plus the number of
# epochs to train before validation accuracy started to decline
model_dict = {
    'filters' : [],
    'filters2' : [],
    'kernel_size' : [],
    'dropout_rate' : [],
    'hidden_dense_size' : [],
    'max_epoch' : []
}


def _epochs_before_decline(val_acc_list):
  """Return how many epochs to train so training ends before the final decline.

  Walks back from the last epoch over every epoch whose validation accuracy is
  lower than the one before it and returns the 1-based number of the epoch
  where that trailing decline starts.  A history whose last epoch did not
  decline (e.g. a run that stopped only because it hit the epoch cap) keeps all
  of its epochs.  The walk stops at the first epoch explicitly instead of
  relying on index -1 wrapping around to the end of the list.

  Raises:
    ValueError: if `val_acc_list` is empty.
  """
  if not val_acc_list:
    raise ValueError('val_acc history is empty')
  index = len(val_acc_list) - 1
  while index > 0 and val_acc_list[index] < val_acc_list[index - 1]:
    index -= 1
  return index + 1


def _log_file_order(file_name):
  """Sort key: numerically named logs ('2.json' before '10.json') first, then the rest by name."""
  stem = file_name[:-len('.json')]
  if stem.isdecimal():
    return (0, int(stem), '')
  return (1, 0, stem)


# listdir returns entries in arbitrary order and may contain non-log entries;
# keep only the JSON logs and process them in a deterministic order
log_files = sorted((name for name in listdir(path) if name.endswith('.json')), key=_log_file_order)

for file in log_files:
  with open(path + file) as f:
    results_dict = json.load(f)
  val_acc_list = results_dict['val_acc']
  max_epoch = _epochs_before_decline(val_acc_list)

  model_dict['filters'].append(results_dict['filters'])
  # the log stores the multiplier; convert it to the actual second-layer filter count
  model_dict['filters2'].append(int(results_dict['filters'] * results_dict['filters2_mult']))
  model_dict['kernel_size'].append(results_dict['kernel_size'])
  model_dict['dropout_rate'].append(results_dict['dropout_rate'])
  model_dict['hidden_dense_size'].append(results_dict['hidden_dense_size'])
  model_dict['max_epoch'].append(max_epoch)

In [0]:
def optimize_model(model, X, Y, val_split, initial_lr, epoch_to_stop):
  """Train `model` for a fixed number of kept epochs and return the trained model.

  This definition reuses the name of the `optimize_model` defined earlier in
  the notebook and replaces it once this cell has run; unlike the earlier one
  it takes `epoch_to_stop` and returns the model rather than the history.

  An epoch is kept only if its training loss is not higher than the previously
  kept epoch's training loss.  Otherwise the weights from before that epoch are
  restored, the learning rate is halved, and the epoch is retried.

  Args:
    model: Keras model to train; it is (re)compiled here and trained in place.
    X: training inputs passed to `model.fit`.
    Y: training targets passed to `model.fit`.
    val_split: value passed to `model.fit` as `validation_split`.
    initial_lr: learning rate of the first SGD optimizer.
    epoch_to_stop: number of kept epochs after which training stops.  At least
      one epoch is always trained, because the check runs after each epoch.

  Returns:
    The same `model` object, after training.
  """
  # The learning rate is tracked in a plain variable instead of being read back
  # with sgd.get_config()['lr']: the config key is 'lr' in older Keras releases
  # but 'learning_rate' in Keras >= 2.3, where the lookup raises KeyError.
  current_lr = initial_lr
  sgd = SGD(lr=current_lr)
  # only the last kept training loss is needed to decide whether to keep an epoch
  last_loss = None
  epoch_count = 0
  # while loop based on the requested number of epochs
  while True:
    # while loop based on learning rate reduction
    # NOTE(review): there is no lower bound on the learning rate, so this loop
    # keeps halving for as long as the training loss keeps getting worse.
    while True:
      model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])
      old_weights = model.get_weights()
      history = model.fit(X, Y, epochs=1, validation_split=val_split, verbose=0)
      curr_loss = history.history['loss'][0]

      # the first epoch is always kept; later ones only if the loss did not rise
      if last_loss is None or curr_loss <= last_loss:
        last_loss = curr_loss
        break
      # training loss got worse: undo the epoch and retry at half the rate
      current_lr /= 2
      sgd = SGD(lr=current_lr)
      model.set_weights(old_weights)

    epoch_count += 1

    if epoch_count >= epoch_to_stop:
      break
  return model

In [0]:
def _percentage(numerator, denominator):
  """Return numerator / denominator as a percentage, or NaN when the denominator is 0."""
  if denominator == 0:
    return float('nan')
  return (numerator / denominator) * 100


def evaluate_model(X_val, Y_val, model):
  """Score `model` on held-out data.

  The class of each row is the argmax of that row, both for the one-hot labels
  in `Y_val` and for the output of `model.predict(X_val)`.  Class 1 is counted
  as positive and class 0 as negative.

  Args:
    X_val: inputs passed to `model.predict`.
    Y_val: one-hot labels, one row per row of `X_val`.
    model: object with a `predict` method returning one row of class scores
      per input row.

  Returns:
    Tuple `(sensitivity, specificity, accuracy)` of built-in floats, each a
    percentage.  A value is NaN when it is undefined because its denominator is
    zero (no positive rows, no negative rows, or no rows at all) instead of
    raising ZeroDivisionError.
  """
  Y_pred = np.asarray(model.predict(X_val))
  # only as many label rows as there are predictions are scored
  true_labels = np.argmax(np.asarray(Y_val)[:Y_pred.shape[0]], axis=1)
  pred_labels = np.argmax(Y_pred, axis=1)

  is_correct = true_labels == pred_labels
  is_pos = true_labels == 1
  is_neg = true_labels == 0

  # plain ints keep the arithmetic below in built-in floats (JSON-serializable)
  pos = int(is_pos.sum())
  neg = int(is_neg.sum())
  pos_correct = int((is_correct & is_pos).sum())
  neg_correct = int((is_correct & is_neg).sum())

  sensitivity = _percentage(pos_correct, pos)
  specificity = _percentage(neg_correct, neg)
  accuracy = _percentage(pos_correct + neg_correct, pos + neg)

  return (sensitivity, specificity, accuracy)

In [0]:
# Keras' validation_split holds out the rows from index
# int(n_samples * (1 - validation_split)) to the end.  The same truncating index
# is used here so X_val / Y_val are exactly the rows the models were validated
# on; rounding up with ceil can start one row later and drop the first one.
val_start = int(X.shape[0] * (1 - val_split))
X_val = X[val_start:]
Y_val = Y[val_start:]

In [0]:
# Rebuild every architecture found by the grid search, train it for its selected
# number of epochs, score it on the validation rows and save one JSON file per
# model.
results_dir = '/content/drive/My Drive/Final Capstone/Results Dicts/'

for i in range(len(model_dict['filters'])):
  filters = model_dict['filters'][i]
  filters2 = model_dict['filters2'][i]
  kernel_size = model_dict['kernel_size'][i]
  dropout_rate = model_dict['dropout_rate'][i]
  hidden_dense_size = model_dict['hidden_dense_size'][i]
  epochs_to_run = model_dict['max_epoch'][i]

  # Many models are built in this loop; without clearing, Keras keeps the
  # graph/session state of every earlier model alive, so memory use grows with
  # each iteration.  The previous model has already been scored and saved.
  keras.backend.clear_session()

  input_layer = Input(shape=X.shape[1:4])
  output_layer1 = Conv2D(filters=filters, kernel_size=kernel_size, strides=(1, 1), data_format='channels_last', activation='relu')(input_layer)
  output_layer2 = Conv2D(filters=filters2, kernel_size=kernel_size, strides=(1, 1), data_format='channels_last', activation='relu')(output_layer1)
  output_layer3 = MaxPooling2D(pool_size=pool_size, strides=(1, 1), data_format='channels_last')(output_layer2)
  output_layer4 = Flatten(data_format='channels_last')(output_layer3)
  output_layer5 = Dropout(rate=dropout_rate)(output_layer4)
  output_layer6 = Dense(units=hidden_dense_size, activation='relu')(output_layer5)
  output_layer7 = Dense(units=2, activation='softmax')(output_layer6)
  model = Model(input_layer, output_layer7)

  # this call needs the six-argument optimize_model (the one that returns the model)
  trained_model = optimize_model(model, X, Y, val_split, initial_lr, epochs_to_run)
  sensitivity, specificity, accuracy = evaluate_model(X_val, Y_val, trained_model)

  results_dict = {
  'filters':  filters,
  'filters2':  filters2,
  'kernel_size':  kernel_size,
  'dropout_rate':  dropout_rate,
  'hidden_dense_size':  hidden_dense_size,
  'epochs_to_run':  epochs_to_run,
  'sensitivity':  sensitivity,
  'specificity':  specificity,
  'accuracy':  accuracy
  }

  # result files are numbered from 1 in the order of model_dict
  with open(results_dir + str(i + 1) + '.json', 'w') as f:
    json.dump(results_dict, f)

## Create DataFrame of hyperparameters and results

In [0]:
# one list per DataFrame column; every results file contributes one value to each
data_dict = {
  'filters':  [],
  'filters2':  [],
  'kernel_size':  [],
  'dropout_rate':  [],
  'hidden_dense_size':  [],
  'epochs_to_run':  [],
  'sensitivity':  [],
  'specificity':  [],
  'accuracy':  []
}


def _result_file_order(file_name):
  """Sort key: numerically named files ('2.json' before '10.json') first, then the rest by name."""
  stem = file_name[:-len('.json')]
  if stem.isdecimal():
    return (0, int(stem), '')
  return (1, 0, stem)


load_path = '/content/drive/My Drive/Final Capstone/Results Dicts/'
# listdir returns entries in arbitrary order and may contain non-result entries;
# keep only the JSON files and read them in a deterministic order so the
# DataFrame's row order is reproducible
result_files = sorted((name for name in listdir(load_path) if name.endswith('.json')), key=_result_file_order)

for file in result_files:
  with open(load_path + file, 'r') as f:
    results_dict = json.load(f)

  # the saved keys are the same as the column names
  for column in data_dict:
    data_dict[column].append(results_dict[column])

results_df = pd.DataFrame(data=data_dict)

## Inspect Max and Min Values of Accuracy, Sensitivity, and Specificity

In [0]:
# pull the three metric columns out of the results table
accuracy_series, sensitivity_series, specificity_series = (
    results_df[column] for column in ('accuracy', 'sensitivity', 'specificity')
)

# the report lists max then min for each metric, in the order used above
report_template = 'Max Accuracy:  {}\nMin Accuracy:  {}\nMax Sensitivity:  {}\nMin Sensitivity:  {}\nMax Specificity:  {}\nMin Specificity:  {}'
extremes = []
for series in (accuracy_series, sensitivity_series, specificity_series):
  extremes.append(series.max())
  extremes.append(series.min())

print(report_template.format(*extremes))

Max Accuracy:  84.67324721490462
Min Accuracy:  77.95093153902307
Max Sensitivity:  94.68760012816405
Min Sensitivity:  62.02499198974688
Max Specificity:  94.11394793107786
Min Specificity:  61.533140485473524


## Inspect based on decreasing specificity

In [0]:
# the ten models with the highest specificity, best first
results_df.sort_values(by='specificity', ascending=False).iloc[:10]

Unnamed: 0,filters,filters2,kernel_size,dropout_rate,hidden_dense_size,epochs_to_run,sensitivity,specificity,accuracy
100,64,128,"[3, 3]",0.5,64,4,65.06248,94.113948,79.72514
106,64,128,"[5, 5]",0.5,64,6,62.024992,93.579424,77.950932
37,32,16,"[3, 3]",0.25,64,6,66.786286,93.026034,80.029835
17,16,16,"[3, 3]",0.5,128,5,72.111503,91.831216,82.064303
77,64,32,"[3, 3]",0.5,128,5,71.175905,91.642561,81.505697
101,64,128,"[3, 3]",0.5,128,4,76.07818,90.259087,83.235471
61,32,64,"[3, 3]",0.25,64,6,75.924383,90.070431,83.064081
102,64,128,"[5, 5]",0.25,32,4,73.450817,89.825179,81.715174
52,32,32,"[3, 3]",0.5,64,5,77.03941,89.359829,83.257689
12,16,16,"[3, 3]",0.25,32,4,74.61711,89.278078,82.016695


## Inspect based on decreasing specificity but with sensitivity greater than or equal to 80

In [0]:
# keep only models whose sensitivity is at least 80, then show the ten with the
# highest specificity, best first
sensitivity_ok = results_df['sensitivity'] >= 80
results_df.loc[sensitivity_ok].sort_values(by='specificity', ascending=False).iloc[:10]

Unnamed: 0,filters,filters2,kernel_size,dropout_rate,hidden_dense_size,epochs_to_run,sensitivity,specificity,accuracy
74,64,32,"[3, 3]",0.25,128,5,81.755847,87.014212,84.409814
104,64,128,"[5, 5]",0.25,128,5,80.980455,86.693498,83.863903
85,64,64,"[3, 3]",0.25,64,5,82.21083,86.347629,84.298727
68,32,64,"[5, 5]",0.25,128,6,82.383851,86.0898,84.254293
69,32,64,"[5, 5]",0.5,32,6,81.813521,86.014338,83.933729
43,32,16,"[5, 5]",0.25,64,4,80.493432,85.951453,83.248167
76,64,32,"[3, 3]",0.5,64,5,83.236142,85.932587,84.597074
39,32,16,"[3, 3]",0.5,32,4,80.192246,85.888567,83.067255
99,64,128,"[3, 3]",0.5,32,5,82.691445,85.850836,84.286032
59,32,32,"[5, 5]",0.5,128,5,82.415892,85.310024,83.876599


## Inspect based on decreasing sensitivity

In [0]:
# the ten models with the highest sensitivity, best first
results_df.sort_values(by='sensitivity', ascending=False).iloc[:10]

Unnamed: 0,filters,filters2,kernel_size,dropout_rate,hidden_dense_size,epochs_to_run,sensitivity,specificity,accuracy
27,16,32,"[3, 3]",0.5,32,5,94.6876,61.53314,77.954105
84,64,64,"[3, 3]",0.25,32,5,94.520987,64.281222,79.258577
79,64,32,"[5, 5]",0.25,64,5,94.194169,62.168281,78.030279
55,32,32,"[5, 5]",0.25,64,4,92.822813,68.758647,80.67731
71,32,64,"[5, 5]",0.5,128,4,92.079462,71.060244,81.470784
96,64,128,"[3, 3]",0.25,32,4,92.066645,71.54446,81.708827
78,64,32,"[5, 5]",0.25,32,3,92.060237,65.601811,78.706319
16,16,16,"[3, 3]",0.5,64,8,91.58603,75.493649,83.463992
35,16,32,"[5, 5]",0.5,128,4,91.297661,71.355804,81.232742
6,16,8,"[5, 5]",0.25,32,7,91.099007,72.538046,81.731044


## Conclusions

There are large gaps between sensitivity and specificity for the models that best identify positives and for the models that best identify negatives.  To combat this, it would be useful to combine multiple models for the final prediction.  Three models are to be included:  one that best identifies positives, one that best identifies negatives, and one that is generally accurate with a relatively small gap between its sensitivity and specificity.  When all three models make a prediction for the same input, agreement among the individual predictions gives increased confidence in the overall prediction.  The parameters for each model to be used are listed below.

**Good Specificity Model:**

Filters in 1st Convolutional Layer:  64

Filters in 2nd Convolutional Layer:  128

Kernel Size (both convolutional layers):  (3, 3)

Dropout Rate:  0.50

Size of Hidden Dense Layer:  64

Epochs to Train:  4

**Good Sensitivity Model:**

Filters in 1st Convolutional Layer:  16

Filters in 2nd Convolutional Layer:  32

Kernel Size (both convolutional layers):  (3, 3)

Dropout Rate:  0.50

Size of Hidden Dense Layer:  32

Epochs to Train:  5

**Good Overall Model:**

Filters in 1st Convolutional Layer:  64

Filters in 2nd Convolutional Layer:  32

Kernel Size (both convolutional layers):  (3, 3)

Dropout Rate:  0.50

Size of Hidden Dense Layer:  64

Epochs to Train:  5

It is interesting to note that the convolutional neural network outperforms the random forest model but not by as much as was expected.  This could change as the convolutional neural network is further optimized.

## Future Work

Opportunities to explore include:

-- impact of grayscale images

-- image augmentation through rotation, translation, flipping, etc.

-- deeper convolutional architectures

-- refining autoencoders for feature extraction