In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [9]:
# General imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
import seaborn as sns
from mlxtend.plotting import plot_decision_regions
from sklearn.metrics import confusion_matrix
import cv2

In [4]:
# graphing of accuracy and loss from Lindsey's Neural
def visualize_training_results(history):
    '''
    Plot training and validation accuracy and loss per epoch on two stacked axes.

    Adapted from https://machinelearningmastery.com/display-deep-learning-model-training-history-in-keras/

    Input: keras history object (output from trained model). Its ``history`` dict must
    contain 'loss' and 'val_loss', plus accuracy logged either as 'acc'/'val_acc' or as
    'accuracy'/'val_accuracy' (the name depends on how the metric was given to compile()).
    '''
    logs = history.history
    # Accept both spellings of the accuracy metric instead of failing with a KeyError
    acc_key = 'acc' if 'acc' in logs else 'accuracy'

    fig, (ax1, ax2) = plt.subplots(2, sharex=True)
    fig.suptitle('Model Results')

    # summarize history for accuracy
    ax1.plot(logs[acc_key])
    ax1.plot(logs['val_' + acc_key])
    ax1.set_ylabel('Accuracy')
    ax1.legend(['train', 'test'], loc='upper left')
    # summarize history for loss
    ax2.plot(logs['loss'])
    ax2.plot(logs['val_loss'])
    ax2.set_ylabel('Loss')
    ax2.legend(['train', 'test'], loc='upper left')

    # The x axis is shared, so labelling the bottom axes is enough
    ax2.set_xlabel('Epoch')
    plt.show()

**Business Problem:**
Make a decision support tool for ER physicians looking at X-rays regarding lung issues for the first time.

**Data Understanding**
- We want to train a neural network that identifies whether or not a subject has pneumonia based on a chest X-ray. This dataset of 5,232 chest X-rays from children lets us train such a network so that it can be of use to doctors. There are 3,883 pneumonia X-rays and 1,349 normal ones, so there is a class imbalance issue. Additionally, the images differ in size, so it is necessary to standardize them before modelling.
- In the context of this data, a false positive would mean that the neural network identifies an x-ray as showing evidence of pneumonia, when it is really a normal x-ray. A false negative would mean that the neural network identifies a pneumonia image as being normal.

**Loading an Image, to see what it looks like**

In [None]:
import PIL

In [None]:
# `import PIL` alone does not load the PIL.Image submodule, so import it explicitly
# rather than relying on another library having imported it already.
from PIL import Image
image = Image.open('../input/chest-xray-pneumonia/chest_xray/test/NORMAL/IM-0001-0001.jpeg')

In [None]:
# Display the loaded X-ray as the cell output
image

In [None]:
# Dimensions of the loaded image; PIL reports size as (width, height)
image.size

**Setting up a Generator to Load and Reshape Images**

In [5]:
# Your code here; transform the image files and then load them into Keras as tensors 
# (be sure to perform a train-val-test split)
# `keras` here is the `tensorflow.keras` module imported in the general-imports cell.
# Importing the standalone `keras` package at this point would rebind the name and mix
# two Keras implementations with the `tensorflow.keras` layers used by the models below.

# Instantiating the generator objects; rescale multiplies every pixel value by 1/255
traingen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
testgen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
valgen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)

# Settings shared by all three directory iterators: images resized to 150x150,
# binary labels, batches of 20, shuffled with a fixed seed.
flow_options = dict(
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
    shuffle=True,
    seed=42,
)

train_data = traingen.flow_from_directory(
    directory='../input/chest-xray-pneumonia/chest_xray/train',
    **flow_options
)
# Leftovers from an abandoned manual label-collection loop; kept in case later cells use them
train_labels = []
batch_index1 = 0

# NOTE(review): shuffle=True on the test/val iterators means their batch order does not
# follow `.classes`; confirm this is intended if predictions are later compared to `.classes`.
test_data = testgen.flow_from_directory(
    directory='../input/chest-xray-pneumonia/chest_xray/test',
    **flow_options
)

val_data = valgen.flow_from_directory(
    directory='../input/chest-xray-pneumonia/chest_xray/val',
    **flow_options
)

In [5]:
# from https://www.kaggle.com/madz2000/pneumonia-detection-using-cnn-92-6-accuracy#Loading-the-Dataset
# NOTE(review): this order encodes PNEUMONIA as 0 and NORMAL as 1, the reverse of the
# {0: 'Normal', 1: 'Pneumonia'} mapping used in the EDA cells — confirm before comparing
# these labels with generator-based predictions.
labels = ['PNEUMONIA', 'NORMAL']
img_size = 150
def get_training_data(data_dir):
    """Load every image under ``data_dir/<label>`` as a resized grayscale array.

    Parameters
    ----------
    data_dir : str
        Directory containing one sub-directory per entry of ``labels``.

    Returns
    -------
    numpy.ndarray
        Object array with one ``[image, class_num]`` row per readable image, where
        ``image`` is an ``img_size`` x ``img_size`` grayscale array and ``class_num`` is the
        position of the label in ``labels``. Empty when no image could be loaded.

    Files that cannot be read or resized are skipped after printing the error.
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            try:
                # imread returns None for unreadable files; resize then raises and we skip the file
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                resized_arr = cv2.resize(img_arr, (img_size, img_size)) # Reshaping images to preferred size
                data.append([resized_arr, class_num])
            except Exception as e:
                print(f'Skipping {img_path}: {e}')
    # Each row pairs a 2-D image with a scalar label, i.e. a ragged structure: NumPy only
    # accepts that as an explicit object array (recent versions raise ValueError otherwise).
    return np.array(data, dtype=object)

In [None]:
# Load the train, test and validation folders (in that order) as arrays of [image, label] rows
training, testing, validating = map(get_training_data, (
    '../input/chest-xray-pneumonia/chest_xray/train',
    '../input/chest-xray-pneumonia/chest_xray/test',
    '../input/chest-xray-pneumonia/chest_xray/val',
))

In [None]:
# Second element (the class index) of the first [image, label] row
training[0][1]

In [None]:
# Collect the class index (second element) of every training row
training_labels = [label for _, label in training]
len(training_labels)

In [None]:
# Collect the class index (second element) of every test row
testing_labels2 = [label for _, label in testing]
len(testing_labels2)

In [None]:
# Collect the class index (second element) of every validation row
validating_labels = [label for _, label in validating]
len(validating_labels)

**EDA**

In [None]:
# Show which integer label the training generator assigned to each class folder
(train_data.class_indices)


In [None]:
# Human-readable names for the integer labels
class_dict = {0: 'Normal', 1: 'Pneumonia'}
# One row per training image holding its integer class label (column 0)
classes = pd.DataFrame(train_data.classes)
values = classes.value_counts()

In [None]:
# Replace integer labels with readable names. Values that are not keys of class_dict
# (e.g. names written by an earlier run of this cell) are left unchanged, so re-running
# the cell no longer turns the whole column into NaN.
classes[0] = classes[0].map(lambda code: class_dict.get(code, code))

In [None]:
# Number of training images per diagnosis label
diag = classes[0].value_counts()

In [None]:
# Bar chart of the number of training images per diagnosis
plt.figure(figsize=(12,8))
sns.set(font_scale=1.4)
# x and y are passed by keyword: current seaborn releases no longer accept them positionally
sns.barplot(x=diag.index, y=diag.values)
plt.ylabel("Number of Images")
plt.title('Distribution of Chest X-ray Images');

**Baseline Model**

In [None]:

#baseline_model = keras.Sequential(name='dense')
#baseline_model.add(Dense(500, activation='relu', input_shape=(150,150, 3))
#baseline_model.add(Dense(250, activation='relu'))
#baseline_model.add(Dense(1, activation='sigmoid'))

In [6]:
# Input shape to be used in all models
# 150x150 images with 3 channels, matching the generators' target_size of (150, 150)
input_shape = (150,150,3)
# A single output unit, used with a sigmoid activation in the models below
output_shape = 1

In [7]:
# Baseline: flatten each image and pass it through one 100-unit hidden layer
baseline = keras.Sequential()
baseline.add(keras.Input(shape=input_shape))  # explicit input so summary() can report shapes
baseline.add(layers.Flatten())  # images become one long vector
baseline.add(layers.Dense(100, activation="relu"))
baseline.add(layers.Dense(output_shape, activation="sigmoid"))

baseline.summary()

In [None]:
# Binary cross-entropy with Adam; track accuracy, recall, precision and confusion-matrix counts
baseline.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [None]:

# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so every val_* metric is a test metric.
baseline_results = baseline.fit(train_data,
                              steps_per_epoch=100,
                              epochs=10,
                              validation_data=test_data,)


In [None]:
# Per-epoch metric values recorded while fitting the baseline model
baseline_results.history

In [None]:
# Plot accuracy and loss curves for the baseline run
visualize_training_results(baseline_results)

The model is definitely overfit, because the accuracy in training data is much higher than that of the validation data. This is a very simple neural network, and so it could benefit from adding another layer to learn patterns from. This might also help decrease loss.

**Adding another layer**

In [None]:
# Adding in the layers
# Same as the baseline, plus a second (50-unit) hidden layer
two_hidden = keras.Sequential()
two_hidden.add(keras.Input(shape=input_shape))  # explicit input so summary() can report shapes
two_hidden.add(layers.Flatten())  # images become one long vector
two_hidden.add(layers.Dense(100, activation='relu'))
two_hidden.add(layers.Dense(50, activation='relu'))
two_hidden.add(layers.Dense(output_shape, activation="sigmoid"))

two_hidden.summary()

In [None]:
# Compiling the two-layer model
# Binary cross-entropy with Adam; same metric set as the baseline
two_hidden.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [None]:
# Fit the two-layer model 
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so every val_* metric is a test metric.
two_hidden_results = two_hidden.fit(train_data,
                              steps_per_epoch=100,
                              epochs=10,
                              validation_data=test_data)

In [None]:
# Plot accuracy and loss curves for the two-hidden-layer run
visualize_training_results(two_hidden_results)

Once again, the model is overfit. Also, judging from this graph showing the accuracy and loss for both training and validation data, it looks like it could benefit from a greater number of training epochs, since it looks like the validation loss might decrease further.

**Double the number of epochs**

In [None]:
# Adding in the layers
# Same two-hidden-layer architecture, rebuilt so it is trained from scratch for more epochs
more_epochs = keras.Sequential()
more_epochs.add(keras.Input(shape=input_shape))  # explicit input so summary() can report shapes
more_epochs.add(layers.Flatten())  # images become one long vector
more_epochs.add(layers.Dense(100, activation='relu'))
more_epochs.add(layers.Dense(50, activation='relu'))
more_epochs.add(layers.Dense(output_shape, activation="sigmoid"))

more_epochs.summary()

In [None]:
# Compiling the two-layer model
# Binary cross-entropy with Adam; same metric set as the baseline
more_epochs.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [None]:
# Fit the two-layer model 
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so every val_* metric is a test metric.
more_epochs_results = more_epochs.fit(train_data,
                              steps_per_epoch=100,
                              epochs=30,
                              validation_data=test_data)

In [None]:
# Plot accuracy and loss curves for the 30-epoch run
visualize_training_results(more_epochs_results)

Once again, the model is overfitting. It also looks like the loss is oscillating a good deal, but not necessarily decreasing. It is probably time to try a Convolutional model, especially since this is an image classification problem. Adding convolutions will put filters on the images to help the model pick up on patterns better.

**Building a Convolutional Neural Network**

In [None]:
# Set up for this CNN model is from this blog:  https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/
cnn = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: flatten, one hidden dense layer, single sigmoid output
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

cnn.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [None]:
# Print the layer-by-layer summary of the CNN
cnn.summary()

In [None]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so every val_* metric is a test metric.
cnn_results = cnn.fit(train_data,
                              steps_per_epoch=100,
                              epochs=10,
                              validation_data=test_data)

In [None]:
# Plot accuracy and loss curves for the first CNN
visualize_training_results(cnn_results)

This CNN model requires much improvement because it is overfitting: as the training accuracy increases, the testing accuracy decreases, and as the training loss decreases, the testing loss increases quite a lot.
Reading the keras documentation for adam optimizers, there was a note discussing how for some types of CNN models, the default value for the hyperparameter epsilon in adam (1e-7) may not be the best; they suggest trying bigger values such as 0.1 or 1, so I will try this in the next model.

**CNN with a bigger epsilon (0.1)**

In [11]:
# Adam with epsilon raised to 0.1 (all other settings left at their defaults)
adam_ep = keras.optimizers.Adam(epsilon=0.1)

In [34]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_ep = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: flatten, one hidden dense layer, single sigmoid output
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Use a dedicated Adam(epsilon=0.1) instance: an optimizer object keeps per-variable state
# and an iteration counter, so sharing one instance between several models carries state
# over between experiments (and can fail outright on newer Keras versions).
cnn_ep.compile(loss='binary_crossentropy',
            optimizer=keras.optimizers.Adam(epsilon=0.1),
            metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

In [35]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so every val_* metric is a test metric.
cnn_ep_results = cnn_ep.fit(train_data,
                              steps_per_epoch=50,
                              epochs=10,
                              validation_data=test_data)

In [37]:
# Plot accuracy and loss curves for the epsilon=0.1 CNN
visualize_training_results(cnn_ep_results)

Increasing the epsilon value seems to reduce the validation data loss, and to increase the validation data accuracy. It also seems to reduce some of the overfitting that was taking place in previous models. It would also be interesting to see what changing the learning rate does to the model.

**CNN with a bigger epsilon (0.1) and smaller learning rate (0.0001)**

In [10]:
# Adam with epsilon raised to 0.1 and the learning rate set to 0.0001
adam_lep = keras.optimizers.Adam(learning_rate=0.0001, epsilon=0.1)

In [None]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_lep = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: flatten, one hidden dense layer, single sigmoid output
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Compiled with the low-learning-rate, large-epsilon Adam instance defined above
cnn_lep.compile(
    optimizer=adam_lep,
    loss='binary_crossentropy',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [None]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so every val_* metric is a test metric.
cnn_lep_results = cnn_lep.fit(train_data,
                              steps_per_epoch=50,
                              epochs=10,
                              validation_data=test_data)

In [None]:
# Plot accuracy and loss curves for the low-learning-rate CNN
visualize_training_results(cnn_lep_results)

Decreasing the learning rate did not have a good effect on the model; it is clearly still overfitting and the testing accuracy did not improve.

**CNN with changed epsilon and L2 regularization**

In [12]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_rep = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: the hidden dense layer carries an L2 weight penalty of 0.01
    layers.Flatten(),
    layers.Dense(128, kernel_regularizer= regularizers.l2(0.01), activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Use a dedicated Adam(epsilon=0.1) instance: an optimizer object keeps per-variable state
# and an iteration counter, so sharing one instance between several models carries state
# over between experiments (and can fail outright on newer Keras versions).
cnn_rep.compile(loss='binary_crossentropy',
            optimizer=keras.optimizers.Adam(epsilon=0.1),
            metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

In [13]:
cnn_rep = cnn_rep.fit_generator(train_data,
                              steps_per_epoch=50,
                              epochs=10,
                              validation_data=test_data)

In [14]:
visualize_training_results(cnn_rep)

Using regularization decreased the overfitting problem and also helped to decrease the loss in both training and testing data; however, there is still a good deal of loss going on, so further iterations are needed to decrease this loss. To see if L1 regularization will decrease the loss, I will try this next.

**CNN with changed epsilon and L1 regularization**

In [15]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_r1ep = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: the hidden dense layer carries an L1 weight penalty of 0.01
    layers.Flatten(),
    layers.Dense(128, kernel_regularizer= regularizers.l1(0.01), activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Use a dedicated Adam(epsilon=0.1) instance: an optimizer object keeps per-variable state
# and an iteration counter, so sharing one instance between several models carries state
# over between experiments (and can fail outright on newer Keras versions).
cnn_r1ep.compile(loss='binary_crossentropy',
            optimizer=keras.optimizers.Adam(epsilon=0.1),
            metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

In [16]:
cnn_r1ep = cnn_r1ep.fit_generator(train_data,
                              steps_per_epoch=50,
                              epochs=10,
                              validation_data=test_data)

In [17]:
visualize_training_results(cnn_r1ep)

L1 Regularization caused the model to perform much worse than when L2 was used; accuracy is less and loss is more, and there is a big overfitting problem. Even though L2 regularization looks like it may be beneficial, for now I am going to take out all regularization in order to more clearly see the difference which results from adding another layer, in the next model iteration.

**CNN model with changed epsilon and added Dense layer**

In [18]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_dep = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: two hidden dense layers (128 then 64 units), single sigmoid output
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Use a dedicated Adam(epsilon=0.1) instance: an optimizer object keeps per-variable state
# and an iteration counter, so sharing one instance between several models carries state
# over between experiments (and can fail outright on newer Keras versions).
cnn_dep.compile(loss='binary_crossentropy',
            optimizer=keras.optimizers.Adam(epsilon=0.1),
            metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

In [19]:
cnn_dep = cnn_dep.fit_generator(train_data,
                              steps_per_epoch=50,
                              epochs=10,
                              validation_data=test_data)

In [20]:
visualize_training_results(cnn_dep)

The accuracy and loss results for the training and testing data are not much different from those of the CNN model with changed epsilon and one layer. However, maybe adding dropout layers will improve the results.

**CNN model with changed epsilon, added layer, and dropout layers**

In [21]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_ddep = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: two hidden dense layers, each followed by 25% dropout
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(1, activation='sigmoid'),
])

# Use a dedicated Adam(epsilon=0.1) instance: an optimizer object keeps per-variable state
# and an iteration counter, so sharing one instance between several models carries state
# over between experiments (and can fail outright on newer Keras versions).
cnn_ddep.compile(loss='binary_crossentropy',
            optimizer=keras.optimizers.Adam(epsilon=0.1),
            metrics=['acc', 'Recall', 'Precision', 'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives'])

In [22]:
cnn_ddep = cnn_ddep.fit_generator(train_data,
                              steps_per_epoch=50,
                              epochs=10,
                              validation_data=test_data)

In [23]:
visualize_training_results(cnn_ddep)

The main thing that is different about this model is that the loss is higher for training and testing data. In the next iteration, I will try using a normal epsilon to see if this works better with multiple layers and dropout layers.

**CNN model with normal epsilon, added layers, added Dropout layers**

In [24]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_dd = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: two hidden dense layers, each followed by 25% dropout
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(1, activation='sigmoid'),
])

# Default Adam settings this time (selected by name)
cnn_dd.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [25]:
cnn_dd = cnn_dd.fit_generator(train_data,
                              steps_per_epoch=50,
                              epochs=10,
                              validation_data=test_data)

In [26]:
visualize_training_results(cnn_dd)

There is a bigger overfitting problem with this model than in the last iteration; maybe having a bigger epsilon (0.1) is better. Or perhaps using a different optimizer would improve results; will try SGD in next model iteration.

**CNN model with SGD, normal epsilon, added layers, added Dropout layers**

In [27]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_sdd = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: two hidden dense layers, each followed by 25% dropout
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(1, activation='sigmoid'),
])

# Stochastic gradient descent with default settings (selected by name)
cnn_sdd.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [28]:
cnn_sdd = cnn_sdd.fit_generator(train_data,
                              steps_per_epoch=100,
                              epochs=10,
                              validation_data=test_data)

In [29]:
visualize_training_results(cnn_sdd)

Changing to SGD optimizer resulted in a very big difference between the loss in training and test data. Will try seeing if having a greater number of epochs will help decrease loss.

**CNN model with fewer steps per epoch, more epochs, SGD, normal epsilon, added layers, added Dropout layers**

In [30]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_smdd = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: two hidden dense layers, each followed by 25% dropout
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(1, activation='sigmoid'),
])

# Stochastic gradient descent with default settings (selected by name)
cnn_smdd.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [31]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so every val_* metric is a test metric.
cnn_smdd_results = cnn_smdd.fit(train_data,
                              steps_per_epoch=30,
                              epochs=50,
                              validation_data=test_data)

In [33]:
# Plot accuracy and loss curves for the 50-epoch SGD run
visualize_training_results(cnn_smdd_results)

Increasing the number of epochs did seem to decrease loss and the difference in loss between training and testing data.

**CNN model with early stopping, fewer steps per epoch, more epochs, SGD, normal epsilon, added layers, added Dropout layers**

In [38]:
# Callbacks passed to fit(): both watch the validation loss
early_stop = [
    # Stop after 10 epochs without improvement and roll back to the best weights seen
    EarlyStopping(
        monitor='val_loss',
        restore_best_weights=True,
        patience=10,
    ),
    # Save the model to disk whenever the validation loss reaches a new best
    ModelCheckpoint(
        monitor='val_loss',
        save_best_only=True,
        filepath='best_model.h5',
    ),
]

In [39]:
# An example of a CNN set up from this GitHub: https://github.com/flatiron-school/DSLE-083021-Phase4-NN-Review/blob/main/Phase4Review-NNs-Text-Images.ipynb
cnn_esmdd = keras.Sequential([
    # Feature extractor: three stages of two 3x3 'same' convolutions + 2x2 max-pooling,
    # with 32, 64 and 128 filters. We defined a variable input_shape earlier, can use that here
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: two hidden dense layers, each followed by 25% dropout
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(1, activation='sigmoid'),
])

# Stochastic gradient descent with default settings (selected by name)
cnn_esmdd.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=[
        'acc', 'Recall', 'Precision',
        'TruePositives', 'TrueNegatives', 'FalsePositives', 'FalseNegatives',
    ],
)

In [40]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): validation_data is the test split, so the early-stopping and checkpoint
# callbacks (which monitor val_loss) select the model on test data.
cnn_esmdd_results = cnn_esmdd.fit(train_data,
                              steps_per_epoch=30,
                              epochs=50,
                              callbacks=early_stop,
                              validation_data=test_data)

In [41]:
# Plot accuracy and loss curves for the early-stopping run
visualize_training_results(cnn_esmdd_results)

The end result of accuracy and loss for training and testing data is not too different from the other graph. One interesting thing to note about this model is that in epoch 10 there is virtually no difference in loss between training and testing data, and very little difference in accuracy; this epoch results in the least overfitting for this model.