### Imports

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from keras.applications import inception_resnet_v2
from keras.layers import Dense, Flatten
from keras.losses import BinaryCrossentropy
from keras.optimizers import adam_v2

### Constants

In [2]:
# Root folder that holds the train/val/test split directories.
# Set the CHEST_XRAY_DATA_DIR environment variable to point the notebook at a
# different machine's copy of the data; the default is the original location.
DATA_ROOT = os.environ.get("CHEST_XRAY_DATA_DIR", "E:/MattDataHelp/chest_xray").rstrip("/\\")

# One sub-folder per split.
TRAIN_DIRECTORY = f"{DATA_ROOT}/train"
VALIDATION_DIRECTORY = f"{DATA_ROOT}/val"
TEST_DIRECTORY = f"{DATA_ROOT}/test"

# Every image is resized to this (height, width) when it is loaded.
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32  # This is keras default
IMAGE_DIMENSIONS = (IMG_HEIGHT, IMG_WIDTH)
SEED = 123  # Passed to the dataset loader so shuffling is repeatable.

NUM_CLASSES = 2

# Training hyper-parameters.
NUM_EPOCHS = 20
LEARNING_RATE = 0.001

### Load Data

In [3]:
def _load_split(directory, shuffle):
    """Build a batched RGB image dataset from one split folder.

    Args:
        directory: Folder containing one sub-folder per class.
        shuffle: Whether the loader should shuffle the files.

    Returns:
        The dataset produced by ``image_dataset_from_directory``, with images
        resized to ``IMAGE_DIMENSIONS`` and grouped into ``BATCH_SIZE`` batches.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        image_size=IMAGE_DIMENSIONS,
        batch_size=BATCH_SIZE,
        color_mode="rgb",
        shuffle=shuffle,
        seed=SEED,
    )


print("Training:")
train_ds = _load_split(TRAIN_DIRECTORY, shuffle=True)
print()

# Only the training split needs shuffling. Keeping validation and test in a
# fixed order makes evaluation repeatable and keeps labels aligned with
# predictions across separate passes over the data.
print("Validation:")
val_ds = _load_split(VALIDATION_DIRECTORY, shuffle=False)

print("Test:")
test_ds = _load_split(TEST_DIRECTORY, shuffle=False)

Training:
Found 5216 files belonging to 2 classes.

Validation:
Found 16 files belonging to 2 classes.
Test:
Found 624 files belonging to 2 classes.


#### Configure the dataset for performance

In [4]:
# AUTOTUNE = tf.data.AUTOTUNE
#
# train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
# val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

### Pre-Process Data

In [5]:
# for image_batch, labels_batch in train_ds:
#     # in-place pre-processing of images
# #     updated_image_batch = vgg16.preprocess_input(image_batch)
# #     train_ds
#
# train_ds = tf.keras.applications.mobilenet.preprocess_input(train_ds)
# val_ds = tf.keras.applications.mobilenet.preprocess_input(val_ds)
# test_ds = tf.keras.applications.mobilenet.preprocess_input(test_ds)

In [6]:
# # See here for the InceptionResNetV2 API: https://keras.io/api/applications/inceptionresnetv2/
# model = inception_resnet_v2.InceptionResNetV2(
#     include_top=False,
#     weights=None,
#     input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
#     pooling='avg',  # Global average pooling applied.
#     classes=NUM_CLASSES,
# )
#
# # Add dense layers to perform prediction
# model.layers.append(Flatten())
# model.layers.append(Dense(units=4096, activation="relu"))
# model.layers.append(Dense(units=4096, activation="relu"))
# model.layers.append(Dense(units=NUM_CLASSES, activation="softmax"))

In [7]:
# Small CNN binary classifier: five conv/max-pool stages, one hidden dense
# layer, and a single sigmoid output unit.
model = tf.keras.models.Sequential([
    # The dataset loader yields raw pixel values in [0, 255] and no other
    # pre-processing is applied, so scale to [0, 1] inside the model. Doing it
    # here means training, evaluation and prediction all get the same scaling.
    # Input is (height, width, channels) = IMG_HEIGHT x IMG_WIDTH RGB images.
    tf.keras.layers.Rescaling(1.0 / 255, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    # The first convolution
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The second convolution
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The third convolution
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The fourth convolution
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The fifth convolution
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # Only 1 output neuron: a value in [0, 1] read as the probability of the
    # class with label 1. The loader assigns integer labels in alphanumeric
    # order of the class sub-folder names.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

In [8]:
# Binary cross-entropy matches the model's single sigmoid output unit
# (assumes the datasets carry 0/1 labels - the loader's default for two classes).
loss = BinaryCrossentropy()

# Adam with the learning rate from the constants cell; echo it for the record.
print("Learning rate:", LEARNING_RATE)
optimizer = adam_v2.Adam(learning_rate=LEARNING_RATE)

model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy'],
)

Learning rate: 0.001


In [None]:
# Train for NUM_EPOCHS and keep the History object for the plots below.
# NOTE(review): the validation folder was reported as only 16 files, so the
# per-epoch validation metrics are likely to be noisy.
hist = model.fit(train_ds, epochs=NUM_EPOCHS, verbose=True, validation_data=val_ds)

Epoch 1/20
Epoch 2/20

In [None]:
# Training vs. validation accuracy per epoch, taken from the fit history.
fig, ax = plt.subplots()
ax.plot(hist.history['accuracy'], label="training")
ax.plot(hist.history['val_accuracy'], label="validation")
ax.set(title="Model Accuracy", xlabel="Epoch", ylabel="Accuracy")
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch, taken from the fit history.
fig, ax = plt.subplots()
ax.plot(hist.history['loss'], label="training")
ax.plot(hist.history['val_loss'], label="validation")
ax.set(title="Model Loss", xlabel="Epoch", ylabel="Loss")
ax.legend()
plt.show()

In [None]:
# Score the trained model on the held-out test set.
# NOTE: despite its name, `preds` holds the metric values returned by
# `evaluate` (the loss followed by the compiled metrics), not per-image
# predictions.
preds = model.evaluate(
    test_ds,
    verbose=True,
)

In [None]:
#ROC
#
# ROC curve for the trained model on the test set.
#
# Labels and scores are collected in the SAME pass over `test_ds`: a shuffled
# dataset yields its batches in a different order on every iteration, so
# reading the labels in one pass and predicting in another would misalign them.
label_batches, score_batches = [], []
for image_batch, label_batch in test_ds:
    label_batches.append(label_batch.numpy().ravel())
    # The model has one sigmoid output per image; flatten (batch, 1) -> (batch,).
    score_batches.append(model.predict_on_batch(image_batch).ravel())
test_labels = np.concatenate(label_batches)
test_scores = np.concatenate(score_batches)

num_positive = int(np.sum(test_labels == 1))
num_negative = int(test_labels.size - num_positive)
if num_positive == 0 or num_negative == 0:
    raise ValueError("ROC curve needs both classes present in the test set.")

# Sweep the decision threshold from the highest score down to the lowest.
# A stable sort keeps the result deterministic when scores are tied.
order = np.argsort(-test_scores, kind="mergesort")
sorted_labels = test_labels[order]
sorted_scores = test_scores[order]

# One ROC point per distinct score: take the last index of each run of ties,
# and always include the final sample so the curve ends at (1, 1).
cut_points = np.r_[np.flatnonzero(np.diff(sorted_scores)), sorted_labels.size - 1]
true_positives = np.cumsum(sorted_labels == 1)[cut_points]
false_positives = (cut_points + 1) - true_positives

# Prepend the origin so the curve starts at (0, 0).
tpr = np.r_[0.0, true_positives / num_positive]
fpr = np.r_[0.0, false_positives / num_negative]

# Area under the curve by the trapezoidal rule.
roc_auc = float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))

plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()