# Lung Scans

Codecademy Exercise: Deep Learning Classification

Diagnose pneumonia, covid-19, or no illness, based on a patient's x-ray scan.

In [None]:
import math
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Number of images per batch; shared by the data iterators and the per-epoch step counts.
BATCH_SIZE = 32

In [None]:
# Load the training images from disk and preprocess them with augmentation.
print('Loading training data...')
training_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,         # pixel normalization
    zoom_range=0.1,          # randomly increase or decrease the size of the image by up to 10%
    rotation_range=25,       # randomly rotate the image between -25,25 degrees
    height_shift_range=0.05, # Shift the image along its height by up to +/- 5%
    width_shift_range=0.05,  # Shift the image along its width by up to +/- 5%
)
# Dump the generator's configuration so the augmentation settings can be inspected.
print(training_data_generator.__dict__)
training_iterator = training_data_generator.flow_from_directory(
    directory='dataset/train',
    class_mode='categorical',  # labels are yielded as one-hot vectors
    color_mode='grayscale',    # single-channel images
    target_size=(256, 256),
    batch_size=BATCH_SIZE,
)
# Pull one batch as a sanity check on the tensor shapes. The builtin next()
# is used instead of the iterator's .next() method because it relies only on
# the standard iterator protocol and therefore works across Keras versions.
sample_batch_input, sample_batch_labels = next(training_iterator)
print(sample_batch_input.shape, sample_batch_labels.shape)

In [None]:
# Load the validation images from disk; only pixel rescaling, no augmentation.
print('Loading validation data...')
validation_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255, # pixel normalization
)
# Dump the generator's configuration so the preprocessing settings can be inspected.
print(validation_data_generator.__dict__)
validation_iterator = validation_data_generator.flow_from_directory(
    directory='dataset/test',
    class_mode='categorical',  # labels are yielded as one-hot vectors
    color_mode='grayscale',    # single-channel images
    target_size=(256, 256),
    batch_size=BATCH_SIZE,
    # Keep the samples in file order so that model predictions line up with
    # validation_iterator.classes when the model is evaluated.
    shuffle=False,
)
# Sanity-check the shapes of a *validation* batch (the sample must come from
# validation_iterator, not from the training iterator).
sample_batch_input, sample_batch_labels = next(validation_iterator)
print(sample_batch_input.shape, sample_batch_labels.shape)

In [None]:
# Assemble the CNN classifier: two conv/pool/dropout stages followed by a
# softmax output over the three classes.
print('Building the model...')
model = tf.keras.models.Sequential([
    # input layer: 256x256 single-channel images
    tf.keras.Input(shape=(256, 256, 1)),

    # first feature-extraction stage
    tf.keras.layers.Conv2D(filters=5, kernel_size=5, strides=3, padding='valid', activation="relu"),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    tf.keras.layers.Dropout(0.1),

    # second feature-extraction stage
    tf.keras.layers.Conv2D(filters=3, kernel_size=3, strides=1, padding='valid', activation="relu"),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    tf.keras.layers.Dropout(0.2),

    # classification head (an intermediate Dense(8, relu) layer is currently disabled)
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(3, activation="softmax"),
])
model.summary()

In [None]:
# Configure the optimizer, loss and tracked metrics for training.
print('Compiling the model...')
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
crossentropy_loss = tf.keras.losses.CategoricalCrossentropy()
tracked_metrics = [
    tf.keras.metrics.CategoricalAccuracy(),
    tf.keras.metrics.AUC(name='auc'),  # explicit name; referenced later as 'auc' / 'val_auc'
]
model.compile(optimizer=adam_optimizer, loss=crossentropy_loss, metrics=tracked_metrics)

In [None]:
# Train the model, stopping early once the validation AUC stops improving.
print('Training the model...')
# AUC is a higher-is-better metric, so early stopping has to track its
# maximum; mode='min' would treat a *falling* AUC as improvement.
es = tf.keras.callbacks.EarlyStopping(monitor='val_auc', mode='max', verbose=1, patience=20)
history = model.fit(
    training_iterator,
    # Step counts must be whole numbers; round up so the final, partially
    # filled batch of each epoch is still consumed.
    steps_per_epoch=math.ceil(training_iterator.samples / BATCH_SIZE),
    epochs=50,
    validation_data=validation_iterator,
    validation_steps=math.ceil(validation_iterator.samples / BATCH_SIZE),
    callbacks=[es],
)

In [None]:
# Show what the training run recorded.
print(history)
print(history.params)
print(history.history.keys())

# One subplot per metric, each showing the training curve and the matching
# validation curve over the epochs: (history key, title, y-axis label).
metric_panels = (
    ('categorical_accuracy', 'model accuracy', 'accuracy'),
    ('auc', 'model auc', 'auc'),
)

fig = plt.figure()
panel_axes = []
for panel_position, (metric_key, panel_title, y_label) in enumerate(metric_panels, start=1):
    panel = fig.add_subplot(2, 1, panel_position)
    panel.plot(history.history[metric_key])
    panel.plot(history.history[f'val_{metric_key}'])
    panel.set_title(panel_title)
    panel.set_xlabel('epoch')
    panel.set_ylabel(y_label)
    panel.legend(['train', 'validation'], loc='upper left')
    panel_axes.append(panel)
ax1, ax2 = panel_axes

fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# classification report
# Predict batch by batch and take the ground-truth labels from the very same
# batches, so predictions and labels stay aligned regardless of the order in
# which the iterator serves its samples.
test_steps_per_epoch = math.ceil(validation_iterator.samples / validation_iterator.batch_size)
batch_predictions = []
batch_labels = []
for batch_index in range(test_steps_per_epoch):
    batch_inputs, batch_targets = validation_iterator[batch_index]
    batch_predictions.append(model.predict_on_batch(batch_inputs))
    batch_labels.append(batch_targets)
predictions = np.concatenate(batch_predictions)
predicted_classes = np.argmax(predictions, axis=1)
# The iterator yields one-hot labels (class_mode='categorical'); convert to class indices.
true_classes = np.argmax(np.concatenate(batch_labels), axis=1)
class_labels = list(validation_iterator.class_indices.keys())
report = classification_report(true_classes, predicted_classes, target_names=class_labels)
print(report)

# confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(true_classes, predicted_classes)
print(cm)