In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
idg = ImageDataGenerator(rescale=1./255)

train_set = idg.flow_from_directory('chest_xray/train',
                                                 target_size=(150, 150),
                                                 batch_size=32,
                                                 class_mode='binary',
                                                 color_mode='grayscale')

val_set = idg.flow_from_directory('chest_xray/val',
                                          target_size=(150, 150),
                                          batch_size=32,
                                          class_mode='binary',
                                          color_mode='grayscale')

test_set = idg.flow_from_directory('chest_xray/test',
                                            target_size=(150, 150),
                                            batch_size=32,
                                            class_mode='binary',
                                            color_mode='grayscale')

Found 5022 images belonging to 2 classes.
Found 210 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [3]:
# Setting paramaters on early stopping
earlystop = EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=20,
                          verbose=1,
                          mode='min',
                          restore_best_weights=True)

In [4]:
# Building the model
model1 = models.Sequential()
model1.add(layers.Conv2D(32, 7, input_shape=(150,150,1), padding='same',
                         activation='relu'))
model1.add(layers.MaxPooling2D(2))
model1.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
model1.add(layers.MaxPooling2D(2))
model1.add(layers.Flatten())
model1.add(layers.Dense(64, activation='relu'))
model1.add(layers.Dense(32, activation='relu'))
model1.add(layers.Dense(1, activation='sigmoid'))
model1.compile(optimizer="adam", loss="binary_crossentropy", metrics=['acc', tf.metrics.Recall()])
model1.summary()

In [7]:
# Fitting the model
history1 = model1.fit(train_set,
                    validation_data=val_set,
                    steps_per_epoch=100,
                    epochs=30,
                    callbacks=[earlystop]
                    )

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [8]:
model1.evaluate(test_set)



[3.1034023761749268, 0.7483974099159241, 0.9948717951774597]

In [10]:
# Building the model
second_model = models.Sequential()
second_model.add(layers.Conv2D(32, 7, input_shape=(150,150,1), padding='valid',
                         activation='relu'))
second_model.add(layers.MaxPooling2D(2))
second_model.add(layers.Conv2D(64, 3, padding='valid', activation='relu'))
second_model.add(layers.MaxPooling2D(2))
second_model.add(layers.Flatten())
second_model.add(layers.Dense(64, activation='relu'))
second_model.add(layers.Dense(32, activation='relu'))
second_model.add(layers.Dense(1, activation='sigmoid'))
second_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['acc', tf.metrics.Recall()])
second_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 144, 144, 32)      1600      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 72, 72, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 70, 70, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 35, 35, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 78400)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                5017664   
_________________________________________________________________
dense_4 (Dense)              (None, 32)               

In [11]:
# Fitting the model
history2 = second_model.fit(train_set,
                    validation_data=val_set,
                    steps_per_epoch=100,
                    epochs=30,
                    callbacks=[earlystop]
                    )

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Restoring model weights from the end of the best epoch.
Epoch 00023: early stopping


In [12]:
second_model.evaluate(test_set)



[0.9116288423538208, 0.7676281929016113, 0.9897435903549194]

In [15]:
# Building the model
third_model = models.Sequential()
third_model.add(layers.Conv2D(32, 7, input_shape=(150,150,1), padding='valid',
                         activation='relu'))
third_model.add(layers.MaxPooling2D(2))
third_model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
third_model.add(layers.MaxPooling2D(2))
third_model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
third_model.add(layers.MaxPooling2D(3))
third_model.add(layers.Flatten())
third_model.add(layers.Dense(64, activation='relu'))
third_model.add(layers.Dense(1, activation='sigmoid'))
third_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['acc', tf.metrics.Recall()])
third_model.summary()

# Fitting the model
history3 = third_model.fit(train_set,
                    validation_data=val_set,
                    steps_per_epoch=100,
                    epochs=30,
                    callbacks=[earlystop]
                    )

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 144, 144, 32)      1600      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 72, 72, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 36, 36, 64)        36928     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 9216)             

In [16]:
third_model.evaluate(test_set)



[0.7861124277114868, 0.7644230723381042, 0.9923076629638672]

In [18]:
from sklearn.utils.class_weight import compute_class_weight

# Calculating the class weights
weights = compute_class_weight('balanced', np.unique(train_set.labels),
                               train_set.labels)
# Zipping the class weights into a dictionary
weights = dict(zip(np.unique(train_set.labels), weights))
# Building the model
weighted_model = models.Sequential()
weighted_model.add(layers.Conv2D(32, 7, input_shape=(150,150,1), padding='same',
                         activation='relu'))
weighted_model.add(layers.MaxPooling2D(2))
weighted_model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
weighted_model.add(layers.MaxPooling2D(2))
weighted_model.add(layers.Flatten())
weighted_model.add(layers.Dense(64, activation='relu'))
weighted_model.add(layers.Dense(32, activation='relu'))
weighted_model.add(layers.Dense(1, activation='sigmoid'))
# Compiling the model and looking at the summary
weighted_model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', tf.metrics.Recall()])
weighted_model.summary()
# Fitting the model
history3 = weighted_model.fit(train_set,
                    validation_data=val_set,
                    steps_per_epoch=100,
                    epochs=30,
                    class_weight=weights,
                    callbacks=[earlystop]
                    )



Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 150, 150, 32)      1600      
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 75, 75, 32)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 75, 75, 64)        18496     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 37, 37, 64)        0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 87616)             0         
_________________________________________________________________
dense_10 (Dense)             (None, 64)                5607488   
_________________________________________________________________
dense_11 (Dense)             (None, 32)               

In [19]:
weighted_model.evaluate(test_set)



[3.2625746726989746, 0.7291666865348816, 0.9923076629638672]

In [23]:
# This is the same as the best previous model with image augmentation
augment_gen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=.2,
                                   height_shift_range=.2,
                                   shear_range=.2,
                                   zoom_range=.2,
                                   horizontal_flip=False,
                                   brightness_range=[.5, 1.5])

augment_set = augment_gen.flow_from_directory('data/chest_xray/train',
                                                 target_size=(150, 150),
                                                 batch_size=32,
                                                 class_mode='binary',
                                                 color_mode='grayscale')

augmented = models.Sequential()
augmented.add(layers.Conv2D(32, 7, padding='valid', input_shape=(150,150,1), activation='relu'))
augmented.add(layers.MaxPooling2D(2))
augmented.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
augmented.add(layers.MaxPooling2D(2))
augmented.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
augmented.add(layers.MaxPooling2D(3))
augmented.add(layers.Flatten())
augmented.add(layers.Dense(64, activation='relu'))
augmented.add(layers.Dense(1, activation='sigmoid'))
augmented.compile(optimizer="adam", loss="binary_crossentropy", metrics=['acc', tf.metrics.Recall()])
augmented.summary()

# Fitting the model
historyaug = augmented.fit(augment_set,
                    validation_data=val_set,
                    steps_per_epoch=100,
                    epochs=30,
                    callbacks=[earlystop])

Found 5216 images belonging to 2 classes.
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 144, 144, 32)      1600      
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 72, 72, 32)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 36, 36, 64)        36928     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_5 (F

In [24]:
augmented.evaluate(test_set)



[0.24586127698421478, 0.8862179517745972, 0.9179487228393555]