In [1]:
import os, shutil
from pathlib import Path
import time
import pandas as pd
from skimage.io import imread
import matplotlib.pyplot as plt
%matplotlib inline
from keras import models, layers, regularizers, optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
import scipy
import numpy as np
from PIL import Image
from scipy import ndimage
from keras.preprocessing.image import ImageDataGenerator, array_to_img
import tensorflow as tf
np.random.seed(123)

In [2]:
# path to the data directory
data_dir = Path('data/chest_xray/chest_xray')

# Path to train, validation, test directory
train_folder = data_dir / 'train'
test_folder = data_dir / 'val'
val_folder = data_dir / 'test'

In [3]:
# path to the normal and pneumonia sections
train_n_folder = train_folder / 'NORMAL'
train_p_folder = train_folder/ 'PNEUMONIA'

val_n_folder = val_folder / 'NORMAL'
val_p_folder = val_folder / 'PNEUMONIA'

test_n_folder = test_folder / 'NORMAL'
test_p_folder = test_folder / 'PNEUMONIA'

### Store Images

In [4]:
#create list to store images
train_n = [file for file in os.listdir(train_n_folder) if file.endswith('.jpeg')]
train_p = [file for file in os.listdir(train_p_folder) if file.endswith('.jpeg')]

val_n = [file for file in os.listdir(val_n_folder) if file.endswith('.jpeg')]
val_p = [file for file in os.listdir(val_p_folder) if file.endswith('.jpeg')]

test_n = [file for file in os.listdir(test_n_folder) if file.endswith('.jpeg')]
test_p = [file for file in os.listdir(test_p_folder) if file.endswith('.jpeg')]

In [5]:
train_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
        train_folder, 
        target_size=(256, 256), batch_size= 5216)

Found 5216 images belonging to 2 classes.


In [6]:
val_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
        val_folder, 
        target_size=(256, 256), batch_size = 624) 

Found 624 images belonging to 2 classes.


In [7]:
test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
        test_folder, 
        target_size=(256, 256), batch_size = 16)

Found 16 images belonging to 2 classes.


In [8]:
# create the data sets
train_images, train_labels = next(train_generator)
test_images, test_labels = next(test_generator)
val_images, val_labels = next(val_generator)

In [10]:
train_y = np.reshape(train_labels[:,0], (5216,1))
val_y = np.reshape(val_labels[:,0], (624,1))
test_y = np.reshape(test_labels[:,0], (16,1))

In [29]:
#create class_weight
class_weight = {0: (3875/1341), 1: 1}
class_weight

{0: 2.889634601043997, 1: 1}

In [67]:
model = models.Sequential()
model.add(layers.Conv2D(16, (3, 3), activation='relu', padding = 'same', input_shape=(256, 256, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(32, (3, 3), activation='relu', padding = 'same'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu', padding = 'same'))
model.add(layers.MaxPooling2D((2, 2)))

In [68]:
model.add(layers.Flatten())

model.add(layers.Dense(1, activation='sigmoid'))

In [69]:
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_30 (Conv2D)           (None, 256, 256, 16)      448       
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 128, 128, 16)      0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 128, 128, 32)      4640      
_________________________________________________________________
max_pooling2d_27 (MaxPooling (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 64, 64, 64)        18496     
_________________________________________________________________
max_pooling2d_28 (MaxPooling (None, 32, 32, 64)        0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 65536)           

In [70]:
model.compile(optimizer=optimizers.Adam(lr=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Recall()])

In [71]:
history = model.fit(train_images,
                    train_y,
                    epochs=10,
                    class_weight = class_weight, 
                    batch_size=32,
                    validation_data=(val_images, val_y))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
 30/163 [====>.........................] - ETA: 2:46 - loss: 0.1031 - accuracy: 0.9729 - recall_12: 0.9206

KeyboardInterrupt: 

In [42]:
model_train = model.evaluate(train_images, train_y)
model_val = model.evaluate(val_images, val_y)



In [43]:
# Fast model
fmodel = models.Sequential()

fmodel.add(layers.Conv2D(16, (3, 3), activation='relu',padding='same', input_shape=(256,256,3)))
fmodel.add(layers.MaxPooling2D((2, 2)))

fmodel.add(layers.Flatten())

fmodel.add(layers.Dense(1, activation='sigmoid'))

In [44]:
fmodel.compile(optimizer=optimizers.RMSprop(lr=1e-4),
              loss='binary_crossentropy',
              metrics=[tf.keras.metrics.Recall()])

In [45]:
fm_history = fmodel.fit(train_images, train_y,
                    epochs=20,
                    class_weight = class_weight,
                    batch_size=10,
                    validation_data=(val_images, val_y))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [46]:
fmodel_train = fmodel.evaluate(train_images, train_y)
fmodel_val = fmodel.evaluate(val_images, val_y)



# Transfer Learning

In [47]:
from keras.applications import VGG19
cnn_base = VGG19(weights='imagenet', 
                 include_top=False, 
                 input_shape=(256, 256, 3))

In [53]:
cnn_model = models.Sequential()
cnn_model.add(layers.Dense(256, activation='relu', input_dim=2*2*512))
cnn_model.add(layers.Dense(1, activation='sigmoid'))

cnn_model.compile(optimizer=optimizers.RMSprop(lr=1e-4),
              loss='binary_crossentropy',
              metrics=['acc', tf.keras.metrics.Recall()])

history = model.fit(train_images, train_y,
                    epochs=20,
                    class_weight = class_weight,
                    batch_size=10,
                    validation_data=(val_images, val_y))

Epoch 1/20

KeyboardInterrupt: 