In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
from PIL import Image
import os
import numpy as np
import cv2

In [2]:
# img loading and preprocessing

class_names = ('NORMAL', 'PNEUMONIA')

train_imgs = []
test_imgs = []

img_size = (512, 512) # to standardize input size for cnn

# load normal train imgs
for file_name in os.listdir('data/train/NORMAL'):
    img = np.resize(cv2.imread('data/train/NORMAL/' + file_name, 0), img_size)
    img = img.astype('float32')
    img /= 255.0
    train_imgs.append(img)
    
# Load pneumonia train imgs
for file_name in os.listdir('data/train/PNEUMONIA'):
    img = np.resize(cv2.imread('data/train/PNEUMONIA/' + file_name, 0), img_size)
    img = img.astype('float32')
    img /= 255.0
    train_imgs.append(img)
    
# load normal test imgs
for file_name in os.listdir('data/test/NORMAL'):
    img = np.resize(cv2.imread('data/train/NORMAL/' + file_name, 0), img_size)
    img = img.astype('float32')
    img /= 255.0
    test_imgs.append(img)
    
# Load pneumonia test imgs
for file_name in os.listdir('data/test/PNEUMONIA'):
    img = np.resize(cv2.imread('data/train/PNEUMONIA/' + file_name, 0), img_size)
    img = img.astype('float32')
    img /= 255.0
    test_imgs.append(img)


In [3]:
# more preprocessing

train_x = np.array(train_imgs)
test_x = np.array(test_imgs)

In [4]:
# more preprocessing

train_x = np.expand_dims(train_x, axis=3)
test_x = np.expand_dims(test_x, axis=3)

In [5]:
# label setup

from itertools import repeat

train_y = []
test_y = []

train_y.extend(repeat([0, 1], 1341))
train_y.extend(repeat([1, 0], 3875))

test_y.extend(repeat([0, 1], 242))
test_y.extend(repeat([1, 0], 398))

train_y = np.array(train_y)
test_y = np.array(test_y)

In [6]:
# Convolutional + pooling layers

model = models.Sequential()
model.add(layers.Conv2D(4, (3, 3), padding='same', activation='relu', input_shape=(512, 512, 1))) # extract low level, with dimensional preservation
model.add(layers.MaxPooling2D((2, 2))) # pool by taking max over 2 x 2 matrix
model.add(layers.MaxPooling2D((2, 2))) # pool by taking max over 2 x 2 matrix
model.add(layers.Conv2D(32, (3, 3), padding='valid', activation='relu'))
model.add(layers.MaxPooling2D((2, 2))) # pool by taking max over 2 x 2 matrix
model.add(layers.Conv2D(32, (3, 3), padding='valid', activation='relu'))
model.add(layers.MaxPooling2D((2, 2))) # pool by taking max over 2 x 2 matrix
model.add(layers.Conv2D(32, (3, 3), padding='valid', activation='relu'))
model.add(layers.MaxPooling2D((2, 2))) # pool by taking max over 2 x 2 matrix
model.add(layers.Conv2D(64, (3, 3), padding='valid', activation='relu'))

model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 512, 512, 4)       40        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 256, 256, 4)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 128, 128, 4)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 126, 126, 32)      1184      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 61, 61, 32)        9248      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 30, 30, 32)        0

In [7]:
# FC layers

model.add(layers.Flatten()) # Flatten 3D output from convolutions into 1D vectors

model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(2, activation='softmax')) # output of size 2 vector (to classify between normal and pneumonia)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 512, 512, 4)       40        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 256, 256, 4)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 128, 128, 4)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 126, 126, 32)      1184      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 61, 61, 32)        9248      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 30, 30, 32)        0

In [30]:
# compile model

optim = tf.keras.optimizers.Adam(0.0001)

model.compile(optimizer=optim,
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [31]:
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

(5216, 512, 512, 1)
(5216, 2)
(640, 512, 512, 1)
(640, 2)


In [32]:
# balance classes with weighting

weights = {
    0: 1, 
    1: 0.35
}
print(weights)

{0: 1, 1: 0.35}


In [33]:
# train model

history = model.fit(train_x, train_y, epochs=15, validation_data=(test_x, test_y), class_weight=weights)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 5216 samples, validate on 640 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [34]:
# Save the model
model.save('trained_models/xray')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: trained_models/xray\assets


In [35]:
# Evaluate model with test set only
loss, accuracy = model.evaluate(test_x, test_y, verbose=1)
print('Classification accuracy with test set: ' + str(accuracy*100) + '%')

Classification accuracy with test set: 62.187498807907104%
