In [ ]:
!pip install tensorflow
!pip install opencv-python
!pip install numpy
!pip install matplotlib

In [ ]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import cv2
import os
import matplotlib.pyplot as plt

In [ ]:
# Empty placeholders for the image/label lists of the three dataset splits.
train_images, train_labels = [], []
test_images, test_labels = [], []
val_images, val_labels = [], []


In [ ]:
def get_data(target_dir, augment=True, image_size=(256, 256)):
    """Load the grayscale images under ``target_dir`` together with labels.

    ``target_dir`` is expected to contain one sub-directory per class, named
    ``NORMAL`` (label 0) and ``PNEUMONIA`` (label 1). Entries with any other
    name (for example a stray ``.DS_Store``) are skipped, and so are files
    that OpenCV cannot decode, instead of aborting the whole load.

    Args:
        target_dir: Path of the split directory, e.g. ``"chest_xray/train"``.
        augment: When true (the default, which matches the previous
            behaviour) a horizontally flipped copy of every image is added
            in front of the original. Pass ``False`` for splits where
            augmented copies are not wanted.
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        A ``(images, labels)`` tuple of two equally long lists: the resized
        image arrays and their integer class labels.
    """
    images = []
    labels = []
    label_dict = {"NORMAL": 0, "PNEUMONIA": 1}
    # Sorted so the load order does not depend on the file system.
    for directory in sorted(os.listdir(target_dir)):
        path = os.path.join(target_dir, directory)
        if directory not in label_dict or not os.path.isdir(path):
            continue
        label = label_dict[directory]
        for image in sorted(os.listdir(path)):
            img = cv2.imread(os.path.join(path, image), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # imread signals an unreadable / non-image file with None.
                continue
            img = cv2.resize(img, image_size)
            if augment:
                # Flip code 1 mirrors the image around the vertical axis.
                images.append(cv2.flip(img, 1))
                labels.append(label)
            images.append(img)
            labels.append(label)

    return images, labels

In [ ]:
# Read every split from disk and convert its image and label lists into
# NumPy arrays in one step.
train_images, train_labels = map(np.array, get_data("chest_xray/train"))
test_images, test_labels = map(np.array, get_data("chest_xray/test"))
val_images, val_labels = map(np.array, get_data("chest_xray/val"))


In [ ]:
def shuffle(A, B):
    """Shuffle ``A`` and ``B`` in place, keeping their elements paired up.

    The global NumPy RNG state is captured before shuffling ``A`` and put
    back before shuffling ``B``, so the second shuffle replays the same
    random draws and element ``i`` of ``A`` still corresponds to element
    ``i`` of ``B`` afterwards.

    Args:
        A: First sequence (shuffled in place along its first axis).
        B: Second sequence of the same length (shuffled in place).

    Raises:
        ValueError: If ``A`` and ``B`` differ in length; replaying the draws
            would then silently break the pairing.
    """
    if len(A) != len(B):
        raise ValueError(
            "shuffle() needs inputs of equal length, got "
            + str(len(A)) + " and " + str(len(B))
        )
    cur_state = np.random.get_state()
    np.random.shuffle(A)
    # Rewind the RNG so B is shuffled with exactly the same draws as A.
    np.random.set_state(cur_state)
    np.random.shuffle(B)

# Shuffle each split's images and labels together: train, then test, then val.
for split_images, split_labels in (
    (train_images, train_labels),
    (test_images, test_labels),
    (val_images, val_labels),
):
    shuffle(split_images, split_labels)

In [ ]:
#Create the CNN
# The convolutional part is eight identical stages of
# Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout(0.1).
# Each entry below is (filters, kernel_size): 3x3 and 1x1 convolutions
# alternate while the filter count doubles every second stage.
conv_stages = [
    (32, (3, 3)),
    (32, (1, 1)),
    (64, (3, 3)),
    (64, (1, 1)),
    (128, (3, 3)),
    (128, (1, 1)),
    (256, (3, 3)),
    (256, (1, 1)),
]

model = keras.Sequential()
for stage_index, (filters, kernel_size) in enumerate(conv_stages):
    # Only the very first layer declares the input: 256x256 images with a
    # single (grayscale) channel.
    first_layer_kwargs = {"input_shape": (256, 256, 1)} if stage_index == 0 else {}
    model.add(keras.layers.Conv2D(filters, kernel_size, activation='relu', padding='same', **first_layer_kwargs))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D())
    model.add(keras.layers.Dropout(0.1))

#Fully connected layers
model.add(keras.layers.Flatten())
for _ in range(2):
    model.add(keras.layers.Dense(32, activation="relu"))
    model.add(keras.layers.Dropout(0.1))
# Two-way softmax output, one unit per class label (0 and 1).
model.add(keras.layers.Dense(2, activation='softmax'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

In [ ]:
# Append a trailing axis (position 3) to every image array so the shapes
# line up with the (256, 256, 1) input declared on the model.
train_images, val_images, test_images = (
    np.expand_dims(batch, axis=3)
    for batch in (train_images, val_images, test_images)
)

print(train_images.shape)

(10432, 256, 256, 1)


In [ ]:

#Create a callback
# Stop training once val_loss has gone 3 epochs without improving.
# restore_best_weights=True makes an early stop hand back the weights of the
# best epoch instead of the weights from the last (worse) epoch.
callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

#Compile the CNN
# Labels are plain integers (0 / 1), hence the sparse cross-entropy loss.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

#fit the model
history = model.fit(
    train_images,
    train_labels,
    epochs=20,
    callbacks=[callback],
    validation_data=(val_images, val_labels),
    batch_size=16,
)

In [ ]:
# Report loss and accuracy on the training and test splits.
train_loss, train_acc = model.evaluate(train_images, train_labels)
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"Training Accuracy: {train_acc} Training Loss: {train_loss}")
print(f"Testing Accuracy: {test_acc} Testing Loss: {test_loss}")

# Show the predictions for (up to) the first 20 test images. Slicing copes
# with a test set smaller than 20, and one batched predict() call replaces
# twenty single-sample calls.
sample_predictions = model.predict(test_images[:20])
for pred, actual_label in zip(sample_predictions, test_labels):
    print(pred)
    print(f"Model predicted {np.argmax(pred)} when it was {actual_label}")

In [ ]:
# Write the trained model to disk. 82.69% is the best accuracy so far (v2).
model_output_path = "pneumonia_model_v2.h5"
model.save(model_output_path)