In [14]:
# Input data files are available in the "../input/" directory.
import numpy as np 
from matplotlib import pyplot as plt
import tensorflow as tf
import cv2
import os
from sklearn.utils import shuffle
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense
from tensorflow.keras.models import Sequential

In [15]:
# Look at the dataset: list the entries of the training directory
# (one sub-folder per class; the folder names are the keys used for labelling).
!cd ../input/chest-xray-pneumonia/chest_xray/chest_xray/train/ && ls

NORMAL	PNEUMONIA


In [16]:
# Integer label assigned to each class; keys match the class folder names on disk.
class_labels = dict(NORMAL=0, PNEUMONIA=1)

# Dataset root folders, in order: training images first, test images second.
datasets = [
    "../input/chest-xray-pneumonia/chest_xray/chest_xray/train",
    "../input/chest-xray-pneumonia/chest_xray/chest_xray/test",
]

# Placeholder string; not read by any cell visible in this part of the notebook.
current_fl = ""

In [17]:
def train_test_split(dataset_dirs=None, target_size=(224, 224), random_state=None):
    """Load, resize, label and shuffle the images of every dataset folder.

    Each dataset folder is expected to contain one sub-folder per class, named
    after a key of the module-level ``class_labels`` dictionary (here
    ``NORMAL`` and ``PNEUMONIA``). Hidden entries (names starting with ".",
    such as ``.DS_Store``) and stray non-directory entries are ignored. Files
    that OpenCV cannot decode are skipped and reported with a warning instead
    of crashing the resize step.

    Args:
        dataset_dirs: Iterable of dataset root paths. Defaults to the
            module-level ``datasets`` list (train folder, then test folder).
        target_size: ``(width, height)`` passed to ``cv2.resize``.
        random_state: Optional seed forwarded to ``sklearn.utils.shuffle`` so
            the sample order can be reproduced. ``None`` keeps the original
            unseeded behaviour.

    Returns:
        A list with one ``(images, labels)`` tuple per dataset, in the same
        order as ``dataset_dirs``. ``images`` is a float32 array (pixel values
        are NOT rescaled here) and ``labels`` is an int32 array; both are
        shuffled with the same permutation.

    Raises:
        KeyError: If a class sub-folder name is not a key of ``class_labels``.
    """
    import warnings  # local import: only needed to report skipped files

    if dataset_dirs is None:
        dataset_dirs = datasets

    # Collects one (images, labels) tuple per dataset folder
    out = list()
    for dataset in dataset_dirs:
        images, labels = list(), list()
        skipped = 0
        # Sorted listing makes the pre-shuffle order independent of the filesystem
        for folder in sorted(os.listdir(dataset)):
            folder_path = os.path.join(dataset, folder)
            # Ignore hidden entries (.DS_Store, ...) and anything that is not a class folder
            if folder.startswith(".") or not os.path.isdir(folder_path):
                continue
            # The folder name is the class name; map it to its integer label
            labl = class_labels[folder]
            for fl in sorted(os.listdir(folder_path)):
                if fl.startswith("."):
                    continue
                img = cv2.imread(os.path.join(folder_path, fl))
                # cv2.imread signals an unreadable/non-image file by returning None
                if img is None:
                    skipped += 1
                    continue
                images.append(cv2.resize(img, target_size))
                labels.append(labl)
        if skipped:
            warnings.warn(
                "Skipped %d unreadable file(s) in %s" % (skipped, dataset)
            )
        # Shuffle images and labels with the same permutation so pairs stay aligned
        images, labels = shuffle(images, labels, random_state=random_state)
        # Convert to numpy arrays with explicit dtypes to avoid errors during training
        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')
        out.append((images, labels))
    return out

In [18]:
# Run the custom loader once; it yields the training split first and the
# testing split second, each as an (images, labels) pair.
train_split, test_split = train_test_split()
trainX, trainY = train_split
testX, testY = test_split

In [19]:
# Divide every pixel value by 255 so both splits feed the network on the
# same, smaller numeric scale.
# NOTE: re-running this cell without reloading the data rescales the arrays again.
trainX = np.true_divide(trainX, 255.)
testX = np.true_divide(testX, 255.)

In [20]:
# Final architecture, reached by trial and error: 4 convolution blocks
# (3x3 conv + ReLU followed by 2x2 max-pooling) and 4 dense layers.
# Test accuracy changes from run to run because no random seed is set; the
# evaluation output saved in this notebook shows roughly 0.84.

model = Sequential([
    # Feature extractor: filter count shrinks 128 -> 128 -> 64 -> 32
    Conv2D(filters=128, kernel_size=(3,3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(filters=32, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Flatten(),
    # Classifier head: 256 -> 128 -> 64, then a 2-way softmax (one unit per class)
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax'),
])

In [21]:
# Compile with Adam and sparse categorical cross-entropy (labels are plain
# integers, not one-hot vectors), then print the layer/parameter summary.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 222, 222, 128)     3584      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 111, 111, 128)     0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 109, 109, 128)     147584    
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 54, 54, 128)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 52, 52, 64)        73792     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 26, 26, 64)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 24, 24, 32)       

In [22]:
# Train for 5 epochs, holding out 10% of the training samples for validation.
model.fit(
    x=trainX,
    y=trainY,
    validation_split=.1,
    epochs=5,
)

Train on 4694 samples, validate on 522 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f4e685865f8>

In [23]:
# Score the trained model on the test split; evaluate() yields the loss
# followed by the accuracy metric. Only the accuracy is printed.
evaluation = model.evaluate(x=testX, y=testY, batch_size=16)
test_lss, test_acc = evaluation
print(test_acc)



0.84134614


In [None]:
# Sanity-check the model on a single test image and print the predicted class.
# NOTE: despite the variable name, this sample is read from the PNEUMONIA folder.
test_normal_img = cv2.imread("../input/chest-xray-pneumonia/chest_xray/chest_xray/test/PNEUMONIA/person21_virus_52.jpeg")
# cv2.imread returns None instead of raising when the file cannot be read
if test_normal_img is None:
    raise FileNotFoundError("Could not read the sample test image")
test_normal_img = cv2.resize(test_normal_img, (224,224))

# Add the leading batch axis expected by model.predict
test_normal_img = np.expand_dims(test_normal_img, 0)
# Apply the same preprocessing as the training data (float32, divided by 255);
# feeding raw 0-255 pixels would put the input on a different scale than the
# one the network was trained on.
# `probs` holds the index of the most likely class (name kept for compatibility).
probs = np.argmax(model.predict(test_normal_img.astype('float32') / 255.))

class_names = ['Normal', 'Pneumonia']
print(class_names[probs])