In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator, array_to_img

import keras
from keras.layers import Dense
from keras.models import Sequential
from keras import models
from keras import layers
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Conv2D, MaxPool2D, Dropout, SpatialDropout2D, BatchNormalization, Input, Activation, Dense, Flatten
from keras import optimizers


import os
import tensorflow as tf
import shutil
import cv2

from sklearn.metrics import classification_report, confusion_matrix

# 1. Preparing the Dataset:

In [None]:
# Dataset root folders, resolved relative to the current working directory.
# os.path.join inserts the separator of the host OS, so the notebook also runs
# on Linux/macOS instead of relying on a hard-coded Windows '\\'.
train_dir = os.path.join(os.getcwd(), 'train')
test_dir = os.path.join(os.getcwd(), 'test')
valid_dir = os.path.join(os.getcwd(), 'val')

In [None]:
# Per-class folders inside the validation and training splits.
# Built with os.path.join so the separators are correct on every OS
# (a literal '\\' is only a path separator on Windows).
valid_dir_normal = os.path.join(os.getcwd(), 'val', 'NORMAL')
valid_dir_pneumonia = os.path.join(os.getcwd(), 'val', 'PNEUMONIA')

train_dir_normal = os.path.join(os.getcwd(), 'train', 'NORMAL')
train_dir_pneumonia = os.path.join(os.getcwd(), 'train', 'PNEUMONIA')

In [None]:
# Number of directory entries currently present in each validation class folder.
valid_length, valid_length_p = (
    len(os.listdir(class_folder))
    for class_folder in (valid_dir_normal, valid_dir_pneumonia)
)

In [None]:
def _holdout_filenames(class_dir, fraction=0.2):
    """Return the first ``fraction`` of the filenames found in ``class_dir``.

    The directory is listed once and sorted, because ``os.listdir`` returns
    entries in arbitrary order and the split would otherwise differ between
    runs and machines. The macOS ``.DS_Store`` metadata file is dropped before
    the fraction is taken so it is never counted as an image.
    """
    filenames = sorted(name for name in os.listdir(class_dir) if name != '.DS_Store')
    return filenames[:round(len(filenames) * fraction)]


# 20% of each training class is earmarked to be moved into the validation split.
# NOTE: this selection reads the current contents of the train folders, so
# re-running it after files have been moved out selects 20% of what remains.
train_normal_move = _holdout_filenames(train_dir_normal)
train_pneumonia_move = _holdout_filenames(train_dir_pneumonia)

In [None]:
# Relocate the selected NORMAL images from the training folder into the
# validation folder; the macOS '.DS_Store' metadata file is left where it is.
for image_name in train_normal_move:
    if image_name == '.DS_Store':
        continue
    source_path = os.path.join(train_dir_normal, image_name)
    shutil.move(source_path, valid_dir_normal)

In [None]:
# Relocate the selected PNEUMONIA images from the training folder into the
# validation folder; the macOS '.DS_Store' metadata file is left where it is.
for image_name in train_pneumonia_move:
    if image_name == '.DS_Store':
        continue
    source_path = os.path.join(train_dir_pneumonia, image_name)
    shutil.move(source_path, valid_dir_pneumonia)

In [None]:
labels = ['PNEUMONIA', 'NORMAL']  # class index = position in this list
img_size = 150  # images are resized to img_size x img_size pixels

def get_training_data(data_dir):
    """Load every image under ``data_dir/<label>`` as a resized grayscale array.

    Parameters
    ----------
    data_dir : str
        Folder that contains one sub-folder per entry of ``labels``.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(n_images, 2)``. Column 0 holds the
        ``img_size`` x ``img_size`` pixel array, column 1 the integer class
        index (0 = 'PNEUMONIA', 1 = 'NORMAL').

    Notes
    -----
    Files that cannot be read or resized are reported with ``print`` and
    skipped, so one bad file does not abort the whole load.
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)  # main folder + PNEUMONIA or NORMAL
        # Sorted so the sample order does not depend on the filesystem's listing order.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            try:
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_arr is None:
                    # cv2.imread returns None (instead of raising) for files it cannot decode.
                    print('Skipping unreadable image:', img_path)
                    continue
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
                data.append((resized_arr, class_num))
            except Exception as e:
                print(e)

    # Fill the (n, 2) object array explicitly: np.array() on ragged
    # [image, label] pairs raises ValueError on recent NumPy versions.
    samples = np.empty((len(data), 2), dtype=object)
    for row, (resized_arr, class_num) in enumerate(data):
        samples[row, 0] = resized_arr
        samples[row, 1] = class_num
    return samples

In [None]:
# Load the three splits, in the order train -> test -> validation.
train, test, val = (
    get_training_data(split_dir)
    for split_dir in (train_dir, test_dir, valid_dir)
)

In [None]:
# Shape of each loaded split, printed in the order train, test, validation.
for split_array in (train, test, val):
    print(split_array.shape)

In [None]:
fig, (axes, axes2) = plt.subplots(nrows=1, ncols=2, figsize=(6, 6))

# Left panel shows the first training sample, right panel the last one;
# each panel is titled with the class name looked up from its label index.
for panel, sample_index in ((axes, 0), (axes2, -1)):
    pixels = train[sample_index][0]
    class_index = train[sample_index][1]
    panel.imshow(pixels, cmap='gray')
    panel.set_title(labels[class_index])

plt.tight_layout()
plt.show()

In [None]:
# Each sample is a (pixels, class index) pair: collect the pixels into X_* and
# the class indices into y_* for every split.
X_train = [sample[0] for sample in train]
y_train = [sample[1] for sample in train]

X_valid = [sample[0] for sample in val]
y_valid = [sample[1] for sample in val]

X_test = [sample[0] for sample in test]
y_test = [sample[1] for sample in test]

In [None]:
# Scale pixel intensities by 1/255.
# NOTE: this rebinds X_train / X_valid / X_test, so running the cell twice
# divides the data twice.
pixel_max = 255.0
X_train, X_valid, X_test = (
    np.array(pixel_list) / pixel_max
    for pixel_list in (X_train, X_valid, X_test)
)

In [None]:
# Add a trailing channel axis so every image is (img_size, img_size, 1),
# and turn the label lists into NumPy arrays.
image_batch_shape = (-1, img_size, img_size, 1)

X_train, X_valid, X_test = (
    pixels.reshape(image_batch_shape)
    for pixels in (X_train, X_valid, X_test)
)
y_train, y_valid, y_test = (
    np.array(label_list)
    for label_list in (y_train, y_valid, y_test)
)

# 1A. Preparing the Dataset V2:

In [None]:
# Image generators feeding the CNN.
# The training generator rescales pixels by 1/255 and augments each image on
# the fly (shear, zoom, horizontal flip); the evaluation generator only rescales.
augmentation_options = dict(
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

test_datagen = ImageDataGenerator(rescale=1./255)  # image normalization only

In [None]:
# Stream 64x64 images in batches of 32 with binary labels from each split folder.
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(64, 64),
                                                    batch_size=32,
                                                    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(valid_dir,
                                                        target_size=(64, 64),
                                                        batch_size=32,
                                                        class_mode='binary')

# shuffle=False keeps the test batches in the same order as
# `test_generator.classes`. Predictions made from this generator are compared
# against that attribute later; with the default shuffling they would not
# line up with the true labels and the reported metrics would be meaningless.
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(64, 64),
                                                  batch_size=32,
                                                  class_mode='binary',
                                                  shuffle=False)

In [None]:
# Pull one batch from the training generator and one from the validation
# generator and print their shapes, separated by a blank line.
for batch_source in (train_generator, validation_generator):
    if batch_source is validation_generator:
        print()
    for data_batch, labels_batch in batch_source:
        print('data batch shape:', data_batch.shape)
        print('labels batch shape:', labels_batch.shape)
        break  # only the first batch is inspected


    'train_generator[0]' corresponds to the first batch flowed from the source folder, as an (images, labels) pair;
    'train_generator[0][0]' corresponds to the images of that first batch (32 images, given batch_size = 32);
    The first 16 images of this batch can be converted with the 'array_to_img' function and properly plotted.

In [None]:
image_batch = train_generator[0][0]  # images of the first training batch

# Show the first 16 images of the batch on a 2 x 8 grid, without axes.
n_rows, n_cols = 2, 8
plt.figure(figsize=(20, 5))
for position in range(1, n_rows * n_cols + 1):
    panel = plt.subplot(n_rows, n_cols, position)
    panel.imshow(array_to_img(image_batch[position - 1]), cmap='gray')
    panel.axis('off')
plt.tight_layout()
plt.show()

# 2. Modelling:

In [None]:
# CNN for 150x150 single-channel images: four Conv2D + MaxPooling2D stages,
# then a 512-unit dense layer and a single sigmoid output unit.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 1)),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# The network ends in a single sigmoid unit, so the 0/1 labels are binary
# targets and the matching loss is binary cross-entropy. Sparse categorical
# cross-entropy expects one output unit per class and does not fit a
# one-unit output.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

In [None]:
# Train for at most 30 epochs, validating on the hold-out split after each one;
# the EarlyStopping callback is configured with patience=5.
early_stopping = keras.callbacks.EarlyStopping(patience=5)
model.fit(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

# 2A. Modelling:

In [None]:
# CNN for 64x64 three-channel images: four Conv2D + MaxPooling2D stages
# (32, 64, 128, 128 filters), then a 512-unit dense layer and a single
# sigmoid output unit.
model = models.Sequential()

for stage_index, n_filters in enumerate((32, 64, 128, 128)):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (64, 64, 3)} if stage_index == 0 else {}
    model.add(layers.Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs))
    model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

In [None]:
# Binary cross-entropy loss, Adam optimizer, accuracy tracked under the name 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Train from the directory generators: 10 epochs of 100 training batches each,
# with 50 validation batches evaluated per epoch.
model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=50,
)

# 3A. Results:

In [None]:
# Plot every metric recorded in the training history as one line per column.
history_frame = pd.DataFrame(model.history.history)
history_axes = history_frame.plot(figsize=(8, 5))
history_axes.set_title('Loss / Accuracy')
history_axes.grid(True)
history_axes.set_ylim(0, 1)  # restrict the vertical range to [0, 1]

plt.tight_layout()
plt.show()

In [None]:
# Evaluate the trained model on the test generator.
# The next cell reads index 1 of the result as the accuracy
# (presumably [loss, acc], given metrics=['acc'] at compile time).
test_accu = model.evaluate_generator(test_generator)

In [None]:
# Report the test accuracy as a percentage rounded to two decimals.
accuracy_percent = round(test_accu[1] * 100, 2)
print('The testing accuracy is :', accuracy_percent, '%')

In [None]:
# Model outputs for every test image; thresholded at 0.5 further below.
# NOTE(review): these are later compared against `test_generator.classes`,
# which only lines up if the generator yields images unshuffled — confirm
# test_generator was created with shuffle=False.
predictions = model.predict_generator(test_generator)

In [None]:
# Display the shape of the prediction array.
predictions.shape

In [None]:
# Turn the sigmoid outputs into hard class labels in place:
# values up to 0.5 become 0, values above 0.5 become 1.
at_or_below_threshold = predictions <= 0.5
above_threshold = predictions > 0.5
predictions[at_or_below_threshold] = 0
predictions[above_threshold] = 1

In [None]:
# Per-class precision / recall / F1 of the thresholded predictions
# against the true class indices held by the test generator.
report_text = classification_report(test_generator.classes, predictions)
print(report_text)

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Confusion matrix of true class indices (rows) vs. thresholded predictions
# (columns), drawn explicitly on `ax`.
# NOTE(review): the tick labels assume class index 0 is NORMAL and 1 is
# PNEUMONIA — confirm against test_generator.class_indices.
sns.heatmap(confusion_matrix(test_generator.classes, predictions),
            annot=True, fmt='d', annot_kws={"size": 16},
            xticklabels=['Predicted Norm', 'Predicted Pneumonia'],
            yticklabels=['Actual Norm', 'Actual Pneumonia'],
            ax=ax)
ax.set_title('Confusion matrix (test set)')
plt.show()