In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


In [None]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import plot_model
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix


# Final Project

### By: Andrew Peterson

#### Description:

    This project implements a simple CNN for detecting pneumonia in a patient's chest X-rays.


In [None]:
# Constants
# Batch size used by every generator, and the (square) size images are resized to.
batch_size = 16
img_width = img_height = 500

# Directories
# One folder per dataset split of the chest-xray-pneumonia Kaggle input.
train_dir = "../input/chest-xray-pneumonia/chest_xray/train"
val_dir = "../input/chest-xray-pneumonia/chest_xray/val"
test_dir = "../input/chest-xray-pneumonia/chest_xray/test"


: 

## Data Augmentation


In [None]:
# Image Generator

# Every split gets the same pixel rescaling (0-255 -> 0-1).
pixel_scale = 1./255

# Augmentation is applied to the training data only: random shear, random
# zoom and random horizontal flips make training harder and should help the
# model generalise.
augmentation = dict(
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_data_gen = ImageDataGenerator(rescale=pixel_scale, **augmentation)

# Test and validation data are only rescaled so they match the training
# input range. No augmentation.
test_data_gen = ImageDataGenerator(rescale=pixel_scale)
val_data_gen = ImageDataGenerator(rescale=pixel_scale)


In [None]:
# Create flow for images.
# Every image is resized to (img_height, img_width) and loaded as grayscale;
# labels are binary and batches hold `batch_size` images.
flow_options = dict(
    target_size=(img_height, img_width),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=batch_size,
)

train_gen = train_data_gen.flow_from_directory(train_dir, **flow_options)

# shuffle=False keeps the test images in a fixed order.
test_gen = test_data_gen.flow_from_directory(
    test_dir, shuffle=False, **flow_options)

val_gen = val_data_gen.flow_from_directory(val_dir, **flow_options)


In [None]:
# Plot a few of the images to show what the data looks like.
# A single (augmented) training batch is drawn once and up to 10 of its
# images are shown, instead of generating a whole new batch per subplot.
images, labels = next(train_gen)

# Invert the generator's folder-name -> index mapping so the titles always
# match the labels that flow_from_directory actually assigned.
index_to_name = {index: name for name,
                 index in train_gen.class_indices.items()}

fig, axes = plt.subplots(2, 5, figsize=(12, 6))

# Hide the axes on every panel, including any left empty when the batch
# holds fewer than 10 images.
for ax in axes.flat:
    ax.axis("off")

# zip stops at the shorter input, so at most 10 images are drawn.
for ax, image, label in zip(axes.flat, images, labels):
    ax.set_title(index_to_name[int(label)])
    # Drop the trailing channel axis so imshow receives a 2-D grayscale image.
    ax.imshow(np.squeeze(image), cmap="gray", interpolation="nearest")

fig.tight_layout()
plt.show()


In [None]:
# Model
# Five Conv2D + MaxPooling2D stages followed by a small dense classifier
# that ends in a single sigmoid unit (binary output).
Model = Sequential()

# Initial Layer of CNN
# input_shape is (height, width, channels) so that it lines up with the
# target_size=(img_height, img_width) used by the image generators;
# grayscale images have a single channel.
Model.add(Conv2D(32, (3, 3), activation="relu",
                 input_shape=(img_height, img_width, 1)))
Model.add(MaxPooling2D(pool_size=(2, 2)))

Model.add(Conv2D(64, (3, 3), activation="relu"))
Model.add(MaxPooling2D(pool_size=(2, 2)))

Model.add(Conv2D(64, (3, 3), activation="relu"))
Model.add(MaxPooling2D(pool_size=(2, 2)))

Model.add(Conv2D(128, (3, 3), activation="relu"))
Model.add(MaxPooling2D(pool_size=(2, 2)))

Model.add(Conv2D(128, (3, 3), activation="relu"))
Model.add(MaxPooling2D(pool_size=(2, 2)))

# DNN
Model.add(Flatten())
# `units` is a required argument of Dense. 128 and 64 are the chosen hidden
# layer sizes and can be tuned.
Model.add(Dense(units=128, activation='relu'))
Model.add(Dense(units=64, activation='relu'))

# Output Layer
# One sigmoid unit gives a value in [0, 1], matching class_mode='binary'
# and the binary_crossentropy loss below.
Model.add(Dense(activation='sigmoid', units=1))

# Compile Model to be trained
Model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

Model.summary()

In [None]:
# Draw the network as a top-to-bottom graph, annotated with layer names and
# tensor shapes. A more visual counterpart to Model.summary().
diagram_options = {
    "show_shapes": True,
    "show_layer_names": True,
    "rankdir": 'TB',
    "expand_nested": True,
}
plot_model(Model, **diagram_options)


Overfitting Prevention:


In [None]:
# Early stopping to prevent over fitting: stop once val_loss has not improved
# for 3 consecutive epochs. restore_best_weights=True rolls the model back to
# the weights of the best epoch, so later evaluation does not use the weights
# from the epochs trained after val_loss stopped improving.
early = EarlyStopping(monitor="val_loss", mode="min", patience=3,
                      restore_best_weights=True)

# When val_loss plateaus for 2 epochs, multiply the learning rate by 0.3,
# never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor="val_loss", patience=2, verbose=1, factor=0.3, min_lr=0.000001)

# List of all the callbacks
callbacks_list = [early, learning_rate_reduction]


In [None]:
# Distinct class labels present in the training set.
class_labels = np.unique(train_gen.classes)

# "balanced" weighting: one weight per class, computed from the training labels.
weights = compute_class_weight(
    class_weight="balanced",
    classes=class_labels,
    y=train_gen.classes,
)

# Map each class label to its weight, which is the dictionary form the model
# is given at fit time.
class_weights = {label: weight for label, weight in zip(class_labels, weights)}

print(class_weights)


In [None]:
# Train for up to 25 epochs on the augmented training flow, validating on
# val_gen each epoch, weighting the classes and applying the callbacks.
fit_options = dict(
    epochs=25,
    validation_data=val_gen,
    class_weight=class_weights,
    callbacks=callbacks_list,
)

Model.fit(train_gen, **fit_options)


In [None]:
# Put every per-epoch value recorded in Model.history into a DataFrame
# (one column per recorded value) and plot them together.
history_frame = pd.DataFrame(Model.history.history)
history_frame.plot()

In [None]:
# Evaluate the model on the test data.
# The result is indexed below: element 1 is reported as the accuracy
# (the only metric passed to compile).
test_accuracy = Model.evaluate(test_gen)

accuracy_percent = test_accuracy[1]*100
print('The testing accuracy is :', accuracy_percent, '%')


### Analysis

In [None]:
# Run the trained model over the test flow. test_gen was created with
# shuffle=False, so the images are fed in a fixed order.
preds = Model.predict(
    test_gen,
    verbose=1,
)

In [None]:
# Take the model outputs in `preds` and convert them into binary class labels.
# Values above the threshold become 1, everything else becomes 0.
# The 0.5 threshold can be changed depending on how strict the decision
# should be; the effect of a change can be seen in the heat map.
threshold = 0.5

# Build the labels from the prediction array (not from the model object) and
# flatten them to 1-D so they line up with test_gen.classes.
predictions = (preds > threshold).astype(int).ravel()

In [None]:
# Compare the true test labels with the binary predictions and count each
# (actual, predicted) combination for the two classes.
# NOTE(review): the row/column names assume label 0 is NORMAL and label 1 is
# PNEUMONIA; confirm against test_gen.class_indices.
matrix_counts = confusion_matrix(
    test_gen.classes, predictions, labels=[0, 1])

cm = pd.DataFrame(
    data=matrix_counts,
    index=["Actual Normal", "Actual Pneumonia"],
    columns=["Predicted Normal", "Predicted Pneumonia"],
)

# Show the 2x2 table as an annotated heat map with integer counts.
sns.heatmap(cm, annot=True, fmt="d")
