In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import classification_report, confusion_matrix

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

import glob

# Exploratory Data Analysis

In [None]:
# Locations of the three dataset splits, relative to the notebook's working directory.
train_path, val_path, test_path = (
    "../input/chest-xray-pneumonia/chest_xray/train",
    "../input/chest-xray-pneumonia/chest_xray/val",
    "../input/chest-xray-pneumonia/chest_xray/test",
)

In [None]:
# Collect the training image paths for each class.
# glob returns matches in arbitrary (filesystem-dependent) order, so the lists
# are sorted to keep the "first six" sample images plotted later stable
# across runs and machines.
normal_train_images = sorted(glob.glob(train_path + "/NORMAL/*.jpeg"))
pneumonia_train_images = sorted(glob.glob(train_path + "/PNEUMONIA/*.jpeg"))

# Class sizes: NORMAL first, then PNEUMONIA.
print(len(normal_train_images))
print(len(pneumonia_train_images))

In [None]:
# One row per training image: label 0 for every NORMAL file followed by label 1
# for every PNEUMONIA file, stored in a single "class" column.
data = pd.DataFrame(
    {
        "class": np.concatenate(
            [
                [0] * len(normal_train_images),
                [1] * len(pneumonia_train_images),
            ]
        )
    }
)

In [None]:
# Bar chart of how many training images carry each label (0 = NORMAL, 1 = PNEUMONIA).
sns.countplot(data = data, x = 'class')

In [None]:
# Share of each class in the training set; the first wedge (Pneumonia) is
# pulled out of the pie for emphasis.
plt.figure(figsize = (12, 7))
plt.pie(
    x = np.array([len(pneumonia_train_images), len(normal_train_images)]),
    labels = ["Pneumonia", "Normal"],
    explode = [0.2, 0],
    autopct = "%.1f%%",
    pctdistance = 0.5,
)
plt.title("Training data", fontsize = 14);

In [None]:
# Preview up to six NORMAL training images in a 2x3 grid (axis ticks hidden).
fig, axes = plt.subplots(nrows = 2, ncols = 3, figsize = (15, 10), subplot_kw = {'xticks':[], 'yticks':[]})
# zip() stops at the shorter sequence, so a folder with fewer than six images
# leaves the remaining panels empty instead of raising IndexError.
for ax, image_path in zip(axes.flat, normal_train_images):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file cannot be decoded.
        ax.set_title("Normal (unreadable)")
        continue
    img = cv2.resize(img, (224, 224))
    # OpenCV loads colour images as BGR while matplotlib expects RGB, so the
    # channel axis is reversed before display.
    ax.imshow(img[:, :, ::-1])
    ax.set_title("Normal")
fig.tight_layout()

plt.show()

In [None]:
# Preview up to six PNEUMONIA training images in a 2x3 grid (axis ticks hidden).
fig, axes = plt.subplots(nrows = 2, ncols = 3, figsize = (15, 10), subplot_kw = {'xticks':[], 'yticks':[]})
# zip() stops at the shorter sequence, so a folder with fewer than six images
# leaves the remaining panels empty instead of raising IndexError.
for ax, image_path in zip(axes.flat, pneumonia_train_images):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file cannot be decoded.
        ax.set_title("Pneumonia (unreadable)")
        continue
    img = cv2.resize(img, (224, 224))
    # OpenCV loads colour images as BGR while matplotlib expects RGB, so the
    # channel axis is reversed before display.
    ax.imshow(img[:, :, ::-1])
    ax.set_title("Pneumonia")
# Same spacing as the NORMAL preview grid.
fig.tight_layout()

plt.show()

# Augmenting the training data

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented (rotation,
# brightness, shifts, horizontal flip); validation and test images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale = 1./255.,
    horizontal_flip = True,
    rotation_range = 30,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    brightness_range = [0.2, 1.2],
)
val_datagen = ImageDataGenerator(rescale = 1./255.)
test_datagen = ImageDataGenerator(rescale = 1./255.)

In [None]:
# Directory iterators that yield (image batch, binary label batch) pairs at 224x224.
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size = (224, 224),
    batch_size = 16,
    class_mode = 'binary'
)

# Validate on the validation split and keep the test split held out for the
# final evaluation: feeding the test images to early stopping would leak them
# into model selection and make the reported metrics optimistic.
# NOTE(review): if the val folder turns out to be too small for a stable
# val_loss, carve a validation split out of the training data rather than
# reusing the test folder.
validation_generator = val_datagen.flow_from_directory(
    val_path,
    target_size = (224, 224),
    batch_size = 8,
    class_mode = 'binary'
)

# shuffle = False keeps the files in a fixed order so that predictions line up
# with test_generator.classes when the metrics are computed.
test_generator = test_datagen.flow_from_directory(
    test_path,
    shuffle = False,
    target_size = (224, 224),
    batch_size = 32,
    class_mode = 'binary'
)

# Build the model

In [None]:
# Transfer-learning classifier: a frozen ImageNet-pretrained ResNet50V2 backbone
# followed by a small trainable dense head with a single sigmoid output.
input_shape = (224, 224, 3)

base_model = ResNet50V2(weights = 'imagenet', input_shape = input_shape, include_top = False)

# Freeze the whole backbone in one step (propagates to every nested layer) so
# that only the new head is trained.
base_model.trainable = False

model = Sequential([
    tf.keras.Input(shape = input_shape),
    # The data generators feed pixels scaled to [0, 1] (rescale = 1/255), but the
    # ImageNet ResNetV2 weights were trained on inputs in [-1, 1]. Remap inside
    # the model so the pretrained features see the range they expect; this also
    # travels with the saved model.
    tf.keras.layers.Rescaling(scale = 2.0, offset = -1.0),
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation = 'relu'),
    Dropout(0.2),
    # Single sigmoid unit to match the generators' class_mode = 'binary' labels.
    Dense(1, activation = 'sigmoid'),
])

model.compile(optimizer = "adam", loss = 'binary_crossentropy', metrics = ["accuracy"])

In [None]:
# Print the layer-by-layer architecture and parameter counts of the assembled model.
model.summary()

In [None]:
# Stop training once val_loss has not improved for 4 consecutive epochs. When
# that happens, restore the weights from the best epoch so the model that is
# saved and evaluated afterwards is the best one seen, not the last one trained.
callback = tf.keras.callbacks.EarlyStopping(
    monitor = "val_loss",
    mode = "min",
    patience = 4,
    restore_best_weights = True,
)

history = model.fit(
    train_generator,
    validation_data = validation_generator,
    steps_per_epoch = 100,   # 100 batches per epoch; batch size is set on the generator
    epochs = 10,
    callbacks = [callback],  # Keras documents this argument as a list of callbacks
    # NOTE(review): these look like inverse-class-frequency weights (class 0
    # up-weighted, class 1 down-weighted) -- confirm against the class counts
    # printed during the exploratory analysis.
    class_weight = {0: 1.95, 1: 0.67},
)

In [None]:
# Persist the trained model to an .h5 file in the current working directory
# so it can be reloaded for evaluation.
model.save('pneumonia_pred_model_1.h5')

In [None]:
# Pull the per-epoch training and validation curves recorded by model.fit.
accuracy, val_accuracy, loss, val_loss = (
    history.history[key]
    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

In [None]:
# Training vs. validation curves: accuracy in the top-left panel and loss in the
# top-right panel of a 2x2 grid.
fig = plt.figure(figsize = (15,10))

# --- accuracy panel ---
acc_ax = fig.add_subplot(2, 2, 1)
acc_ax.plot(accuracy, label = "Training Accuracy")
acc_ax.plot(val_accuracy, label = "Validation Accuracy")
acc_ax.set_ylim(0.4, 1)
acc_ax.legend(['Train', 'Validation'], loc = 'upper left')
acc_ax.set_title("Training vs Validation Accuracy")
acc_ax.set_xlabel('epoch')
acc_ax.set_ylabel('accuracy')

# --- loss panel ---
loss_ax = fig.add_subplot(2, 2, 2)
loss_ax.plot(loss, label = "Training Loss")
loss_ax.plot(val_loss, label = "Validation Loss")
loss_ax.legend(['Train', 'Validation'], loc = 'upper left')
loss_ax.set_title("Training vs Validation Loss")
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')

plt.show()

In [None]:
from tensorflow.keras.models import load_model

# Reload the model from the same relative path it was saved to, instead of a
# hard-coded /kaggle/working location, so the save/load round-trip also works
# when the notebook runs outside Kaggle.
model = load_model('pneumonia_pred_model_1.h5')

In [None]:
# Run the model over every batch yielded by test_generator.
# presumably returns one sigmoid probability per image, shape (n_images, 1) -- confirm
pred = model.predict(test_generator)

In [None]:
# Threshold the predicted probabilities at 0.5 to get hard class labels
# (1 when prob >= 0.5, else 0). This is done as a single vectorized comparison
# rather than a Python loop; ravel() flattens the per-image output so the
# result is a flat list of ints, one per image.
y_pred = (np.asarray(pred).ravel() >= 0.5).astype(int).tolist()

In [None]:
# Ground-truth class indices taken from the test iterator. They line up with
# the predictions because test_generator was created with shuffle = False.
y_true = test_generator.classes

In [None]:
# Confusion matrix of the thresholded predictions (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true, y_pred)
# fmt = "d" prints the cell counts as plain integers; the default format
# switches to scientific notation once a count reaches three digits.
ax = sns.heatmap(cm, annot = True, fmt = "d", cmap = "Blues", annot_kws = {"size": 16})
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label");

In [None]:
# Text summary of per-class precision, recall and F1 for the thresholded predictions.
report = classification_report(y_true, y_pred)
print(report)