# Pneumonia_Chest_X-Ray

CNN-based detection of pneumonia from chest X-ray images.  
Project By: [Uzair Bin Asim](https://github.com/Uzair05)

In [1]:
import os 
import zipfile

## Download dataset

In [None]:
# Download the chest X-ray pneumonia dataset archive with the Kaggle CLI.
# The extraction cell further down looks for ./chest-xray-pneumonia.zip.
# NOTE(review): the kaggle CLI presumably needs API credentials to be
# configured before this works on a fresh machine - confirm.
#!pip install --upgrade kaggle
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

In [None]:
# Unpack the downloaded Kaggle archive into ./data.
data_dir = "./data"
localzip = "./chest-xray-pneumonia.zip"

# exist_ok=True makes the cell safe to re-run and removes the
# check-then-create race. A real failure (e.g. permissions) is allowed to
# raise, because every later cell depends on this directory.
os.makedirs(data_dir, exist_ok=True)

if os.path.isfile(localzip):
    with zipfile.ZipFile(localzip, 'r') as zip_ref:
        zip_ref.extractall(data_dir)
else:
    # Not fatal: the data may already have been extracted by an earlier run.
    print(f"Archive not found, nothing extracted:\t{localzip}")

In [None]:
## Clean the dataspace
# !rm -rf data/chest_xray/__MACOSX
# !cd data/ && find -type f | grep -E "\.DS_Store" | xargs -I{} rm {}

## Handle Data
Create the train, test, and validation datasets.

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
# Directory that holds the train / test / val split folders.
main_path = "data/chest_xray/chest_xray"

# One directory per split, each joined onto main_path.
train_dir = os.path.join(main_path, "train")
test_dir = os.path.join(main_path, "test")
validation_dir = os.path.join(main_path, "val")

In [5]:
# Random augmentations applied to the training images only.
train_augmentation = dict(
    rotation_range=10,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# All three generators multiply pixel values by 1/255; the test and
# validation generators do nothing else.
train_datagen = ImageDataGenerator(rescale=1.0/255.0, **train_augmentation)
test_datagen = ImageDataGenerator(rescale=1.0/255.0)
val_datagen = ImageDataGenerator(rescale=1.0/255.0)

In [6]:
# Options shared by the three directory iterators.
# color_mode='grayscale' makes each batch (20, 150, 150, 1). The default
# 'rgb' would produce 3 channels, which does not match the model's
# input_shape=(150,150, 1) and makes model.fit fail on a shape mismatch.
flow_options = dict(
    target_size=(150,150),
    color_mode='grayscale',
    class_mode='binary',  # one 0/1 label per image, inferred from the sub-folder
    batch_size=20,
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(validation_dir, **flow_options)

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
Found 16 images belonging to 2 classes.


## Create Model - v2

In [11]:
# Small CNN for binary classification of 150x150 single-channel images.
# The input is declared as one channel, so the generators feeding this model
# must yield grayscale batches (color_mode='grayscale').
model = tf.keras.models.Sequential([
    # Convolutional feature extractor.
    tf.keras.layers.Conv2D(12, (3,3), activation=tf.nn.relu, padding='same', input_shape=(150,150, 1)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation=tf.nn.relu, padding='same'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation=tf.nn.relu, padding='same'),
    # Fully connected classifier head with dropout between the dense layers.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.3),
    # One sigmoid unit: the generators use class_mode='binary' (a single 0/1
    # label per image) and the loss is binary_crossentropy. The previous
    # Dense(28, softmax) head did not match either of those.
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
model.compile(optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 150, 150, 12)      120       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 75, 75, 12)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 75, 75, 32)        3488      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 37, 37, 32)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 37, 37, 64)        18496     
_________________________________________________________________
flatten_2 (Flatten)          (None, 87616)             0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)              

## Train Model - v1

In [12]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once the training accuracy reaches a threshold.

    Args:
        threshold: Accuracy, as a fraction between 0 and 1, at which training
            is stopped. Defaults to 0.99.
    """

    def __init__(self, threshold=0.99):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's 'accuracy' metric and request a stop if it is high enough."""
        # Avoid a mutable default argument; treat a missing logs dict as empty.
        logs = logs or {}
        accuracy = logs.get('accuracy')
        if accuracy is not None and accuracy >= self.threshold:
            print(f"\nReached {self.threshold*100}% accuracy so cancelling training!")
            self.model.stop_training = True

callbacks = myCallback()

In [None]:
# First training run.
# Step counts are derived from the generators instead of being hard-coded
# (previously 259 and 30), so every batch is used each epoch and the numbers
# stay correct if the dataset size or batch size changes.
# NOTE(review): validation is done on test_generator; val_generator is built
# earlier but is not used by this call.
history = model.fit(
    train_generator,
    validation_data = test_generator,
    steps_per_epoch = len(train_generator),
    epochs = 10,
    validation_steps = len(test_generator),
    verbose = 1,
    callbacks=[callbacks]  # early stop once the accuracy threshold is reached
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

## Evaluation

In [None]:
# Plot training vs. validation accuracy per epoch for the last fit() call.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

epochs = range(len(acc))

# One explicit figure/axes pair. The earlier plt.figure() call after plotting
# opened a second, empty figure that was shown alongside the real plot.
fig, ax = plt.subplots()
ax.plot(epochs, acc, 'r', label='Training accuracy')
ax.plot(epochs, val_acc, 'b', label='Validation accuracy')
ax.set_title('Training and Validation accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend(loc=0)

plt.show()

## Save Model

In [None]:
# Save the trained model to a single HDF5 file.
# NOTE(review): the file name mentions EfficientNetB3, but the model built in
# this notebook is a custom Sequential CNN - consider renaming.
model_path = "./my_model/OwnCNN_EfficientNetB3_99p.h5"
# Create the target directory first so saving does not fail on a fresh
# checkout where ./my_model does not exist yet.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
tf.keras.models.save_model(model, filepath=model_path, overwrite=True, save_format='h5')

## Train Model - v2

### Load appended data
The dataset author later added extra data in a different directory, so the paths and generators are rebuilt here to point at the updated location.

In [None]:
# Directory that holds the train / test / val split folders of the updated data.
main_path = "data/chest_xray/"

# One directory per split, each joined onto main_path.
train_dir = os.path.join(main_path, "train")
test_dir = os.path.join(main_path, "test")
validation_dir = os.path.join(main_path, "val")

In [None]:
# Random augmentations applied to the training images only.
train_augmentation = dict(
    rotation_range=10,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# All three generators multiply pixel values by 1/255; the test and
# validation generators do nothing else.
train_datagen = ImageDataGenerator(rescale=1.0/255.0, **train_augmentation)
test_datagen = ImageDataGenerator(rescale=1.0/255.0)
val_datagen = ImageDataGenerator(rescale=1.0/255.0)

In [None]:
# Options shared by the three directory iterators for the updated data.
# color_mode='grayscale' makes each batch (20, 150, 150, 1). The default
# 'rgb' would produce 3 channels, which does not match the model's
# input_shape=(150,150, 1) and makes model.fit fail on a shape mismatch.
flow_options = dict(
    target_size=(150,150),
    color_mode='grayscale',
    class_mode='binary',  # one 0/1 label per image, inferred from the sub-folder
    batch_size=20,
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(validation_dir, **flow_options)

### Second Training

In [None]:
# Second training run: continues training the same model on the updated data.
# validation_steps is derived from the generator. The previous hard-coded 50
# can exceed the number of batches the validation iterator provides, which
# either runs out of data or re-counts samples depending on the Keras version.
# NOTE(review): validation is done on test_generator; val_generator is built
# earlier but is not used by this call.
history = model.fit(
    train_generator,
    validation_data = test_generator,
    epochs = 20,
    validation_steps = len(test_generator),
    verbose = 2
)

### Second Evaluation

In [None]:
# Plot training vs. validation accuracy per epoch for the second fit() call.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

epochs = range(len(acc))

# One explicit figure/axes pair. The earlier plt.figure() call after plotting
# opened a second, empty figure that was shown alongside the real plot.
fig, ax = plt.subplots()
ax.plot(epochs, acc, 'r', label='Training accuracy')
ax.plot(epochs, val_acc, 'b', label='Validation accuracy')
ax.set_title('Training and Validation accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend(loc=0)

plt.show()

### Save Model
This overwrites the model file saved after the first training run.

In [None]:
# Save the retrained model to a single HDF5 file, replacing the earlier save.
# NOTE(review): the file name mentions EfficientNetB3, but the model built in
# this notebook is a custom Sequential CNN - consider renaming.
model_path = "./my_model/OwnCNN_EfficientNetB3_99p.h5"
# Create the target directory first so saving does not fail when ./my_model
# does not exist yet (e.g. if the first save cell was skipped).
os.makedirs(os.path.dirname(model_path), exist_ok=True)
tf.keras.models.save_model(model, filepath=model_path, overwrite=True, save_format='h5')