# Pneumonia detection based on Chest X-Ray images on imbalanced data using tensorflow transformation

The original notebook can be found here https://www.kaggle.com/michalbrezk/x-ray-pneumonia-cnn-tensorflow-2-0-keras-94

## Introduction

In this notebook we use TensorFlow to load and pre-process chest X-ray images and apply a Keras CNN model to this data. The dataset is imbalanced (approx. 1:3), and the images may have different sizes and either one or three color channels.



## Load libraries

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical


## Load Data

The dataset consists of 3 folders: train, test and val. Train and test are used for modeling; validation is used to check the performance of the model. The validation set is very small (16 cases).

Each folder contains a PNEUMONIA and a NORMAL sub-folder.

In [3]:
# Root of the dataset; each split lives in its own sub-directory below it.
path = '../chest_xray/'

train_dir = f"{path}train"
test_dir = f"{path}test"
val_dir = f"{path}val"

# Target (height, width) handed to the image loader.
img_height = img_width = 196
image_size = (img_height, img_width)

# Label index (as a string key) -> class folder name.
class_name = {
    "0": "NORMAL",
    "1": "PNEUMONIA",
}

batch_size = 32


### Load files

In [4]:
# Loader options shared by all three splits; only the directory differs.
_loader_options = dict(
    labels='inferred',
    label_mode='binary',
    class_names=["NORMAL", "PNEUMONIA"],
    color_mode='grayscale',
    batch_size=batch_size,
    image_size=image_size,
    shuffle=True,
    seed=0,
    validation_split=None,
    subset=None,
)
_load_images = tf.keras.preprocessing.image_dataset_from_directory

print("---------------train files-----------------")
train_data = _load_images(train_dir, **_loader_options)

print("---------------test files-----------------")
test_data = _load_images(test_dir, **_loader_options)

print("---------------validation files-----------------")
val_data = _load_images(val_dir, **_loader_options)

---------------train files-----------------
Found 5216 files belonging to 2 classes.
---------------test files-----------------
Found 624 files belonging to 2 classes.
---------------validation files-----------------
Found 16 files belonging to 2 classes.


In [5]:
# Look at one batch only to confirm the image and label tensor shapes.
for image_batch, labels_batch in train_data.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

(32, 196, 196, 1)
(32, 1)


## Pre-processing

### Standardization


In [7]:
# Layer that multiplies every pixel value by 1/255.
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)


def _rescale_pair(images, labels):
    """Apply the rescaling layer to the images and pass the labels through."""
    return normalization_layer(images), labels


train = train_data.map(_rescale_pair)

# Pull a single batch to check that the pixel values are now in `[0,1]`.
image_batch, labels_batch = next(iter(train))
first_image = image_batch[0]
print(np.min(first_image), np.max(first_image))

test = test_data.map(_rescale_pair)
val = val_data.map(_rescale_pair)

0.0 1.0


### Convert to numpy

In [8]:
def _dataset_to_numpy(dataset):
    """Materialise a batched (images, labels) dataset as two aligned arrays.

    The datasets were created with ``shuffle=True``, so two separate passes
    over the same dataset may yield the samples in different orders. Images
    and labels are therefore collected in ONE pass so that row ``i`` of the
    image array always belongs to row ``i`` of the label array.

    Args:
        dataset: iterable yielding ``(image_batch, label_batch)`` pairs.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays concatenated along axis 0.
    """
    image_batches, label_batches = [], []
    for images, labels in dataset:
        image_batches.append(np.asarray(images))
        label_batches.append(np.asarray(labels))
    return (np.concatenate(image_batches, axis=0),
            np.concatenate(label_batches, axis=0))


X_train, Y_train = _dataset_to_numpy(train)
X_test, Y_test = _dataset_to_numpy(test)
X_val, Y_val = _dataset_to_numpy(val)

In [9]:
# One-hot encode the label arrays of every split.
y_train, y_test, y_val = (
    to_categorical(labels) for labels in (Y_train, Y_test, Y_val)
)


### Data augmentation

In [10]:

# Augmentation settings: small rotations, zooms and shifts; flips disabled.
_augmentation_options = {
    "rotation_range": 10,
    "zoom_range": 0.1,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": False,
    "vertical_flip": False,
}

# define generator
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**_augmentation_options)

# NOTE(review): none of the featurewise/ZCA options are enabled above, so this
# fit() call is probably redundant — confirm before removing.
datagen.fit(X_train)


## Modeling

### define model

In [11]:
def keras_model_builder(input_shape=(196, 196, 1)):
    """Build and compile the CNN used for NORMAL vs. PNEUMONIA classification.

    The network is four stages of two 'same'-padded ReLU convolutions followed
    by max pooling (8, 16, 32 and 64 filters), then a 128-unit dense layer with
    dropout and a 2-way softmax output. It is compiled with Adam and
    categorical cross-entropy, so it expects one-hot encoded labels.

    Args:
        input_shape: (height, width, channels) of the input images. Defaults to
            the 196x196 single-channel shape this function originally
            hard-coded.

    Returns:
        A compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # The first layer keeps its explicit name 'img_raw_xf'.
        tf.keras.layers.Conv2D(filters=8, kernel_size=(7,7), padding='same', activation='relu',
                               input_shape=input_shape, name='img_raw_xf'),
        tf.keras.layers.Conv2D(filters=8, kernel_size=(7,7), padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(3,3)),

        tf.keras.layers.Conv2D(filters=16, kernel_size=(5,5), padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=16, kernel_size=(5,5), padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(3,3)),

        tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2,2)),

        tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'),
        tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2,2)),

        tf.keras.layers.Flatten(),

        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(2, activation='softmax')

    ])

    model.compile(
        # `learning_rate` replaces the deprecated `lr` alias.
        # NOTE(review): `decay` is kept as in the original; newer Keras
        # optimizers may reject it — confirm against the installed TF version.
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, decay=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    return model

In [None]:
model = keras_model_builder()

# Keep the best model seen so far on disk. The model is compiled with
# metrics=['accuracy'], so the validation metric is logged as 'val_accuracy';
# monitoring 'val_acc' would never find a value to compare.
saveBestModel = tf.keras.callbacks.ModelCheckpoint('../models/pneu_detect_tf_model.hdf5',
                                                monitor='val_accuracy', verbose=0,
                                                save_best_only=True,
                                                save_weights_only=False,
                                                mode='max')

# Stop once the training loss has not improved for 2 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2)

# Both callbacks must be passed to fit(); previously the checkpoint callback
# was created but never registered, so no model file was ever written.
# class_weight gives class 0 a larger weight than class 1.
history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
                    validation_data=(X_test, y_test),
                    epochs=5, verbose=1,
                    callbacks=[callback, saveBestModel],
                    class_weight={0: 6.0, 1: 0.5})

Epoch 1/50
Epoch 2/50
Epoch 3/50
 289/1304 [=====>........................] - ETA: 8:19 - loss: 0.9631 - accuracy: 0.2645