In [4]:
import pandas as pd
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
from pathlib import Path
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import activations
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, AveragePooling2D, Conv2D, MaxPool2D, Activation

In [5]:
# Config
IMG_SIZE = 64
BATCH_SIZE=32
train_path = 'data/train/'
test_path = 'data/test/'

## 1 Data preparation

In [6]:
# Read Metadata csv
df = pd.read_csv('Chest_xray_Corona_Metadata.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
0,0,IM-0128-0001.jpeg,Normal,TRAIN,,
1,1,IM-0127-0001.jpeg,Normal,TRAIN,,
2,2,IM-0125-0001.jpeg,Normal,TRAIN,,
3,3,IM-0122-0001.jpeg,Normal,TRAIN,,
4,4,IM-0119-0001.jpeg,Normal,TRAIN,,


In [7]:
df_train = df[df['Dataset_type'] == 'TRAIN'].copy()
df_test = df[df['Dataset_type'] == 'TEST'].copy()

# Add path in order to load the image in the ImageDataGenerator
df_train['X_ray_image_name'] = train_path + df_train['X_ray_image_name'].astype(str)
df_test['X_ray_image_name'] = test_path + df_test['X_ray_image_name'].astype(str)

In [8]:
df_train.head()

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
0,0,data/train/IM-0128-0001.jpeg,Normal,TRAIN,,
1,1,data/train/IM-0127-0001.jpeg,Normal,TRAIN,,
2,2,data/train/IM-0125-0001.jpeg,Normal,TRAIN,,
3,3,data/train/IM-0122-0001.jpeg,Normal,TRAIN,,
4,4,data/train/IM-0119-0001.jpeg,Normal,TRAIN,,


In [9]:
X_train = df_train.sample(frac=0.8,random_state=200) #random state is a seed value
X_val = df_train.drop(X_train.index)

In [10]:
X_val.head()

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
2,2,data/train/IM-0125-0001.jpeg,Normal,TRAIN,,
17,17,data/train/IM-0170-0001.jpeg,Normal,TRAIN,,
18,18,data/train/IM-0168-0001.jpeg,Normal,TRAIN,,
37,37,data/train/IM-0131-0001.jpeg,Normal,TRAIN,,
48,48,data/train/IM-0228-0001.jpeg,Normal,TRAIN,,


In [11]:
df_test.tail()

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
5905,5928,data/test/person1637_virus_2834.jpeg,Pnemonia,TEST,,Virus
5906,5929,data/test/person1635_virus_2831.jpeg,Pnemonia,TEST,,Virus
5907,5930,data/test/person1634_virus_2830.jpeg,Pnemonia,TEST,,Virus
5908,5931,data/test/person1633_virus_2829.jpeg,Pnemonia,TEST,,Virus
5909,5932,data/test/person1632_virus_2827.jpeg,Pnemonia,TEST,,Virus


## Data Generator

In [12]:
# Training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range = 0.2,
        width_shift_range = 0.2,
        height_shift_range = 0.2,
        shear_range=0.2,
        horizontal_flip = True,
        zoom_range=0.2)

train_generator = train_datagen.flow_from_dataframe(
        dataframe = X_train,
        target_size = (IMG_SIZE, IMG_SIZE),
        directory = None,
        x_col='X_ray_image_name',
        y_col='Label',
        batch_size = BATCH_SIZE,
        classes = ["Normal", "Pnemonia"],
        class_mode = "categorical",
        shuffle=True)

# Validation
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow_from_dataframe(
        dataframe = X_val,
        target_size = (IMG_SIZE, IMG_SIZE),
        directory = None,
        x_col='X_ray_image_name',
        y_col='Label',
        classes = ["Normal", "Pnemonia"],
        class_mode = "categorical",
        shuffle=True)

test_generator = val_datagen.flow_from_dataframe(
        dataframe = df_test,
        target_size = (IMG_SIZE, IMG_SIZE),
        directory = None,
        x_col='X_ray_image_name',
        y_col='Label',
        classes = ["Normal", "Pnemonia"],
        class_mode = "categorical",
        shuffle=True)

Found 4229 validated image filenames belonging to 2 classes.
Found 1057 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.


## Model training

In [17]:
early_stopping = EarlyStopping(patience=5, verbose=1, monitor='val_accuracy', mode='max')

In [18]:
# Based on the model of: https://ieeexplore.ieee.org/abstract/document/9057809
# Feature Extraction and Classification of Chest X-Ray Images Using CNN to Detect Pneumonia

model = Sequential()
model.add(Conv2D(32, (3,3), input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(Activation(activations.relu))
model.add(Conv2D(32, (3,3)))
model.add(Activation(activations.relu))
model.add(Dropout(0.2))
model.add(MaxPool2D(pool_size = (3, 3)))

model.add(Conv2D(64, (3,3), input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(Activation(activations.relu))
model.add(Conv2D(128, (2,2)))
model.add(Activation(activations.relu))
model.add(MaxPool2D(pool_size = (2, 2)))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(256))
model.add(Activation(activations.relu))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation(activations.relu))
model.add(Dropout(0.5))
model.add(Dense(2, activation='sigmoid'))

model.compile(optimizer = Adam(learning_rate=0.0001), loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 62, 62, 32)        896       
_________________________________________________________________
activation_6 (Activation)    (None, 62, 62, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 60, 60, 32)        9248      
_________________________________________________________________
activation_7 (Activation)    (None, 60, 60, 32)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 60, 60, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 20, 20, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 18, 18, 64)       

In [19]:
history = model.fit(train_generator, validation_data=val_generator, callbacks=[early_stopping], epochs=20, verbose = True)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 133 steps, validate for 34 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 00017: early stopping


## Model evaluation

In [20]:
# Evaluate the model on the test data using `evaluate`
print('\n# Evaluate on test data')
results = model.evaluate(test_generator)
print('test loss, test acc:', results)


# Evaluate on test data
  ...
    to  
  ['...']
test loss, test acc: [0.3043458864092827, 0.8798077]
