# Chest Analysis to detect X-ray image with COVID-19 

In [2]:
pwd

'C:\\Users\\Admin\\Desktop\\R projects\\COVID-19 -Chest-XRAY-Analysis'

In [3]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten, Activation
from keras import backend as K
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import os, glob, utils, random, shutil, warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [17]:
img_dir = './images/'
data = pd.read_csv('metadata.csv')

In [18]:
dt = data[['finding', 'filename']]
dt.head()

Unnamed: 0,finding,filename
0,COVID-19,auntminnie-a-2020_01_28_23_51_6665_2020_01_28_...
1,COVID-19,auntminnie-b-2020_01_28_23_51_6665_2020_01_28_...
2,COVID-19,auntminnie-c-2020_01_28_23_51_6665_2020_01_28_...
3,COVID-19,auntminnie-d-2020_01_28_23_51_6665_2020_01_28_...
4,COVID-19,nejmc2001573_f1a.jpeg


In [6]:
dt.isnull().any()

finding     False
filename    False
dtype: bool

In [27]:
# check for any duplicated
pd.DataFrame({'im': os.listdir(img_dir)}).nunique()

Unnamed: 0,im


In [8]:
# split images to train and test test
image_names = os.listdir(img_dir) # list of all images
#
train_names = dt.filename.tolist() # return list of all train names
test_names = []
#
for img in image_names:
    if img not in train_names:
        test_names.append(img)

In [10]:
dt.finding.unique()

array(['COVID-19', 'ARDS', 'SARS', 'Pneumocystis', 'Streptococcus',
       'No Finding', 'Chlamydophila', 'E.Coli', 'COVID-19, ARDS',
       'Klebsiella', 'Legionella'], dtype=object)

In [21]:
dt['finding']=dt['finding'].astype('category')

In [19]:
# create image directory for train and test loaded into img_dir workspace
dirs = ['train_imgs', 'test_imgs'] #folders to be created
all_files = './' # current directory path
for files in dirs:
    os.makedirs(os.path.join(all_files, files)) # creates the two files in the current directory

In [20]:
# split the train directory into sub directories
dirs = ['COVID-19', 'ARDS', 'SARS', 'Pneumocystis', 'Streptococcus','No Finding', 'Chlamydophila', 'E.Coli', 'COVID-19, ARDS',
       'Klebsiella', 'Legionella'] # train class sub directories
trn_dir = './train_imgs/' # train images directory path
for files in dirs:
    os.makedirs(os.path.join(trn_dir, files))

In [25]:
# move respectve images to their directories
image_dir = './images'
for filepath in glob.glob(image_dir +'/*'):
    if os.path.basename(filepath) in dt[dt['finding'] == 'COVID-19'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/COVID-19')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'ARDS'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/ARDS')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'SARS'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/SARS')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'Pneumocystis'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/Pneumocystis')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'Streptococcus'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/Streptococcus')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'No Finding'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/No Finding')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'Chlamydophila'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/Chlamydophila')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'E.Coli'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/E.Coli')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'COVID-19, ARDS'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/COVID-19, ARDS')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'Klebsiella'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/Klebsiella')
    elif os.path.basename(filepath) in dt[dt['finding'] == 'Legionella'].drop('finding', axis=1).filename.tolist():
        shutil.move(filepath, './train_imgs/Legionella')
    else:
        shutil.move(filepath, './test_imgs')

In [15]:
# global variables
random.seed(1234)
img_width, img_height = 128, 128
channels = 3
#
# reshape the data inputs
if K.image_data_format() == 'channels_first':
    input_shape = (channels, img_width, img_height)
else:
    input_shape = (img_width, img_height, channels)
# 
# algorithm to train model
model = Sequential()
model.add(Conv2D(32, (3,3), activation = 'relu', input_shape = input_shape))
model.add(MaxPool2D(2,2))
model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(MaxPool2D(2,2))
model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(MaxPool2D(2,2))
model.add(Conv2D(128, (3,3), activation = 'relu'))
model.add(Conv2D(128, (3,3), activation = 'relu'))
#
# layer flatten, dense and dropout time
model.add(Flatten())
model.add(Dense(128, activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(11, activation = 'softmax'))
model.summary()
#
# compile
model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 126, 126, 32)      896       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 61, 61, 64)        18496     
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 59, 59, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 29, 29, 64)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 27, 27, 64)        36928     
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 25, 25, 64)       

In [13]:
# fit CNN to data generator and directory flow paths
train_datagen = ImageDataGenerator(rescale=1./255, 
                                  shear_range=0.2, 
                                  zoom_range=0.2, 
                                  horizontal_flip=True)
train_path = 'C:/Users/Admin/Desktop/R projects/COVID-19 -Chest-XRAY-Analysis/train_imgs'
train_gen = train_datagen.flow_from_directory(train_path, 
                                             target_size = (img_width, img_height),
                                             batch_size = 32,
                                             class_mode = 'categorical')
#
test_datagen = ImageDataGenerator(rescale=1./255)
test_gen = test_datagen.flow_from_directory('.', classes = ['test_imgs'],
                                           target_size = (img_width, img_height),
                                           batch_size = 32,
                                           class_mode = 'categorical')

Found 224 images belonging to 11 classes.
Found 4 images belonging to 1 classes.
