# Identification and Classification of Viral Pneumonia by Image-Based Deep Learning
Hamza Khokhar
Final Project
Professor Biwas

## Section 1: Preparing the data

In [7]:
# imports 
import os
import numpy as np
from tqdm import tqdm
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
import glob
import cv2
import matplotlib.pyplot as plt
from keras.applications.densenet import DenseNet121
from tensorflow.keras.layers import Conv2D, MaxPool2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.applications.vgg16 import VGG16

import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Activation, GlobalAveragePooling2D
from keras.constraints import maxnorm
from keras.layers.convolutional import Conv2D, MaxPooling2D
from sklearn.metrics import accuracy_score

In [8]:
# Collect the image paths for every (split, class) folder of the dataset.
# Each list holds the .jpeg files found directly inside that folder; a missing
# or empty folder simply yields an empty list.
paths_train_PNEUMONIA = glob.glob("chest_xray/train/PNEUMONIA/*.jpeg")
paths_train_NORMAL = glob.glob("chest_xray/train/NORMAL/*.jpeg")
paths_test_PNEUMONIA = glob.glob("chest_xray/test/PNEUMONIA/*.jpeg")
paths_test_NORMAL = glob.glob("chest_xray/test/NORMAL/*.jpeg")
# Fixed: the pneumonia validation paths were previously read from val/NORMAL.
paths_val_PNEUMONIA = glob.glob("chest_xray/val/PNEUMONIA/*.jpeg")
paths_val_NORMAL = glob.glob("chest_xray/val/NORMAL/*.jpeg")

# Total images for each folder
print('Train PNEUMONIA: '+str(len(paths_train_PNEUMONIA)))
print('Train NORMAL: '+str(len(paths_train_NORMAL)))
print('Test PNEUMONIA: '+str(len(paths_test_PNEUMONIA)))
# Fixed: this count is for the NORMAL test folder (was labelled PNEUMONIA).
print('Test NORMAL: '+str(len(paths_test_NORMAL)))
print('Val PNEUMONIA: '+str(len(paths_val_PNEUMONIA)))
print('Val Normal: '+str(len(paths_val_NORMAL)))

# Total images for each Directory
print('Total Number of Train samples: '+ str(len(paths_train_PNEUMONIA)+len(paths_train_NORMAL)))
print('Total Number of Test samples: '+ str(len(paths_test_PNEUMONIA)+len(paths_test_NORMAL)))
print('Total Number of Validation samples: '+ str(len(paths_val_PNEUMONIA)+len(paths_val_NORMAL)))

Train PNEUMONIA: 3875
Train NORMAL: 1341
Test PNEUMONIA: 398
Test PNEUMONIA: 242
Val PNEUMONIA: 0
Val Normal: 0
Total Number of Train samples: 5216
Total Number of Test samples: 640
Total Number of Validation samples: 0


In [9]:
def _load_labelled_images(paths, label, size=(225, 225)):
    """Read, resize and rescale every image in ``paths``.

    Parameters
    ----------
    paths : list of str
        Image file paths to load.
    label : int
        Class label attached to every image in ``paths``
        (1 = pneumonia, 0 = normal in this notebook).
    size : tuple of int, optional
        Target (width, height) passed to ``cv2.resize``.

    Returns
    -------
    (list, list)
        The float32 images scaled to [0, 1] and a same-length list of labels.

    Raises
    ------
    OSError
        If a file cannot be read/decoded by OpenCV.
    """
    images, labels = [], []
    for path in tqdm(paths):
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        if image is None:
            raise OSError('Could not read image: ' + str(path))
        image = cv2.resize(image, size).astype('float32') / 255
        images.append(image)
        labels.append(label)
    return images, labels


train_images = []
train_labels = []
test_images = []
test_labels = []

# Pneumonia images are appended before normal ones within each split.
for paths, label in ((paths_train_PNEUMONIA, 1), (paths_train_NORMAL, 0)):
    images, labels = _load_labelled_images(paths, label)
    train_images.extend(images)
    train_labels.extend(labels)

for paths, label in ((paths_test_PNEUMONIA, 1), (paths_test_NORMAL, 0)):
    images, labels = _load_labelled_images(paths, label)
    test_images.extend(images)
    test_labels.extend(labels)
    


100%|██████████| 3875/3875 [00:32<00:00, 120.12it/s]
100%|██████████| 1341/1341 [00:29<00:00, 46.11it/s]
100%|██████████| 398/398 [00:03<00:00, 132.58it/s]
100%|██████████| 242/242 [00:04<00:00, 55.48it/s]


In [10]:
# Stack the per-image lists into float32 arrays for Keras.
train_images = np.array(train_images, dtype=np.float32)
test_images = np.array(test_images, dtype=np.float32)

# One-hot encode the integer class labels (0 = normal, 1 = pneumonia).
# NOTE(review): this cell overwrites its own inputs, so re-running it without
# re-running the loading cell presumably re-encodes already one-hot labels.
train_labels = to_categorical(np.array(train_labels))
test_labels = to_categorical(np.array(test_labels))




In [11]:
# Sanity check: show the shape of each image/label array, train first.
for array in (train_images, train_labels, test_images, test_labels):
    print(array.shape)

(5216, 225, 225, 3)
(5216, 2)
(640, 225, 225, 3)
(640, 2)


In [12]:
# Light geometric augmentation for the training images. No data-dependent
# normalization is enabled and flips are kept off.
datagen = ImageDataGenerator(
        featurewise_center = False,
        samplewise_center = False,
        featurewise_std_normalization = False,
        samplewise_std_normalization = False,
        zca_whitening = False,
        horizontal_flip = False,
        vertical_flip = False,
        rotation_range = 10,
        zoom_range = 0.1,
        width_shift_range = 0.1,
        height_shift_range = 0.1)

# datagen.fit() is intentionally not called: it only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, all of
# which are disabled above, so it would just copy the full training array.
train_gen = datagen.flow(train_images, train_labels, batch_size = 32, shuffle=True)

In [13]:
# VGG16 convolutional backbone: randomly initialised (no pretrained weights),
# without the original classifier head, sized for the 225x225 3-channel inputs.
vgg16_options = {
    'weights': None,
    'include_top': False,
    'input_shape': (225, 225, 3),
}
base_model = VGG16(**vgg16_options)

In [14]:
# Classifier: VGG16 feature extractor followed by a fully connected head
# with dropout after each hidden layer.
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(4096, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(4096, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(1000, activation="relu"))
model.add(Dropout(0.4))
# The labels are one-hot vectors over two mutually exclusive classes, so the
# output is a 2-way softmax (probabilities sum to 1) rather than two
# independent sigmoids.
model.add(Dense(2, activation="softmax"))

# categorical_crossentropy is the loss that pairs with softmax + one-hot
# labels; with it, 'accuracy' measures whether the arg-max class is correct
# instead of averaging per-output binary matches.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
# Train on the augmented batches and keep the per-epoch metrics for plotting.
# - model.fit accepts generators directly; fit_generator is deprecated
#   (assumes a TF2-backed Keras, which the tensorflow.keras imports suggest).
# - len(train_gen) is the number of batches per pass over the training data,
#   so the step count follows the generator's batch size.
# - Validation previously used the training arrays, so the 'val' metrics only
#   mirrored training. The test arrays are the only held-out data loaded in
#   this notebook; NOTE(review): a dedicated validation split would be
#   preferable so the test set stays untouched for the final evaluation.
learning_history = model.fit(
    train_gen,
    epochs = 3,
    steps_per_epoch = len(train_gen),
    validation_data = (test_images, test_labels))



Epoch 1/3

In [None]:
# Plot the training vs. validation curve for each tracked metric, one panel
# per metric (accuracy on the left, loss on the right).
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax = ax.ravel()
history = learning_history.history

for panel, metric in zip(ax, ('accuracy', 'loss')):
    for series in (metric, 'val_' + metric):
        panel.plot(history[series])
    panel.set_title('Model {}'.format(metric))
    panel.set_xlabel('epochs')
    panel.set_ylabel(metric)
    panel.legend(['train', 'val'])