COVID-19 X-ray predictor
========================

In [168]:
import os
import pandas as pd
import shutil
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2
import os
# Let long cell values (the image paths) display in full instead of being truncated
pd.set_option("display.max_colwidth", 10000)

Preparing the data
------------------

In [15]:
def not_found(path: str) -> bool:
    """Tell whether `path` is absent from the filesystem.

    Args:
        path: Filesystem path to probe.

    Returns:
        True when nothing exists at `path`, False otherwise.
    """
    exists = os.path.exists(path)
    return not exists

In [76]:
# Locations are resolved relative to the parent of the working directory
base = ".."
data_path = os.path.join(base, "data")
# Table describing each image; the cells below read its view, modality,
# finding and path columns
metadata = pd.read_csv(os.path.join(base, "data", "metadata.csv"))


In [94]:
# Keep the rows whose view is "PA" and whose modality is "X-ray", restricted
# to the two columns used later, and drop rows missing either value.
# Columns are read with brackets rather than attribute access so a column
# name can never be shadowed by a DataFrame attribute, and the rows are
# selected directly with .loc instead of NaN-masking the whole frame first.
pa = metadata.loc[
    (metadata["view"] == "PA") & (metadata["modality"] == "X-ray"),
    ["finding", "path"],
].dropna()
print(pa.shape)
pa

(99, 2)


Unnamed: 0,finding,path
0,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/auntminnie-a-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
1,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/auntminnie-b-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
2,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/auntminnie-c-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
3,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/auntminnie-d-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
4,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/nejmc2001573_f1a.jpeg
...,...,...
150,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/figure1-5e75d0940b71e1b702629659-98-right.jpeg
151,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/figure1-5e71be566aa8714a04de3386-98-left.jpeg
155,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/2966893D-5DDF-4B68-9E2B-4979D5956C8E.jpeg
156,COVID-19,/data/sources/covid-19-cv/data/PA/COVID-19/B2D20576-00B7-4519-A415-72DE29C90C34.jpeg


In [95]:
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


In [106]:
def get_image(image_path: str, size=(224, 224)):
    """Load an image from disk as an RGB array resized to `size`.

    Args:
        image_path: Path of the image file to read.
        size: Target size handed to cv2.resize. Defaults to (224, 224),
            the value this function previously hard-coded.

    Returns:
        The image read by OpenCV, converted from BGR to RGB channel order
        and resized to `size`.

    Raises:
        FileNotFoundError: If `image_path` is not an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError("Image not found: {}".format(image_path))
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor
    if image is None:
        raise ValueError("Could not decode image: {}".format(image_path))
    return cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), size)

def image_loader(row):
    """Load the image referenced by one metadata row.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) with a "path" entry.

    Returns:
        Whatever get_image returns for row["path"].

    Raises:
        FileNotFoundError: If row["path"] does not exist. The message names
            the offending path so the bad metadata row can be located.
    """
    image_path = row["path"]
    # Fail here with the path in the message instead of printing a bare
    # "NOT FOUND" and then crashing while decoding the missing file
    if not_found(image_path):
        raise FileNotFoundError("NOT FOUND: {}".format(image_path))
    return get_image(image_path)

In [157]:
# Load the image behind every row's path into an "image" column
pa["image"] = pa.apply(image_loader, axis=1)
# Two-class target: keep "COVID-19" findings as-is, relabel everything else "other"
pa["label"] = pa["finding"].where(pa["finding"] == "COVID-19", "other")
pa.shape

(99, 4)

In [158]:
# Extract the label column as a one-dimensional NumPy array
lbs = np.asarray(pa["label"])
lbs.shape

(99,)

Preparing the model
-------------------

In [169]:
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


In [152]:
# One-hot encode the labels: binarize the label strings with LabelBinarizer,
# then expand the result with to_categorical
lb = LabelBinarizer()
lb.fit(lbs)
labels = to_categorical(lb.transform(lbs))
labels.shape

(99, 2)

In [179]:
# Stack the per-row images into a single array and divide by 255 so the
# intensities are scaled to the range [0, 1]
data = np.array(pa["image"].tolist()) / 255.0
data.shape

(99, 224, 224, 3)

In [180]:
# Hold out 20% of the samples for testing; the split is stratified on the
# labels and seeded so it is repeatable
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

In [181]:
# Training hyper-parameters used by the cells below
INIT_LR = 1e-3  # learning rate handed to the optimizer
EPOCHS = 25  # number of training epochs
BS = 8  # batch size

In [182]:
# Training-time augmentation: random rotations (rotation_range=15), with
# uncovered pixels filled using the "nearest" mode
trainAug = ImageDataGenerator(rotation_range=15, fill_mode="nearest")

In [183]:
# VGG16 with ImageNet weights and without its fully connected top, fed by a
# 224x224x3 input tensor
baseModel = VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
)

# New classification head stacked on the base model's output:
# average pooling -> flatten -> 64-unit ReLU -> dropout -> 2-way softmax
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(64, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(2, activation="softmax")(headModel)

In [184]:

# Join the base model's input to the new head's output; this combined
# network is the model that gets trained
model = Model(
    inputs=baseModel.input,
    outputs=headModel,
)

In [185]:

# Freeze every layer of the base model so that only the new head's weights
# are updated during this first round of training
for base_layer in baseModel.layers:
    base_layer.trainable = False

In [186]:

# compile our model
print("[INFO] compiling model...")
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
opt = Adam(learning_rate=INIT_LR)
# NOTE(review): the head is a 2-unit softmax trained on one-hot labels, for
# which "categorical_crossentropy" is the conventional pairing -- confirm
# before changing the loss.
model.compile(loss="binary_crossentropy", optimizer=opt,
    metrics=["accuracy"])


[INFO] compiling model...


In [187]:
# train the head of the network
print("[INFO] training head...")
H = model.fit(
    trainAug.flow(trainX, trainY, batch_size=BS),
    # number of whole batches drawn from the augmenting generator per epoch
    steps_per_epoch=len(trainX) // BS,
    # validation data is passed as in-memory arrays and evaluated in full each
    # epoch; validation_steps only applies to dataset/generator input, so it
    # is not passed here
    validation_data=(testX, testY),
    epochs=EPOCHS)

[INFO] training head...
Instructions for updating:
Please use Model.fit, which supports generators.
  ...
    to  
  ['...']
Train for 9 steps, validate on 20 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
