In [0]:
import pandas as pd
import cv2
import numpy as np
import argparse
import os
#Package used for visualization
import matplotlib.pyplot as plt
# Package used for Modeling
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

In [0]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense

In [0]:
import urllib.request

In [0]:
meta = pd.read_csv('metadata.csv')
dataset = meta[['finding', 'filename']]

Covid = dataset.loc[dataset['finding']=='COVID-19', ]
ARDS = dataset.loc[dataset['finding']=='ARDS', ]
SARS = dataset.loc[dataset['finding']=='SARS', ]
Pneumocystis = dataset.loc[dataset['finding']=='Pneumocystis', ]
Streptococcus = dataset.loc[dataset['finding']=='Streptococcus', ]
NoFinding = dataset.loc[dataset['finding']=='No Finding', ]
Other = pd.concat([ARDS, SARS, Pneumocystis, Streptococcus, NoFinding])

Other['finding'] = 'OTHER'

In [0]:
Covid = Covid.reset_index()
Other = Other.reset_index()

Covid_train = Covid.sample(20)
Other_train = Other.sample(20)

train_data = pd.concat([Covid_train, Other_train])

train_data['label'] = 0
train_data.loc[train_data['finding']=='COVID-19', 'label'] = 1

In [0]:
Covid_test =  Covid.loc[~Covid.index.isin(Covid_train.index)].sample(5)
Other_test = Other.loc[~Other.index.isin(Other_train.index)]

test_data = pd.concat([Covid_test, Other_test])

test_data['label'] = 0

test_data.loc[test_data['finding']=='COVID-19', 'label'] = 1

train_labels = train_data.loc[:,['label']]
test_labels = test_data.loc[:,['label']]

In [0]:
data_labels = pd.concat([train_labels, test_labels])

train_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

for i in range(len(train_data)):
  train_data.loc[i, 'Path'] = "images/" + train_data.loc[i, 'filename']

for i in range(len(test_data)):
  test_data.loc[i, 'Path'] = "images/" + test_data.loc[i, 'filename']

In [0]:
for i in range(len(train_data)):
  urllib.request.urlretrieve('https://raw.githubusercontent.com/ieee8023/covid-chestxray-dataset/master/images/' + train_data.loc[i, 'filename'], 'test_data/' + train_data.loc[i, 'filename'])

In [0]:
for i in range(len(test_data)):
  urllib.request.urlretrieve('https://raw.githubusercontent.com/ieee8023/covid-chestxray-dataset/master/images/' + test_data.loc[i, 'filename'], 'test_data/' + test_data.loc[i, 'filename'])

In [0]:
def process_image(path):
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img_scaled = cv2.resize(img,(224, 224), interpolation = cv2.INTER_AREA)
  img_scaled = np.array(img_scaled/255.)
  return img_scaled

In [0]:
train_images = []
for i in range(len(train_data)):
  im = process_image('train_data/' + train_data['Path'][i])
  train_images.append(im)
  if i % 5 ==0:
    print(i)
    
train_images = np.array(train_images)
train_labels = np.array(train_labels)

In [0]:
test_images = []
for i in range(len(test_data)):
  im = process_image('test_data/' + test_data['Path'][i])
  test_images.append(im)
  if i % 5 ==0:
    print(i)

test_images = np.asarray(test_images)
test_labels = np.asarray(test_labels)

In [25]:
lrate = 1e-3
epochs = 25
batch_size = 8
train_augmentation = ImageDataGenerator(rotation_range=15, fill_mode="nearest")

modelbase = ResNet50(weights="imagenet", include_top=False,input_tensor=Input(shape=(224, 224, 3)))

modelh = modelbase.output
modelh = AveragePooling2D(pool_size=(4, 4))(modelh)
modelh = Flatten(name="flatten")(modelh)
modelh = Dense(64, activation="relu")(modelh)
modelh = Dropout(0.5)(modelh)
modelh = Dense(2, activation="softmax")(modelh)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [0]:
model = Model(inputs=modelbase.input, outputs=modelh)
for layer in modelbase.layers:
    layer.trainable = False
    
label_binarizer = LabelBinarizer()
trainY = label_binarizer.fit_transform(train_data.loc[:,['finding']])
trainY = to_categorical(trainY)
testY = label_binarizer.fit_transform(test_data.loc[:,['finding']])
testY = to_categorical(testY)

In [28]:
opt = Adam(lr=lrate, decay=lrate / epochs)
model.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])

history = model.fit_generator(train_augmentation.flow(train_images, trainY, batch_size=batch_size),
                        steps_per_epoch=len(train_images) // batch_size,
                        validation_steps=len(test_images) // batch_size,
                        validation_data=(test_images, testY),
                        epochs=epochs)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [53]:
model.evaluate([1,2,3,4], epochs)

IndexError: ignored

In [50]:
history.history['accuracy']


[0.4749999940395355,
 0.5249999761581421,
 0.30000001192092896,
 0.5249999761581421,
 0.4749999940395355,
 0.550000011920929,
 0.5249999761581421,
 0.4000000059604645,
 0.5,
 0.5249999761581421,
 0.574999988079071,
 0.625,
 0.5,
 0.574999988079071,
 0.5,
 0.699999988079071,
 0.550000011920929,
 0.4000000059604645,
 0.550000011920929,
 0.4749999940395355,
 0.699999988079071,
 0.6499999761581421,
 0.550000011920929,
 0.5,
 0.6499999761581421]