In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd
import time
import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import os
import PIL
import cv2
import shutil
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Read the space-separated training manifest (no header row), name its four
# fields, and keep only the image file name and its label.
train_columns = ['patient id', 'file_paths', 'labels', 'data source']
train_df = pd.read_csv('../input/covidx-cxr2/train.txt', sep=" ", header=None)
train_df.columns = train_columns
train_df = train_df.drop(columns=['patient id', 'data source'])

In [None]:
# Preview the first rows of the training manifest (file_paths, labels).
train_df.head()

In [None]:
# Read the space-separated test manifest (no header row), name its four
# fields, and keep only the image file name and its label.
test_columns = ['id', 'file_paths', 'labels', 'data source']
test_df = pd.read_csv('../input/covidx-cxr2/test.txt', sep=" ", header=None)
test_df.columns = test_columns
test_df = test_df.drop(columns=['id', 'data source'])

In [None]:
# Preview the first rows of the test manifest (file_paths, labels).
test_df.head()

In [None]:
# Directories that the `file_paths` column of each manifest is resolved against
# when the image generators are built.
test_path = '../input/covidx-cxr2/test/'
train_path = '../input/covidx-cxr2/train/'

In [None]:
# Class distribution of the raw training manifest (rows per label).
train_df['labels'].value_counts()

In [None]:
# Balance the training set by down-sampling every label to the same number of
# rows, then shuffle the combined frame.
#
# `file_count` is the requested rows per class. It is capped at the size of the
# rarest class because `DataFrame.sample(..., replace=False)` raises ValueError
# when asked for more rows than the slice contains.
file_count = 13992
file_count = min(file_count, int(train_df['labels'].value_counts().min()))

samples = [
    train_df.loc[train_df['labels'] == category].sample(
        file_count, replace=False, random_state=1
    )
    for category in train_df['labels'].unique()
]

# frac=1.0 with a fixed seed is a reproducible full shuffle; the index is reset
# so it is contiguous again after sampling.
train_df = (
    pd.concat(samples, axis=0)
    .sample(frac=1.0, random_state=1)
    .reset_index(drop=True)
)
print(train_df['labels'].value_counts())
print(len(train_df))

In [None]:
# Hold out 10% of the training rows for validation. Stratifying on the label
# keeps the class ratio the same in both splits instead of leaving it to chance.
# NOTE: this rebinds `train_df`, so re-running the cell splits the already
# reduced frame again; re-run from the loading cells for a clean split.
train_df, valid_df = train_test_split(
    train_df,
    train_size=0.9,
    random_state=0,
    stratify=train_df['labels'],
)

In [None]:
# Report the label distribution of each split, in train / valid / test order.
for split_df in (train_df, valid_df, test_df):
    print(split_df.labels.value_counts())

In [None]:
# Generator settings: images per batch and the (height, width) every image is
# resized to when loaded.
batch_size = 64
target_size = (224, 224)

In [None]:
# Image pipelines. All three splits use the ResNetV2 preprocessing function;
# only the training pipeline adds augmentation (horizontal flip, 10% zoom).
preprocess = tf.keras.applications.resnet_v2.preprocess_input
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess, horizontal_flip=True, zoom_range=0.1
)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess)

# Arguments shared by every flow_from_dataframe call.
flow_kwargs = dict(
    x_col='file_paths',
    y_col='labels',
    target_size=target_size,
    batch_size=batch_size,
    color_mode='rgb',
    class_mode='binary',
)

# Training batches are shuffled (the library default, made explicit here).
train_gen = train_datagen.flow_from_dataframe(
    train_df, directory=train_path, shuffle=True, **flow_kwargs
)

# Evaluation generators must NOT shuffle: predictions are later compared
# against `test_gen.classes`, which is in dataframe order. With the default
# shuffle=True the predicted rows come back in a different order than the
# labels, and the confusion matrix / classification report are meaningless.
valid_gen = test_datagen.flow_from_dataframe(
    valid_df, directory=train_path, shuffle=False, **flow_kwargs
)
test_gen = test_datagen.flow_from_dataframe(
    test_df, directory=test_path, shuffle=False, **flow_kwargs
)

In [None]:
# ResNet50V2 backbone: ImageNet weights, no classification top, 224x224 RGB input.
base_model = tf.keras.applications.ResNet50V2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)


In [None]:
# Stack a small classification head on top of `base_model`:
# global average pooling -> Dense(1024, relu) -> BatchNorm -> Dropout(0.2)
# -> a single sigmoid unit for the binary output.
model = tf.keras.Sequential()
model.add(base_model)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(1024, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [None]:

# Binary cross-entropy pairs with the single sigmoid output unit; Adam is used
# with its default settings and accuracy is tracked during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training-control settings.
patience = 1        # epochs without val_loss improvement before the LR is reduced
stop_patience = 3   # epochs without val_loss improvement before training stops
factor = 0.5        # multiplier applied to the learning rate on each reduction

callbacks = [
    # Keep only the best model (lowest val_loss) on disk.
    tf.keras.callbacks.ModelCheckpoint(
        "classify_model.h5", monitor='val_loss', save_best_only=True, verbose=0
    ),
    # Without restore_best_weights the in-memory model is left at the weights
    # of the last epoch run, i.e. `stop_patience` epochs past the best one,
    # and the evaluation cells would score that worse model.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=stop_patience,
        restore_best_weights=True,
        verbose=1,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=factor, patience=patience, verbose=1
    ),
]

In [None]:
# Train for at most `epochs` epochs of 100 batches each; the callbacks may stop
# training earlier or lower the learning rate along the way.
epochs = 20
fit_options = dict(
    validation_data=valid_gen,
    steps_per_epoch=100,
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(train_gen, **fit_options)

In [None]:
# Figure 1: training vs. validation loss per epoch.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'], label='Loss (training data)')
loss_ax.plot(history.history['val_loss'], label='Loss (validation data)')
loss_ax.set_title('Loss for Training')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('No. epoch')
# The explicit legend entries take precedence over the per-line labels above.
loss_ax.legend(['train', 'validation'], loc="upper left")
plt.show()

# Figure 2: training vs. validation accuracy per epoch.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(history.history['accuracy'])
acc_ax.plot(history.history['val_accuracy'])
acc_ax.set_title('model accuracy')
acc_ax.set_ylabel('accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
from matplotlib import pyplot

# Single figure with loss (top) and accuracy (bottom) per epoch.
# The second curve in each panel comes from the `val_*` history keys, i.e. the
# validation split used during fit, so it is labelled 'validation' and not
# 'test' (the test set is never seen during training).
fig, (loss_ax, acc_ax) = pyplot.subplots(2, 1)

loss_ax.set_title('Loss')
loss_ax.plot(history.history['loss'], label='train')
loss_ax.plot(history.history['val_loss'], label='validation')
loss_ax.legend()

# plot accuracy during training
acc_ax.set_title('Accuracy')
acc_ax.plot(history.history['accuracy'], label='train')
acc_ax.plot(history.history['val_accuracy'], label='validation')
acc_ax.legend()

fig.tight_layout()
pyplot.show()

In [None]:
# Show the label-name -> integer-index mapping used by the test generator;
# the report cell below names index 0 'NonCovid' and index 1 'Covid'.
test_gen.class_indices

In [None]:
# Loss and accuracy on the whole validation split.
# `Model.evaluate` accepts generators directly (the *_generator variants are
# deprecated), and omitting `steps` scores every batch instead of only 10.
model.evaluate(valid_gen)

In [None]:
# Loss and accuracy on the whole test set.
# `Model.evaluate` accepts generators directly (the *_generator variants are
# deprecated), and omitting `steps` makes one full pass over the generator
# rather than a fixed 10 batches.
model.evaluate(test_gen)

In [None]:
# Sigmoid scores for every test image, one row per image.
# `Model.predict` accepts generators directly; `predict_generator` is deprecated.
# NOTE: these rows line up with `test_gen.classes` only when the generator was
# created with shuffle=False.
pred = model.predict(test_gen)


In [None]:
# Turn the sigmoid scores into hard 0.0 / 1.0 predictions by rounding.
preds = np.round(pred, decimals=0)

In [None]:
# Recode the string labels in test_df as floats (positive -> 1.0, negative -> 0.0).
# A single `.loc[row_mask, column]` assignment is used instead of chained
# indexing (`df[col][mask] = ...`), which raises SettingWithCopyWarning and
# does not modify the frame at all under pandas copy-on-write.
for label_name, label_value in (('positive', 1.0), ('negative', 0.0)):
    test_df.loc[test_df['labels'] == label_name, 'labels'] = label_value

In [None]:
# Compare the generator's integer labels with the rounded predictions.
# `confusion_matrix` and `classification_report` are already imported in the
# first cell, and the matrix is computed once and reused (it is also plotted
# in the heatmap cell via `mat`).
mat = confusion_matrix(test_gen.classes, preds)
print('Confusion Matrix')
print(mat)

print('Classification Report')
# Display names for class index 0 and 1 respectively.
target_names = ['NonCovid', 'Covid']
print(classification_report(test_gen.classes, preds, target_names=target_names))

In [None]:
import seaborn as sns

# Draw the confusion matrix `mat` as an annotated heatmap
# (rows = true labels, columns = predicted labels).
ax = plt.subplot()
sns.heatmap(mat, annot=True, fmt='g', ax=ax)

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
class_names = ['Noncovid', 'Covid']
ax.xaxis.set_ticklabels(class_names)
ax.yaxis.set_ticklabels(class_names);