### **Download dataset**

In [None]:
from google.colab import files
# Prompt for a file upload through the Colab widget, then report the name and
# byte length of every file that was received.
uploaded = files.upload()
for uploaded_name, payload in uploaded.items():
  message = 'User uploaded file "{name}" with length {length} bytes'.format(
      name=uploaded_name, length=len(payload))
  print(message)

In [2]:
# Install the uploaded Kaggle API token: create ~/.kaggle, move kaggle.json
# into it and restrict the file to owner read/write (chmod 600).
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the `tawsifurrahman/covid19-radiography-database` dataset with the
# Kaggle CLI.
!kaggle datasets download -d tawsifurrahman/covid19-radiography-database

### **Extract dataset files**

In [4]:
import zipfile
import os
# Unpack the downloaded archive into the current working directory, then
# delete the archive file.
path = '/content/covid19-radiography-database.zip'
# The handle is named `archive` rather than `zip`: a top-level `as zip` would
# rebind the builtin `zip` for every later cell of the notebook.
with zipfile.ZipFile(path, 'r') as archive:
  archive.extractall()
os.remove(path)

In [5]:
# Collect the file names found in every directory under `dataset_path` whose
# path contains the substring 'images'.
filenames = []
dataset_path = '/content/COVID-19_Radiography_Dataset'
# `dirpath` (not `dir`) keeps the builtin `dir` usable in later cells; extending
# the list directly gives the same flat list the nested-list flatten produced.
for dirpath, _subdirs, names_in_dir in os.walk(dataset_path):
  if 'images' in dirpath:
    filenames.extend(names_in_dir)

In [6]:
import glob
# Index every PNG matched by <dataset_path>/*/images/*.png by its bare file
# name, so a file name can be mapped back to its full path.
image_pattern = os.path.join(dataset_path, '*/', 'images', '*.png')
all_image_paths = {}
for image_path in glob.glob(image_pattern):
  all_image_paths[os.path.basename(image_path)] = image_path

In [7]:
import pandas as pd
# Build the (path, label) table. The label is the part of the file name before
# the first '-'. Column order matters: later cells read the columns by position
# (0 = path, 1 = label).
# The shuffle is seeded because the train/val/test split further down is
# positional; without a fixed seed every run would train and evaluate on a
# different partition.
SHUFFLE_SEED = 42
df = (
    pd.DataFrame(data = {'image': filenames})
    .assign(path = lambda frame: frame.image.map(all_image_paths),
            label = lambda frame: frame.image.str.split('-').str[0])
    .drop(columns = 'image')
    .sample(frac = 1, random_state = SHUFFLE_SEED)
    .reset_index(drop = True)
)

### **Dataset analysis**

In [None]:
import plotly.express as px
# Bar chart of how many rows each label has.
class_counts = df.label.value_counts()
fig = px.bar(data_frame = df,
             x = class_counts.index,
             y = class_counts.values,
             text_auto = True)

layout_options = {
    'title': 'Class distribution',
    'xaxis_title': "class name",
    'yaxis_title': "number of occurrences",
    'legend_title': "Legend Title",
}
fig.update_layout(**layout_options)

fig.show()

In [9]:
import cv2
def read_image(path, IMG_SIZE = (256, 256)):
  """Read an image from disk with OpenCV and resize it for display.

  Args:
    path: Location of the image file.
    IMG_SIZE: Target size forwarded to `cv2.resize`.

  Returns:
    The resized image with its channels in RGB order.

  Raises:
    OSError: If OpenCV could not read the file.
  """
  image = cv2.imread(path)
  # cv2.imread signals failure by returning None instead of raising; fail here
  # with the offending path rather than with an opaque error inside cv2.resize.
  if image is None:
    raise OSError('cv2 could not read image at {!r}'.format(path))
  # OpenCV loads colour images as BGR; convert so RGB consumers (such as the
  # plotly image traces used in this notebook) show the right colours.
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  return cv2.resize(image, IMG_SIZE)

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import random
# Show a grid of randomly drawn example images: one column per class and
# SAMPLES_PER_CLASS rows. The column count is derived from the labels that are
# actually present instead of being fixed at 4.
SAMPLES_PER_CLASS = 2
class_labels = list(df.label.unique())

fig = make_subplots(rows = SAMPLES_PER_CLASS,
                    cols = len(class_labels),
                    subplot_titles = class_labels,
                    vertical_spacing = 0.1)

for col, label in enumerate(class_labels, start = 1):
  class_paths = df.loc[df.label == label, 'path']
  for row in range(1, SAMPLES_PER_CLASS + 1):
    # Each draw is independent, so the same image can appear in both rows.
    image = read_image(class_paths.sample(1).values[0])
    fig.add_trace(go.Image(z = image), row = row, col = col)

fig.update_xaxes(visible = False)
fig.update_yaxes(visible = False)

fig.show()

### **Data preprocessing**

In [11]:
from sklearn.preprocessing import LabelEncoder
# Turn the string labels (second column of `df`) into integer class ids.
label_column = df.values[:, 1]
encoder = LabelEncoder().fit(label_column)
labels = encoder.transform(label_column)

In [12]:
from sklearn.utils import class_weight
import numpy as np
# Compute 'balanced' class weights and expand them into one weight per sample.
unique_labels = np.unique(labels)
class_weights = class_weight.compute_class_weight(class_weight = 'balanced',
                                                  classes = unique_labels,
                                                  y = labels)
# Class id -> class weight lookup.
weights_dict = {class_id: class_weights[position]
                for position, class_id in enumerate(unique_labels)}
# Weight of the class each individual sample belongs to.
weights = np.asarray([weights_dict[class_id] for class_id in labels])

In [13]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the integer class ids; the encoder expects a single-column
# 2-D input, and its output is converted to a dense array.
hot_encoder = OneHotEncoder()
label_column = labels.reshape(-1, 1)
labels = hot_encoder.fit_transform(label_column).toarray()

In [14]:
import tensorflow as tf
# Wrap paths (first column of `df`), one-hot labels and per-sample weights as
# tensors and slice them into a dataset of (path, label, weight) elements.
image_paths = df.values[:, 0]
path_tensor = tf.convert_to_tensor(image_paths)
label_tensor = tf.convert_to_tensor(labels)
weight_tensor = tf.convert_to_tensor(weights)
dataset = tf.data.Dataset.from_tensor_slices(
    (path_tensor, label_tensor, weight_tensor))

In [15]:
def load_images(path, label, weight, IMG_SIZE = (224, 224)):
  """Load one dataset element: read and resize the image, cast label and weight.

  Args:
    path: Path of the image file to read.
    label: Label of the sample; cast to float16.
    weight: Weight of the sample; cast to float16.
    IMG_SIZE: Target size passed to `tf.image.resize`.

  Returns:
    Tuple `(image, label, weight)` where `image` is the decoded 3-channel
    image resized to `IMG_SIZE` as float32.
  """
  raw_bytes = tf.io.read_file(path)
  decoded = tf.image.decode_image(raw_bytes,
                                  channels = 3,
                                  expand_animations = False)
  resized = tf.image.resize(decoded, IMG_SIZE)
  return (tf.cast(resized, dtype = tf.float32),
          tf.cast(label, dtype = tf.float16),
          tf.cast(weight, dtype = tf.float16))

In [16]:
# Replace each (path, label, weight) element by (image, label, weight),
# letting tf.data choose the level of parallelism.
dataset = dataset.map(map_func = load_images,
                      num_parallel_calls = tf.data.AUTOTUNE)

In [17]:
# Positional 70 / 15 / 15 split of the (already shuffled) dataset.
DATASET_SIZE = len(dataset)

train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)  # nominal size; validation also absorbs rounding leftovers
test_size = int(0.15 * DATASET_SIZE)

train_dataset = dataset.take(train_size)
# Everything after the training slice is carved into test and validation.
# Both pieces are cut at the same boundary (`test_size`), so they are disjoint
# and together cover the whole holdout even if the two fractions are changed
# to differ. With equal fractions this yields the same partition as cutting
# test with `take(test_size)` and validation with `skip(val_size)`.
holdout_dataset = dataset.skip(train_size)
test_dataset = holdout_dataset.take(test_size)
val_dataset = holdout_dataset.skip(test_size)

In [18]:
# Batch every split with the same batch size and prefetch upcoming batches.
BATCH_SIZE = 64

def _batch_and_prefetch(split):
  """Return `split` batched by BATCH_SIZE with autotuned prefetching."""
  return split.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

train_dataset = _batch_and_prefetch(train_dataset)
val_dataset = _batch_and_prefetch(val_dataset)
test_dataset = _batch_and_prefetch(test_dataset)

### **Building and training model**

In [57]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, Flatten, Dense, Activation, BatchNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

NUM_CLASSES = 4            # width of the softmax output layer
IMG_SIZE = (224, 224, 3)   # model input shape (height, width, channels)

def create_model(classes, shape, INIT_LR = 3e-4):
  """Build and compile a classifier on top of a frozen VGG16 feature extractor.

  The head is Flatten -> Dense(2048) -> ReLU -> Dropout(0.5) ->
  BatchNormalization -> Dense(classes) -> softmax.

  Args:
    classes: Number of units in the output layer.
    shape: Input shape passed to the `Input` layer.
    INIT_LR: Learning rate given to the Adam optimizer.

  Returns:
    The compiled Keras `Model` (categorical cross-entropy loss, accuracy metric).
  """
  # ImageNet-pretrained convolutional base without the original classifier
  # head; frozen so that only the new head is trained.
  feature_extractor = VGG16(include_top = False, weights = 'imagenet')
  feature_extractor.trainable = False

  # NOTE(review): the input is declared float16 and two activations are pinned
  # to float32, which looks like a mixed-precision setup, but no mixed-precision
  # policy is set in the visible notebook -- confirm this is intended.
  inputs = Input(shape = shape, dtype = tf.float16, name = 'input_layer')
  extractor = feature_extractor(inputs)

  flatten = Flatten(name = 'flatten_layer')(extractor)

  classifier = Dense(2048)(flatten)
  classifier = Activation('relu', dtype = tf.float32)(classifier)
  classifier = Dropout(0.5)(classifier)
  classifier = BatchNormalization()(classifier)

  classifier = Dense(classes)(classifier)
  outputs = Activation(activation = 'softmax',
                       dtype = tf.float32,
                       name = 'softmax_output')(classifier)

  model = Model(inputs = inputs, outputs = outputs)

  # `metrics` is documented as a list; a bare string only works on Keras
  # versions that happen to coerce it.
  model.compile(optimizer = Adam(learning_rate = INIT_LR),
                loss = 'categorical_crossentropy',
                metrics = ['accuracy'])

  return model

model = create_model(NUM_CLASSES, IMG_SIZE)

In [58]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop training once validation accuracy has not improved for 3 epochs. The
# weights are left as of the last epoch (restore_best_weights is off).
early_stopping = EarlyStopping(
    monitor = 'val_accuracy',
    patience = 3,
    verbose = 0,
    restore_best_weights = False,
)

In [59]:
from tensorflow.keras.callbacks import LearningRateScheduler
EPOCHS = 100

def scheduler(x):
  """Return the learning rate for epoch index `x`: 3e-4 * 0.95 ** (x + EPOCHS)."""
  # NOTE(review): because EPOCHS is added to the exponent, the rate at epoch 0
  # is 3e-4 * 0.95 ** 100 (about 1.8e-6), not 3e-4 -- confirm this offset is
  # intended rather than a plain 0.95 ** x decay.
  exponent = x + EPOCHS
  return 3e-4 * 0.95 ** exponent
# Callback that sets the optimizer learning rate from `scheduler` each epoch.
lr_scheduler = LearningRateScheduler(
    schedule = scheduler,
    verbose = 0,
)

In [60]:
from tensorflow.keras.callbacks import ModelCheckpoint
# Keep only the weights of the epoch with the best validation accuracy.
path = "CHECKPOINTS/cp.ckpt"
# The keyword must be spelled `monitor`. With the previous `montior` typo the
# callback never received the intended metric, so "best" was not judged by
# val_accuracy.
model_checkpoint = ModelCheckpoint(filepath = path,
                                   monitor = "val_accuracy",
                                   save_best_only = True,
                                   save_weights_only = True,
                                   verbose = 0)

In [None]:
# Train on the batched training split and validate after every epoch.
# `batch_size` is intentionally not passed: `train_dataset` is a tf.data
# dataset that already yields batches of BATCH_SIZE, and Keras documents that
# batch_size must not be specified for dataset inputs.
history = model.fit(x=train_dataset,
                    epochs=EPOCHS,
                    validation_data=val_dataset,
                    verbose=2,
                    callbacks=[early_stopping, lr_scheduler, model_checkpoint])

In [None]:
# Plot the training curves (left) and validation curves (right) per epoch.
df_history = pd.DataFrame(data = history.history)
subplot_titles = ['training', 'validation']
xaxis_title, yaxis_title = 'epoch', 'loss & accuracy'

fig = make_subplots(rows = 1, cols = 2, subplot_titles = subplot_titles)

# Column 1 shows `loss` / `accuracy`, column 2 the `val_`-prefixed counterparts.
# go.Scatter replaces the deprecated go.Line helper; the default trace mode is
# left untouched.
for col, prefix in enumerate(('', 'val_'), start = 1):
  for metric in ('loss', 'accuracy'):
    series_name = prefix + metric
    fig.add_trace(go.Scatter(x = df_history.index,
                             y = df_history[series_name],
                             name = series_name),
                  row = 1, col = col)
  fig.update_xaxes(title_text=xaxis_title, row = 1, col = col)
  fig.update_yaxes(title_text=yaxis_title, row = 1, col = col)

fig.show()

### **Model evaluation**

In [None]:
# Build a fresh model with the same architecture and load into it the weights
# stored at `path` (the checkpoint location used by the ModelCheckpoint callback).
_model = create_model(NUM_CLASSES, IMG_SIZE)
_model.load_weights(path)

In [None]:
# Evaluate the restored model on the test split; the model is compiled with a
# single metric, so the result unpacks into loss and accuracy.
loss, accuracy = _model.evaluate(test_dataset)

In [48]:
import numpy as np
# Predicted class id per test sample: index of the largest softmax output.
y_pred = np.argmax(_model.predict(test_dataset), axis = 1)

# True class id per test sample, recovered from the one-hot labels. Only the
# label component of each batch is converted to numpy, so the image tensors
# are not copied into Python just to read the labels.
label_batches = test_dataset.map(lambda image, label, weight: label)
y_true = np.concatenate(list(label_batches.as_numpy_iterator()), axis = 0)
y_true = np.argmax(y_true, axis = 1)

In [49]:
# Confusion matrix as a numpy array (rows: true class, columns: predicted).
class_names = encoder.classes_
# Pin the matrix size to the number of known classes; otherwise it is sized
# from the largest id seen in y_true / y_pred and may not line up with
# `class_names` when the highest class is absent from both.
conf_matrix = tf.math.confusion_matrix(y_true,
                                       y_pred,
                                       num_classes = len(class_names))
conf_matrix = conf_matrix.numpy()

In [None]:
# Render the confusion matrix as an annotated heatmap, with class names on
# both axes and the x axis drawn along the top.
axis_labels = {'x': "predicted sample", 'y': "true sample"}
fig = px.imshow(
    conf_matrix,
    labels = axis_labels,
    x = class_names,
    y = class_names,
    text_auto = True,
    title = 'Confusion matrix',
)

fig.update_traces(showlegend=False)
fig.update_xaxes(side="top")

fig.show()