In [1]:
!pip install openpyxl
!pip install kaggle



In [2]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

rm: cannot remove '/root/.kaggle': No such file or directory


In [3]:
!kaggle datasets download -d tawsifurrahman/covid19-radiography-database

Downloading covid19-radiography-database.zip to /content
 98% 732M/745M [00:05<00:00, 168MB/s]
100% 745M/745M [00:05<00:00, 151MB/s]


In [4]:
!unzip -q /content/covid19-radiography-database.zip

In [22]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split

# For visualizing images
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.image as mpimg
import random
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# For augmenting data
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# For modelling
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Activation
from tensorflow.keras import Sequential, layers
import tensorflow_hub as hub
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import load_model

# For evaluation
from sklearn.metrics import classification_report, confusion_matrix, roc_curve

import os

In [6]:
covid_df = pd.read_excel("/content/COVID-19_Radiography_Dataset/COVID.metadata.xlsx")
normal_df = pd.read_excel("/content/COVID-19_Radiography_Dataset/Normal.metadata.xlsx")

In [7]:
covid_df['TARGET'] = 'Positive'
normal_df['TARGET'] = 'Negative'
normal_df.head()

Unnamed: 0,FILE NAME,FORMAT,SIZE,URL,TARGET
0,NORMAL-1,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
1,NORMAL-2,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
2,NORMAL-3,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
3,NORMAL-4,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
4,NORMAL-5,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative


In [8]:
covid_df['FILE NAME'] = covid_df.apply(lambda x: "COVID/" + x['FILE NAME'] + "." + str.lower(x['FORMAT']), axis=1)
normal_df['FILE NAME'] = normal_df.apply(lambda x: "Normal/" + str.capitalize(x['FILE NAME']) + "." + str.lower(x['FORMAT']), axis=1)
normal_df.head()

Unnamed: 0,FILE NAME,FORMAT,SIZE,URL,TARGET
0,Normal/Normal-1.png,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
1,Normal/Normal-2.png,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
2,Normal/Normal-3.png,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
3,Normal/Normal-4.png,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative
4,Normal/Normal-5.png,PNG,256*256,https://www.kaggle.com/c/rsna-pneumonia-detect...,Negative


In [9]:
df = pd.concat([normal_df[['FILE NAME', 'TARGET']], 
                covid_df[['FILE NAME','TARGET']]], ignore_index=True)
df.head()

Unnamed: 0,FILE NAME,TARGET
0,Normal/Normal-1.png,Negative
1,Normal/Normal-2.png,Negative
2,Normal/Normal-3.png,Negative
3,Normal/Normal-4.png,Negative
4,Normal/Normal-5.png,Negative


In [10]:
train, test = train_test_split(df, test_size=0.2, stratify=df['TARGET'], random_state=42)
print(train.shape)
print(test.shape)

(11046, 2)
(2762, 2)


In [11]:
train_df, valid_df = train_test_split(train, test_size=0.2, stratify=train['TARGET'], random_state=42)
print(train_df.shape)
print(valid_df.shape)

(8836, 2)
(2210, 2)


In [12]:
DATA_DIR = "/content/COVID-19_Radiography_Dataset"
IMAGE_SIZE = (512, 512)

non_aug_gen = ImageDataGenerator(rescale=1/255.)
aug_gen = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=[0.9, 1.25],
    brightness_range=[0.5, 1.5]
)

aug_train = aug_gen.flow_from_dataframe(
    dataframe=train_df,
    directory=DATA_DIR,
    x_col='FILE NAME',
    y_col='TARGET',
    target_size=IMAGE_SIZE,
    batch_size=32,
    class_mode='binary', 
    color_mode='rgb',
    shuffle=True
)

non_aug_valid = non_aug_gen.flow_from_dataframe(
    dataframe=valid_df,
    directory=DATA_DIR,
    x_col='FILE NAME',
    y_col='TARGET',
    target_size=IMAGE_SIZE,
    batch_size=32,
    class_mode='binary', 
    color_mode='rgb',
    shuffle=True
)

test_valid_gen = non_aug_gen.flow_from_dataframe(
    dataframe=valid_df,
    directory=DATA_DIR,
    x_col='FILE NAME',
    y_col='TARGET',
    target_size=IMAGE_SIZE,
    batch_size=32,
    class_mode='binary', 
    color_mode='rgb',
    shuffle=False
)

test_test_gen = non_aug_gen.flow_from_dataframe(
    dataframe=test,
    directory=DATA_DIR,
    x_col='FILE NAME',
    y_col='TARGET',
    target_size=IMAGE_SIZE,
    batch_size=32,
    class_mode='binary', 
    color_mode='rgb',
    shuffle=False
)

Found 8836 validated image filenames belonging to 2 classes.
Found 2210 validated image filenames belonging to 2 classes.
Found 2210 validated image filenames belonging to 2 classes.
Found 2762 validated image filenames belonging to 2 classes.


In [13]:
# Add reduced LR to callbacks
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss",  
                                                 factor=0.2, # multiply the learning rate by 0.2 (reduce by 5x)
                                                 patience=4,
                                                 verbose=1, # print out when learning rate goes down 
                                                 min_lr=1e-7)

checkpoint = tf.keras.callbacks.ModelCheckpoint('model_.h5', save_best_only=True, monitor='val_loss', mode=min)



# EfficientNetV2-XL

In [14]:
efficientnet_url = 'https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2'

In [15]:
def create_model(model_url, num_classes=1):
    """Takes a TensorFlow Hub URL and creates a Keras Sequential model with it.

    Args:
    model_url (str): A TensorFlow Hub feature extraction URL.
    num_classes (int): Number of output neurons in output layer,
    should be equal to number of target classes, default 10.

    Returns:
    An uncompiled Keras Sequential model with model_url as feature
    extractor layer and Dense output layer with num_classes outputs.
    """
    # Download the pretrained model and save it as a Keras layer
    feature_extractor_layer = hub.KerasLayer(model_url,
                                          trainable=False, # freeze the underlying patterns
                                          name='feature_extraction_layer',
                                          input_shape=(512, 512, 3,)) # define the input image shape

    # Create our own model
    model = tf.keras.Sequential([
    feature_extractor_layer, # use the feature extraction layer as the base
    layers.Dense(num_classes, activation='sigmoid', name='output_layer') # create our own output layer      
    ])

    return model

In [16]:
tf.random.set_seed(42)

metrics = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'), 
    'accuracy',
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc', curve='ROC')
]

# Build model
model_efficientnet = create_model(efficientnet_url)

# Compile model
model_efficientnet.compile(loss='binary_crossentropy',
                           optimizer=Adam(),
                           metrics=metrics)

In [17]:
model_efficientnet.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 feature_extraction_layer (K  (None, 1280)             207615832 
 erasLayer)                                                      
                                                                 
 output_layer (Dense)        (None, 1)                 1281      
                                                                 
Total params: 207,617,113
Trainable params: 1,281
Non-trainable params: 207,615,832
_________________________________________________________________


In [19]:
# Train the model
history_efficientnet = model_efficientnet.fit(aug_train, epochs=3,
                                              validation_data=non_aug_valid,
                                              steps_per_epoch=len(aug_train),
                                              validation_steps=len(non_aug_valid),
                                              callbacks=[reduce_lr, checkpoint])

np.save('history_efficientnet.npy', history_efficientnet.history)

Epoch 1/3
Epoch 2/3
Epoch 3/3


# Coba load history file & model file

In [25]:
history_test = np.load('history_efficientnet.npy', allow_pickle='TRUE').item()
history_test

{'accuracy': [0.8598913550376892, 0.9061792492866516, 0.9198732376098633],
 'auc': [0.9117060303688049, 0.9545187950134277, 0.9670813679695129],
 'fn': [1036.0, 645.0, 540.0],
 'fp': [202.0, 184.0, 168.0],
 'loss': [0.3377688527107239, 0.24416062235832214, 0.21054047346115112],
 'lr': [0.001, 0.001, 0.001],
 'precision': [0.8635135293006897, 0.9007015824317932, 0.9134912490844727],
 'recall': [0.5522903800010681, 0.7212618589401245, 0.7666378617286682],
 'tn': [6320.0, 6338.0, 6354.0],
 'tp': [1278.0, 1669.0, 1774.0],
 'val_accuracy': [0.8941176533699036, 0.9108597040176392, 0.9348416328430176],
 'val_auc': [0.9636924266815186, 0.9749817252159119, 0.9783760905265808],
 'val_fn': [211.0, 183.0, 120.0],
 'val_fp': [23.0, 14.0, 24.0],
 'val_loss': [0.24498014152050018, 0.21038883924484253, 0.17811089754104614],
 'val_precision': [0.9411764740943909, 0.9658536314964294, 0.9503105878829956],
 'val_recall': [0.6355785727500916, 0.6839378476142883, 0.7927461266517639],
 'val_tn': [1608.0, 161

In [23]:
model_test = load_model('model_.h5', compile=False, custom_objects={'KerasLayer': hub.KerasLayer})
model_test.predict(test_test_gen)

array([[0.32656237],
       [0.17349021],
       [0.06400581],
       ...,
       [0.01621835],
       [0.2652475 ],
       [0.01887699]], dtype=float32)