## Setup


In [None]:
import numpy as np
from typing import Tuple
from scipy import special
from sklearn import metrics
import sklearn

from matplotlib import pyplot as plt

from glob import glob
import os
import shutil

import pandas as pd
from tqdm.notebook import tqdm

import cv2

import tensorflow as tf
# Debug switches: run tf.function-wrapped code and tf.data pipelines eagerly.
# NOTE(review): both settings are meant for debugging and are expected to slow
# training down -- confirm they should stay enabled for full training runs.
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()
import tensorflow_datasets as tfds

# Only let TensorFlow log messages of level ERROR through.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from sklearn.exceptions import ConvergenceWarning

import warnings
# Silence scikit-learn ConvergenceWarning and every FutureWarning.
warnings.simplefilter(action="ignore", category=ConvergenceWarning)
warnings.simplefilter(action="ignore", category=FutureWarning)

## Download Dataset

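Download the dataset from the link below and extract it into the `training/` folder used by the code that follows (subfolders of `.png` images plus `Data_Entry_2017.csv`):

https://drive.google.com/drive/folders/198iMVj0xTOU7-FEPhuopc2wXGnKpSCW1?usp=sharing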

In [None]:
# Build an index of every PNG found in `<dataset_folder>/<subfolder>/` and
# attach a Pneumonia indicator taken from the metadata CSV.
dataset_folder = 'training'

# Split each path with os.path.split instead of a hard-coded '/', so the
# separator always matches what glob returns on the current platform, and so
# an empty glob result still yields a frame with both expected columns.
image_paths = glob(os.path.join(dataset_folder, '*', '*.png'))
image_files = pd.DataFrame(
    [os.path.split(path) for path in image_paths],
    columns=['Folder', 'Image Index'],
)

# One indicator column per finding; str.get_dummies splits on '|' by default.
images = pd.read_csv(os.path.join(dataset_folder, 'Data_Entry_2017.csv'))
images = image_files.merge(
    images.join(images['Finding Labels'].str.get_dummies()),
    on='Image Index',
    how='left',  # keep every image on disk, even without a CSV row
)

dataset = images[['Folder', 'Image Index', 'Pneumonia']].copy()
dataset.rename(columns={'Pneumonia': 'label'}, inplace=True)

# nunique() ignores NaN, i.e. images that had no matching CSV row.
# NOTE(review): the models size their output layer with num_classes while the
# tf.data pipelines derive classes from the subfolder names -- confirm that
# both give the same number of classes.
num_classes = dataset.label.nunique()

In [None]:
# Show the image index table (Folder, Image Index, label) for a visual check.
print(dataset)

In [None]:
# ---- Reproducibility / data loading ----
seed, shuffle = 123, True
batchsize = 16
imagesize = (256, 256, 3)  # (height, width, channels)
# Channel count -> Keras `color_mode` string.
layer_map = {
    1: 'grayscale',
    3: 'rgb',
}

# ---- Validation ----
val_split = 0.2  # passed as `validation_split` when building the datasets
val_freq = 5     # passed as `validation_freq` to fit()

# ---- Training schedule ----
# The first `epoch_toptrain` epochs are stage 1; stage 2 runs for the
# remaining `total_epochs - epoch_toptrain` epochs.
epoch_toptrain, total_epochs = 20, 100
lr_stage1, lr_stage2 = 1e-3, 0.00001

# ---- Model ----
activation = 'relu'  # Keras activation identifier

In [None]:
# Load the training and validation subsets at `imagesize` resolution from the
# class subfolders of `dataset_folder`. Both calls use the same seed and
# validation_split so the two subsets come from one consistent split.
train_ds, val_ds = [
    tf.keras.utils.image_dataset_from_directory(
        dataset_folder,
        subset=subset_name,
        validation_split=val_split,
        seed=seed,
        shuffle=shuffle,
        batch_size=batchsize,
        image_size=imagesize[:2],
        color_mode=layer_map[imagesize[2]],
        label_mode='categorical',  # one-hot labels for categorical_crossentropy
    )
    for subset_name in ('training', 'validation')
]

In [None]:
# Transfer-learning model: ImageNet-pretrained EfficientNetV2-B0 backbone
# (no classification top, global max pooling) followed by a new dense head.
efficientnet = tf.keras.applications.efficientnet_v2.EfficientNetV2B0(
    include_top=False,
    weights='imagenet',
    input_shape=imagesize,
    pooling='max',
)

# New classification head on top of the pooled backbone features.
x = efficientnet.output
x = tf.keras.layers.Dense(1024, activation=activation)(x)

predictions = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

net = tf.keras.Model(inputs=efficientnet.input, outputs=predictions)

# Freeze the whole backbone so only the new head is trained.
for layer in efficientnet.layers:
    layer.trainable = False

net.compile(
    loss='categorical_crossentropy',
    # `lr` was never defined (NameError); use the stage-1 learning rate, the
    # same one the other frozen-backbone model is compiled with.
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_stage1),
    metrics=['accuracy'],
)

net.summary()

In [None]:
# Train `net` for `epoch_toptrain` epochs, keeping only the weights of the
# best epoch as judged by validation accuracy.
checkpoint_filepath = 'checkpoint/efficientnet_stage1'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

# Validation only runs every `val_freq` epochs, so `val_accuracy` is only
# available to the checkpoint callback on those epochs.
net.fit(
    train_ds,
    epochs=epoch_toptrain,
    validation_data=val_ds,
    validation_freq=val_freq,
    callbacks=[model_checkpoint_callback],
    verbose=1,
)

In [None]:
# Input size for the model that keeps EfficientNet's original top layers.
imagesize_top = (224, 224, 3)

# Same directory, split and seed as the first pair of datasets, but resized
# to `imagesize_top`.
train_ds_top, val_ds_top = [
    tf.keras.utils.image_dataset_from_directory(
        dataset_folder,
        subset=subset_name,
        validation_split=val_split,
        seed=seed,
        shuffle=shuffle,
        batch_size=batchsize,
        image_size=imagesize_top[:2],
        color_mode=layer_map[imagesize_top[2]],
        label_mode='categorical',
    )
    for subset_name in ('training', 'validation')
]

In [None]:
# Second model: full EfficientNetV2-B0 (top included, ImageNet weights) whose
# final layer is replaced by a new softmax layer attached to `top_dropout`.
efficientnet_top = tf.keras.applications.efficientnet_v2.EfficientNetV2B0(
    weights='imagenet',
    include_top=True,
)

x = efficientnet_top.get_layer('top_dropout').output
x = tf.keras.layers.Dense(
    num_classes,
    activation='softmax',
    name='predictions',
)(x)

net_top = tf.keras.Model(inputs=efficientnet_top.input, outputs=x)

# Stage 1: freeze the first 267 layers of the pretrained network.
# NOTE(review): 267 is a hard-coded layer index -- verify it against the
# printed layer list for the installed Keras version.
for layer in efficientnet_top.layers[:267]:
    layer.trainable = False

net_top.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_stage1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

net_top.summary()

In [None]:
# Render the architecture of `net_top` to model.png, annotated with shapes,
# layer names, trainable flags and activations.
tf.keras.utils.plot_model(
    net_top,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
    show_trainable=True,
    show_layer_activations=True,
)

In [None]:
# Stage 1 for `net_top`: train for `epoch_toptrain` epochs and keep only the
# weights of the best epoch as judged by validation accuracy.
checkpoint_filepath = 'checkpoint/efficientnet_stage1_top'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

# Validation only runs every `val_freq` epochs, so `val_accuracy` is only
# available to the checkpoint callback on those epochs.
history_stage1 = net_top.fit(
    train_ds_top,
    epochs=epoch_toptrain,
    validation_data=val_ds_top,
    validation_freq=val_freq,
    callbacks=[model_checkpoint_callback],
    verbose=1,
)

In [None]:
# Print "<index> <name>" for every layer of net_top, to help choose the
# freeze/unfreeze cut-off index.
for i, layer in enumerate(net_top.layers):
    print(f"{i} {layer.name}")

In [None]:
# Stage 2 (fine-tuning): layers with index below 252 stay frozen, every layer
# from index 252 onwards becomes trainable.
for i, layer in enumerate(net_top.layers):
    layer.trainable = i >= 252

# Recompile so the changed trainable flags take effect, now with the smaller
# stage-2 learning rate.
net_top.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_stage2),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

checkpoint_filepath = 'checkpoint/efficientnet_stage2_top'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

# Run for the epochs left over after stage 1.
history_stage2 = net_top.fit(
    train_ds_top,
    epochs=total_epochs - epoch_toptrain,
    validation_data=val_ds_top,
    validation_freq=val_freq,
    callbacks=[model_checkpoint_callback],
    verbose=1,
)