# Assign Label

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter may also hide deprecation warnings raised
# by pandas / Keras in later cells.
warnings.filterwarnings("ignore")
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

In [None]:
# Root folder of the dataset; training logs and saved models are kept below it.
dir_path = 'D:/TA_Hannah Georgina_1822007/dataset/'
results_dir = os.path.join(dir_path, 'logs/InceptionResNet/')
models_dir = os.path.join(dir_path, 'models/InceptionResNet/')

# Create both output folders (with any missing parents); an already existing
# directory is left untouched.
os.makedirs(results_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

In [None]:
# Dataset locations: image folders and the CSV sheets holding their labels.
TRAIN_PATH = 'D:/TA_Hannah Georgina_1822007/dataset/Training_Set/data_train/'
TRAIN_LABEL_PATH = 'D:/TA_Hannah Georgina_1822007/dataset/Training_Set/RFMiD_Training_Labels.csv'

VAL_PATH = 'D:/TA_Hannah Georgina_1822007/dataset/Evaluation_Set/data_val/'
VAL_LABEL_PATH = 'D:/TA_Hannah Georgina_1822007/dataset/Evaluation_Set/RFMiD_Validation_Labels.csv'

# Size every image is resized to when loaded, and the matching model input
# shape (the same two dimensions plus 3 channels).
IMG_DIM = (299, 299)
INPUT_SHAPE = (*IMG_DIM, 3)

# Training hyper-parameters.
BATCH_SIZE, EPOCH = 32, 20
x_axis_inc = 1  # for plotting the training acc and loss

In [None]:
# Read the training label sheet and keep only the image ID and the binary
# Disease_Risk target.
df = pd.read_csv(TRAIN_LABEL_PATH)
# .copy() makes `data_labels` an independent frame, so adding a column below
# does not write into a slice of `df` (pandas' SettingWithCopy situation).
data_labels = df[['ID', 'Disease_Risk']].copy()
target_labels = data_labels['Disease_Risk']
# Build "<TRAIN_PATH><ID>.png" for every row. Converting the ID column itself
# to text (instead of going through a row-wise apply) keeps integer IDs as
# "12" even if another selected column is float.
data_labels['image_path'] = TRAIN_PATH + data_labels['ID'].astype(str) + '.png'
data_labels.head()

In [None]:
# Read the validation label sheet and keep only the image ID and the binary
# Disease_Risk target.
# NOTE(review): this sheet is parsed with ';' as separator while the training
# and test sheets use the pandas default ',' - confirm the file really differs.
val_df = pd.read_csv(VAL_LABEL_PATH, sep=';')
# .copy() makes `val_labels` an independent frame, so adding a column below
# does not write into a slice of `val_df` (pandas' SettingWithCopy situation).
val_labels = val_df[['ID', 'Disease_Risk']].copy()
target_val_labels = val_labels['Disease_Risk']
# Build "<VAL_PATH><ID>.png" for every row. Converting the ID column itself to
# text (instead of going through a row-wise apply) keeps integer IDs as "12"
# even if another selected column is float.
val_labels['image_path'] = VAL_PATH + val_labels['ID'].astype(str) + '.png'
val_labels.head()

# Training Model

## Prepare Data Training and Validation

In [None]:
# Load dataset
train_data = np.array([img_to_array(load_img(img, target_size=IMG_DIM))
                       for img in data_labels['image_path'].values.tolist()]).astype('float32')

In [None]:
# Load dataset
val_data = np.array([img_to_array(load_img(img, target_size=IMG_DIM))
                       for img in val_labels['image_path'].values.tolist()]).astype('float32')

In [None]:
print('Training and Validation Dataset Size:', train_data.shape, val_data.shape)

## Prepare for Transfer Learning

In [None]:
prep_in = tf.keras.applications.inception_resnet_v2.preprocess_input

In [None]:
x_train_new = train_data.astype('int')
x_val_new = val_data.astype('int')

In [None]:
x_train_in = prep_in(x_train_new)
x_val_in = prep_in(x_val_new)

In [None]:
train_labels_enc = target_labels.to_numpy()
val_labels_enc = target_val_labels.to_numpy()

In [None]:
# Compare the input value before and after preprocessed
x_train_new[0][80:85, 20:25, 0]

In [None]:
x_train_in[0][80:85, 20:25, 0]

In [None]:
# Load InceptionResNetV2 with ImageNet weights and without its classification
# top, so it can be used as the backbone for transfer learning
base_model = tf.keras.applications.InceptionResNetV2(input_shape=INPUT_SHAPE, include_top=False, weights='imagenet')

In [None]:
# Let's take a look to see how many Layers are in the base_model
print('Number of layers in the base model: ', len(base_model.layers))

In [None]:
# Do not truncate column contents when the layer table is displayed
pd.set_option('max_colwidth', None)
# One row per backbone layer: the layer object, its name and its trainable flag
layers = [(layer, layer.name, layer.trainable) for layer in base_model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

## Feature Extraction

In [None]:
# Freeze the whole pre-trained backbone for the feature-extraction stage
base_model.trainable = False

# Do not truncate column contents when the layer table is displayed
pd.set_option('max_colwidth', None)
# List the backbone layers from index 10 onwards with their trainable flag
layers = [(layer, layer.name, layer.trainable) for layer in base_model.layers[10:]]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Output names for the feature-extraction (FE) run: figure name, per-epoch CSV
# log and model checkpoint file.
# NOTE(review): `fig_train` is not used by any cell visible in this notebook.
fig_train = 'InceptionResNetV2_C_FE_train'
log_file = os.path.join(results_dir, 'InceptionResNetV2_C_FE_train.csv')
model_path = os.path.join(models_dir, 'InceptionResNetV2_C_FE.h5')

In [None]:
# Create a new model on top

inputs = tf.keras.Input(shape=INPUT_SHAPE)
# We make sure that the base_model is running in inference mode here,
# by passing 'training=False'. This is important for fine-tuning
x = base_model(inputs, training=False)

# Classification head: global average pooling, then two dense layers with a
# dropout of 0.5 in between
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)

# A Dense classifier with a single unit (binary classification); the sigmoid
# activation means the model outputs probabilities, not logits
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

In [None]:
# Table of the top-level layers of the assembled model and their trainable flag
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
model.summary()

In [None]:
# Log every epoch to CSV and keep only the checkpoint with the highest
# validation binary accuracy
callbacks = [CSVLogger(log_file), ModelCheckpoint(filepath=model_path, monitor='val_binary_accuracy', mode='max', 
                                                  save_best_only=True, verbose=0)]

In [None]:
# The model ends in a sigmoid unit, so it already outputs probabilities. The
# loss must therefore NOT treat them as logits (from_logits=False); with
# from_logits=True a second sigmoid would be applied inside the loss.
# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
              metrics=[keras.metrics.BinaryAccuracy()])

In [None]:
# Train the feature-extraction model. BATCH_SIZE is passed explicitly so the
# constant defined with the other hyper-parameters actually controls the batch
# size instead of fit() silently using its own default.
history = model.fit(x=x_train_in, y=train_labels_enc,
                    validation_data=(x_val_in, val_labels_enc),
                    batch_size=BATCH_SIZE,
                    epochs=EPOCH, verbose=1, callbacks=callbacks)

In [None]:
# Metric histories recorded by fit(); display which metric names are available
history_dict = history.history
history_dict.keys()

In [None]:
# Per-epoch curves recorded during the feature-extraction run.
epoch_list = list(range(1, EPOCH + 1))
y_train_acc, y_val_acc = history.history['binary_accuracy'], history.history['val_binary_accuracy']
y_train_loss, y_val_loss = history.history['loss'], history.history['val_loss']

# Two side-by-side panels: accuracy on the left, loss on the right.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14,4))
t = f.suptitle('Pre-Trained InceptionResNetV2-C (Feature Extraction) Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

# One entry per panel: axis, (values, legend label) pairs, y-limits,
# y-axis label and panel title.
for panel, panel_curves, y_limits, y_label, panel_title in (
        (ax1, ((y_train_acc, 'Train Accuracy'), (y_val_acc, 'Validation Accuracy')),
         (0.4, 1), 'Accuracy Value', 'Accuracy'),
        (ax2, ((y_train_loss, 'Train Loss'), (y_val_loss, 'Validation Loss')),
         (0, 1), 'Cross Entropy', 'Loss')):
    for curve_values, curve_label in panel_curves:
        panel.plot(epoch_list, curve_values, label=curve_label)
    panel.set_xticks(np.arange(0, EPOCH+1, x_axis_inc))
    panel.set_ylim(*y_limits)
    panel.set_ylabel(y_label)
    panel.set_xlabel('Epoch')
    panel.set_title(panel_title)
    l1 = panel.legend(loc='best')

## Fine Tuning-1

In [None]:
# Fine tune from this layer index onwards; every layer before it stays frozen.
# NOTE(review): the original comment described this cut as "1/5 of total
# layers" - confirm against the layer count printed earlier.
fine_tune_at = 170

# The feature-extraction stage set `base_model.trainable = False`, which marks
# every backbone layer as frozen. Re-enable training first; otherwise the loop
# below changes nothing and no backbone layer is ever fine-tuned.
base_model.trainable = True

# Freeze all the layers before the 'fine_tune_at' layer
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

In [None]:
# Let's check whether the layers are already trainable
# (lists the backbone layers from index 13 onwards with their trainable flag)
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in base_model.layers[13:]]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Output names for the Fine Tuning-1 (FT1) run: figure name, per-epoch CSV log
# and model checkpoint file.
# NOTE(review): `fig_train2` is not used by any cell visible in this notebook.
fig_train2 = 'InceptionResNetV2_C_FT1_train'
log_file2 = os.path.join(results_dir, 'InceptionResNetV2_C_FT1_train.csv')
model_path2 = os.path.join(models_dir, 'InceptionResNetV2_C_FT1.h5')

In [None]:
# Create a new model on top
# NOTE(review): new Dense layers are created here, so the head trained in the
# previous stage is not reused - confirm this is intended.

inputs = tf.keras.Input(shape=INPUT_SHAPE)
# We make sure that the base_model is running in inference mode here,
# by passing 'training=False'. This is important for fine-tuning
x = base_model(inputs, training=False)

# Classification head: global average pooling, then two dense layers with a
# dropout of 0.5 in between
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)

# A Dense classifier with a single unit (binary classification); the sigmoid
# activation means the model outputs probabilities, not logits
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

In [None]:
model.summary()

In [None]:
# Table of the top-level layers of the assembled model and their trainable flag
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Log every epoch to CSV and keep only the checkpoint with the highest
# validation binary accuracy
callbacks = [CSVLogger(log_file2), ModelCheckpoint(filepath=model_path2, monitor='val_binary_accuracy', mode='max'
                                                   ,save_best_only=True, verbose=0)]

In [None]:
# The model ends in a sigmoid unit, so it already outputs probabilities. The
# loss must therefore NOT treat them as logits (from_logits=False); with
# from_logits=True a second sigmoid would be applied inside the loss.
# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=keras.optimizers.RMSprop(learning_rate=1e-4),
              metrics=[keras.metrics.BinaryAccuracy()])

In [None]:
# Train the Fine Tuning-1 model. BATCH_SIZE is passed explicitly so the
# constant defined with the other hyper-parameters actually controls the batch
# size instead of fit() silently using its own default.
history2 = model.fit(x=x_train_in, y=train_labels_enc,
                     validation_data=(x_val_in, val_labels_enc),
                     batch_size=BATCH_SIZE,
                     epochs=EPOCH, verbose=1, callbacks=callbacks)

In [None]:
# Per-epoch curves recorded during the Fine Tuning-1 run.
epoch_list = list(range(1, EPOCH + 1))
y_train_acc, y_val_acc = history2.history['binary_accuracy'], history2.history['val_binary_accuracy']
y_train_loss, y_val_loss = history2.history['loss'], history2.history['val_loss']

# Two side-by-side panels: accuracy on the left, loss on the right.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14,4))
t = f.suptitle('Pre-Trained InceptionResNetV2-C (Fine Tuning-1) Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

# One entry per panel: axis, (values, legend label) pairs, y-limits,
# y-axis label and panel title.
for panel, panel_curves, y_limits, y_label, panel_title in (
        (ax1, ((y_train_acc, 'Train Accuracy'), (y_val_acc, 'Validation Accuracy')),
         (0.4, 1), 'Accuracy Value', 'Accuracy'),
        (ax2, ((y_train_loss, 'Train Loss'), (y_val_loss, 'Validation Loss')),
         (0, 1), 'Cross Entropy', 'Loss')):
    for curve_values, curve_label in panel_curves:
        panel.plot(epoch_list, curve_values, label=curve_label)
    panel.set_xticks(np.arange(0, EPOCH+1, x_axis_inc))
    panel.set_ylim(*y_limits)
    panel.set_ylabel(y_label)
    panel.set_xlabel('Epoch')
    panel.set_title(panel_title)
    l1 = panel.legend(loc='best')

## Fine Tuning 2

In [None]:
# Fine tune from this layer index onwards; only the first 11 backbone layers
# stay frozen.
fine_tune_at = 11

# An earlier cell set `base_model.trainable = False`, which marks every
# backbone layer as frozen. Re-enable training first; otherwise the loop below
# changes nothing and no backbone layer is ever fine-tuned.
base_model.trainable = True

# Freeze all the layers before the 'fine_tune_at' layer
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

In [None]:
# Let's check whether the layers are already trainable
# (lists the backbone layers from index 10 onwards with their trainable flag)
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in base_model.layers[10:]]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Output names for the Fine Tuning 2 (FT2) run: figure name, per-epoch CSV log
# and model checkpoint file.
# NOTE(review): `fig_train3` is not used by any cell visible in this notebook.
fig_train3 = 'InceptionResNetV2_C_FT2_train'
log_file3 = os.path.join(results_dir, 'InceptionResNetV2_C_FT2_train.csv')
model_path3 = os.path.join(models_dir, 'InceptionResNetV2_C_FT2.h5')

In [None]:
# Create a new model on top
# NOTE(review): new Dense layers are created here, so the head trained in the
# previous stage is not reused - confirm this is intended.

inputs = tf.keras.Input(shape=INPUT_SHAPE)
# We make sure that the base_model is running in inference mode here,
# by passing 'training=False'. This is important for fine-tuning
x = base_model(inputs, training=False)

# Classification head: global average pooling, then two dense layers with a
# dropout of 0.5 in between
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)

# A Dense classifier with a single unit (binary classification); the sigmoid
# activation means the model outputs probabilities, not logits
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

In [None]:
model.summary()

In [None]:
# Table of the top-level layers of the assembled model and their trainable flag
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Log every epoch to CSV and keep only the checkpoint with the highest
# validation binary accuracy
callbacks = [CSVLogger(log_file3), ModelCheckpoint(filepath=model_path3, monitor='val_binary_accuracy', mode='max', 
                                                  save_best_only=True, verbose=0)]

In [None]:
# The model ends in a sigmoid unit, so it already outputs probabilities. The
# loss must therefore NOT treat them as logits (from_logits=False); with
# from_logits=True a second sigmoid would be applied inside the loss.
# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=keras.optimizers.RMSprop(learning_rate=1e-5),
              metrics=[keras.metrics.BinaryAccuracy()])

In [None]:
# Train the Fine Tuning 2 model. BATCH_SIZE is passed explicitly so the
# constant defined with the other hyper-parameters actually controls the batch
# size instead of fit() silently using its own default.
history3 = model.fit(x=x_train_in, y=train_labels_enc,
                     validation_data=(x_val_in, val_labels_enc),
                     batch_size=BATCH_SIZE,
                     epochs=EPOCH, verbose=1, callbacks=callbacks)

In [None]:
# Per-epoch curves recorded during the Fine Tuning-2 run.
epoch_list = list(range(1, EPOCH + 1))
y_train_acc, y_val_acc = history3.history['binary_accuracy'], history3.history['val_binary_accuracy']
y_train_loss, y_val_loss = history3.history['loss'], history3.history['val_loss']

# Two side-by-side panels: accuracy on the left, loss on the right.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14,4))
t = f.suptitle('Pre-Trained InceptionResNetV2-C (Fine Tuning-2) Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

# One entry per panel: axis, (values, legend label) pairs, y-limits,
# y-axis label and panel title.
for panel, panel_curves, y_limits, y_label, panel_title in (
        (ax1, ((y_train_acc, 'Train Accuracy'), (y_val_acc, 'Validation Accuracy')),
         (0.4, 1), 'Accuracy Value', 'Accuracy'),
        (ax2, ((y_train_loss, 'Train Loss'), (y_val_loss, 'Validation Loss')),
         (0, 1), 'Cross Entropy', 'Loss')):
    for curve_values, curve_label in panel_curves:
        panel.plot(epoch_list, curve_values, label=curve_label)
    panel.set_xticks(np.arange(0, EPOCH+1, x_axis_inc))
    panel.set_ylim(*y_limits)
    panel.set_ylabel(y_label)
    panel.set_xlabel('Epoch')
    panel.set_title(panel_title)
    l1 = panel.legend(loc='best')

## Fine Tuning 3

In [None]:
# Fine tune from this layer index onwards; only the first 11 backbone layers
# stay frozen.
# NOTE(review): this is the same cut-off as the Fine Tuning 2 stage - confirm
# what is meant to differ between the two runs.
fine_tune_at = 11

# An earlier cell set `base_model.trainable = False`, which marks every
# backbone layer as frozen. Re-enable training first; otherwise the loop below
# changes nothing and no backbone layer is ever fine-tuned.
base_model.trainable = True

# Freeze all the layers before the 'fine_tune_at' layer
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

In [None]:
# Let's check whether the layers are already trainable
# (lists the backbone layers from index 10 onwards with their trainable flag)
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in base_model.layers[10:]]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Output names for the Fine Tuning 3 (FT3) run: figure name, per-epoch CSV log
# and model checkpoint file.
# NOTE(review): `fig_train4` is not used by any cell visible in this notebook.
fig_train4 = 'InceptionResNetV2_C_FT3_train'
log_file4 = os.path.join(results_dir, 'InceptionResNetV2_C_FT3_train.csv')
model_path4 = os.path.join(models_dir, 'InceptionResNetV2_C_FT3.h5')

In [None]:
# Create a new model on top
# NOTE(review): new Dense layers are created here, so the head trained in the
# previous stage is not reused - confirm this is intended.

inputs = tf.keras.Input(shape=INPUT_SHAPE)
# We make sure that the base_model is running in inference mode here,
# by passing 'training=False'. This is important for fine-tuning
x = base_model(inputs, training=False)

# Classification head: global average pooling, then two dense layers with a
# dropout of 0.5 in between
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)

# A Dense classifier with a single unit (binary classification); the sigmoid
# activation means the model outputs probabilities, not logits
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

In [None]:
# Log every epoch to CSV and keep only the checkpoint with the highest
# validation binary accuracy
callbacks = [CSVLogger(log_file4), ModelCheckpoint(filepath=model_path4, monitor='val_binary_accuracy', mode='max', 
                                                  save_best_only=True, verbose=0)]

In [None]:
# The model ends in a sigmoid unit, so it already outputs probabilities. The
# loss must therefore NOT treat them as logits (from_logits=False); with
# from_logits=True a second sigmoid would be applied inside the loss.
# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=keras.optimizers.RMSprop(learning_rate=1e-5),
              metrics=[keras.metrics.BinaryAccuracy()])

In [None]:
# Train the Fine Tuning 3 model. BATCH_SIZE is passed explicitly so the
# constant defined with the other hyper-parameters actually controls the batch
# size instead of fit() silently using its own default.
history4 = model.fit(x=x_train_in, y=train_labels_enc,
                     validation_data=(x_val_in, val_labels_enc),
                     batch_size=BATCH_SIZE,
                     epochs=EPOCH, verbose=1, callbacks=callbacks)

In [None]:
# Per-epoch curves recorded during the Fine Tuning-3 run.
epoch_list = list(range(1, EPOCH + 1))
y_train_acc, y_val_acc = history4.history['binary_accuracy'], history4.history['val_binary_accuracy']
y_train_loss, y_val_loss = history4.history['loss'], history4.history['val_loss']

# Two side-by-side panels: accuracy on the left, loss on the right.
# NOTE(review): the title mentions augmentation, but no augmentation step is
# visible in this notebook - confirm the wording.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14,4))
t = f.suptitle('Pre-Trained InceptionResNetV2-C (Fine Tuning-3) with Augmentation Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

# One entry per panel: axis, (values, legend label) pairs, y-limits,
# y-axis label and panel title.
for panel, panel_curves, y_limits, y_label, panel_title in (
        (ax1, ((y_train_acc, 'Train Accuracy'), (y_val_acc, 'Validation Accuracy')),
         (0.4, 1), 'Accuracy Value', 'Accuracy'),
        (ax2, ((y_train_loss, 'Train Loss'), (y_val_loss, 'Validation Loss')),
         (0, 1), 'Cross Entropy', 'Loss')):
    for curve_values, curve_label in panel_curves:
        panel.plot(epoch_list, curve_values, label=curve_label)
    panel.set_xticks(np.arange(0, EPOCH+1, x_axis_inc))
    panel.set_ylim(*y_limits)
    panel.set_ylabel(y_label)
    panel.set_xlabel('Epoch')
    panel.set_title(panel_title)
    l1 = panel.legend(loc='best')

# Testing Model

In [None]:
from sklearn.metrics import roc_curve, auc
from tensorflow.keras.models import load_model
import model_utils as mu

In [None]:
# Test-set image folder and its label sheet
TEST_PATH = 'D:/TA_Hannah Georgina_1822007/dataset/Test_Set/data_test/'
TEST_LABEL_PATH = 'D:/TA_Hannah Georgina_1822007/dataset/Test_Set/RFMiD_Testing_Labels.csv'

# Folder holding the .h5 checkpoints written during training
MODEL_PATH = 'D:/TA_Hannah Georgina_1822007/dataset/models/InceptionResNet/'

# Size every test image is resized to (same value as in the training section)
IMG_DIM = (299,299)

In [None]:
# Read the test label sheet and keep only the image ID and the binary
# Disease_Risk target.
test_df = pd.read_csv(TEST_LABEL_PATH)
# .copy() makes `test_labels` an independent frame, so adding a column below
# does not write into a slice of `test_df` (pandas' SettingWithCopy situation).
test_labels = test_df[['ID', 'Disease_Risk']].copy()
target_test_labels = test_labels['Disease_Risk']
# Build "<TEST_PATH><ID>.png" for every row. Converting the ID column itself to
# text (instead of going through a row-wise apply) keeps integer IDs as "12"
# even if another selected column is float.
test_labels['image_path'] = TEST_PATH + test_labels['ID'].astype(str) + '.png'
test_labels.head()

In [None]:
# Load test dataset
test_data = np.array([img_to_array(load_img(img, target_size=IMG_DIM))
                       for img in test_labels['image_path'].values.tolist()]).astype('float32')

In [None]:
print('Testing Dataset Size:', test_data.shape)

In [None]:
x_test_new = test_data.astype('int')

In [None]:
x_test_in = prep_in(x_test_new)

In [None]:
test_labels_enc = target_test_labels.to_numpy()

## Feature Extraction Model

In [None]:
# Checkpoint file written by the feature-extraction training stage
model_fe = os.path.join(MODEL_PATH, 'InceptionResNetV2_C_FE.h5')

In [None]:
# Restore the saved model from disk
model1 = load_model(model_fe)

In [None]:
model1.summary()

In [None]:
# Predict on the preprocessed test images
prob = model1.predict(x_test_in)
# Hard labels: 1 where the predicted value exceeds 0.5, otherwise 0
yhat1 = (prob > 0.5).astype('int')
# Last output column as a 1-D score vector for the ROC curve
yhat_p = prob[:, prob.shape[1]-1]

In [None]:
# mu.prob_csv comes from the project's model_utils module; presumably it
# tabulates true labels against predicted scores - TODO confirm
df_fe= mu.prob_csv(test_labels_enc, prob, head_model='InceptionResNetV2_C_FE')
df_fe.head()

In [None]:
# mu.test_model returns five values; judging by the variable names these are
# accuracy, precision, recall, F1 and confusion matrix - verify in model_utils
acc_a_fe, prec_a_fe, rec_a_fe, f1_a_fe, cm_a_fe = mu.test_model(target_test_labels, yhat1)

In [None]:
# ROC curve from the continuous scores and the area under it
fpr1, tpr1, _ = roc_curve(target_test_labels, yhat_p)
auroc1 = auc(fpr1, tpr1)
print(auroc1)

## Fine Tuning 1

In [None]:
# Checkpoint file written by the Fine Tuning 1 training stage
model_ft1 = os.path.join(MODEL_PATH, 'InceptionResNetV2_C_FT1.h5')

In [None]:
# Restore the saved model from disk
model2 = load_model(model_ft1)

In [None]:
model2.summary()

In [None]:
# Predict on the preprocessed test images
prob2 = model2.predict(x_test_in)
# Hard labels: 1 where the predicted value exceeds 0.5, otherwise 0
yhat2 = (prob2 > 0.5).astype('int')
# Last output column as a 1-D score vector for the ROC curve
yhat_p2 = prob2[:, prob2.shape[1]-1]

In [None]:
# mu.prob_csv comes from the project's model_utils module; presumably it
# tabulates true labels against predicted scores - TODO confirm
# NOTE(review): the result is stored in `df_fe` again, replacing the previous table
df_fe= mu.prob_csv(test_labels_enc, prob2, head_model='InceptionResNetV2_C_FT1')
df_fe.head()

In [None]:
# mu.test_model returns five values; judging by the variable names these are
# accuracy, precision, recall, F1 and confusion matrix - verify in model_utils
acc_a_ft1, prec_a_ft1, rec_a_ft1, f1_a_ft1, cm_a_ft1 = mu.test_model(target_test_labels, yhat2)

In [None]:
# ROC curve from the continuous scores and the area under it
fpr2, tpr2, _ = roc_curve(target_test_labels, yhat_p2)
auroc2 = auc(fpr2,tpr2)
print(auroc2)

## Fine Tuning 2

In [None]:
# Checkpoint file written by the Fine Tuning 2 training stage
model_ft2 = os.path.join(MODEL_PATH, 'InceptionResNetV2_C_FT2.h5')

In [None]:
# Restore the saved model from disk
model3 = load_model(model_ft2)

In [None]:
model3.summary()

In [None]:
# Predict on the preprocessed test images
prob3 = model3.predict(x_test_in)
# Hard labels: 1 where the predicted value exceeds 0.5, otherwise 0
yhat3 = (prob3 > 0.5).astype('int')
# Last output column as a 1-D score vector for the ROC curve
yhat_p3 = prob3[:, prob3.shape[1]-1]

In [None]:
# mu.prob_csv comes from the project's model_utils module; presumably it
# tabulates true labels against predicted scores - TODO confirm
# NOTE(review): the result is stored in `df_fe` again, replacing the previous table
df_fe= mu.prob_csv(test_labels_enc, prob3, head_model='InceptionResNetV2_C_FT2')
df_fe.head()

In [None]:
# mu.test_model returns five values; judging by the variable names these are
# accuracy, precision, recall, F1 and confusion matrix - verify in model_utils
acc_a_ft2, prec_a_ft2, rec_a_ft2, f1_a_ft2, cm_a_ft2 = mu.test_model(target_test_labels, yhat3)

In [None]:
# ROC curve from the continuous scores and the area under it
fpr3, tpr3, _ = roc_curve(target_test_labels, yhat_p3)
auroc3 = auc(fpr3,tpr3)
print(auroc3)

## Fine Tuning 3

In [None]:
# Checkpoint file written by the Fine Tuning 3 training stage
model_ft3 = os.path.join(MODEL_PATH, 'InceptionResNetV2_C_FT3.h5')

In [None]:
# Restore the saved model from disk
model4 = load_model(model_ft3)

In [None]:
model4.summary()

In [None]:
# Predict on the preprocessed test images
prob4 = model4.predict(x_test_in)
# Hard labels: 1 where the predicted value exceeds 0.5, otherwise 0
yhat4 = (prob4 > 0.5).astype('int')
# Last output column as a 1-D score vector for the ROC curve
yhat_p4 = prob4[:, prob4.shape[1]-1]

In [None]:
# mu.prob_csv comes from the project's model_utils module; presumably it
# tabulates true labels against predicted scores - TODO confirm
# NOTE(review): the result is stored in `df_fe` again, replacing the previous table
df_fe= mu.prob_csv(test_labels_enc, prob4, head_model='InceptionResNetV2_C_FT3')
df_fe.head()

In [None]:
# mu.test_model returns five values; judging by the variable names these are
# accuracy, precision, recall, F1 and confusion matrix - verify in model_utils
acc_a_ft3, prec_a_ft3, rec_a_ft3, f1_a_ft3, cm_a_ft3 = mu.test_model(target_test_labels, yhat4)

In [None]:
# ROC curve from the continuous scores and the area under it
fpr4, tpr4, _ = roc_curve(target_test_labels, yhat_p4)
auroc4 = auc(fpr4,tpr4)
print(auroc4)

In [None]:
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# newer releases no longer accept the old name, so use whichever name this
# installation provides.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

lw = 1.5

# One ROC curve per trained model, labelled with its area under the curve.
plt.plot(fpr1, tpr1, color='blue', lw = lw, label='FE (area = %0.4f)' % auroc1)
plt.plot(fpr2, tpr2, color='green', lw = lw, label='FT1 (area = %0.4f)' % auroc2)
plt.plot(fpr3, tpr3, color='orange', lw = lw, label='FT2 (area = %0.4f)' % auroc3)
plt.plot(fpr4, tpr4, color='crimson', lw = lw, label='FT3 (area = %0.4f)' % auroc4)
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Comparison of InceptionResNetV2 C Performance')
plt.legend(loc="lower right")
plt.show()