### Preparing the Working Environment

In [1]:
import matplotlib.pyplot as plt
import numpy as np

from importlib.util import find_spec
if find_spec("core") is None:
    import sys
    sys.path.append('..')

import tensorflow as tf
import tensorflow_datasets as tfds
import random
from core.datasets import RetinaDataset
from core.datasets.data_util import preprocess_image, preprocess_for_train
from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight
from core.networks.resnet_with_conv import resnetconv
from core.networks.resnet_with_conv_finetune import resnetconvfinetune
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import seaborn as sns
import os
from pathlib import Path
from core.models.base import WEIGHTS_DIRNAME
import pandas as pd
from sklearn.decomposition import PCA
import plotly
import plotly.express as px
import umap
import tf_explain
from IPython.display import clear_output
import warnings
warnings.filterwarnings('ignore')

In [2]:
# GPU memory workaround: ask TensorFlow to grow GPU memory on demand
# (allow_growth) instead of reserving it up front. The original author needed
# this on an RTX 2070 GPU; comment this cell out if your machine does not.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
# The session must be created with the config for the option to be applied.
session = InteractiveSession(config=config)
# Run tf.function-wrapped code eagerly instead of as graphs.
# NOTE(review): the reason is not stated in this notebook -- confirm it is still needed.
tf.config.run_functions_eagerly(True)

### Utility Functions

In [3]:
def resize_image(img, lb):
    """Resize ``img`` to ``IMG_SIZE`` x ``IMG_SIZE`` and one-hot encode ``lb``.

    Returns:
        A ``(resized_image, one_hot_label)`` pair; the label has ``NCLASS`` entries.
    """
    target_size = (IMG_SIZE, IMG_SIZE)
    resized = tf.image.resize(img, target_size)
    one_hot_label = tf.one_hot(lb, NCLASS)
    return resized, one_hot_label

def augment_image(img, lb):
    """Resize and one-hot encode a sample, then apply the training preprocessing.

    The sample first goes through ``resize_image``; the resized image is then
    passed to ``preprocess_for_train`` with the square ``IMG_SIZE`` target.

    Returns:
        A ``(preprocessed_image, one_hot_label)`` pair.
    """
    resized, one_hot_label = resize_image(img, lb)
    augmented = preprocess_for_train(resized, height=IMG_SIZE, width=IMG_SIZE)
    return augmented, one_hot_label

In [4]:
def save_training_history(history,train_type,data_fraction,batch_number):
    """Plot the accuracy and loss curves of one training run and save them as PNGs.

    Two files are written to ``TRAINING_HISTORY_PATH/<data_fraction>/``, one per
    metric, named
    ``DataFraction_<data_fraction>%_batch_<batch_number>_type_<train_type>_metric_<metric>.png``.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        train_type: Tag embedded in the file name (this notebook passes
            'train' and 'finetune').
        data_fraction: Data fraction of the run; used as the sub-directory
            name and in the file name.
        batch_number: Index of the run within the data fraction.
    """
    # os.path.join keeps the paths portable; the previous "\{}\..." literals
    # only produced sub-directories on Windows and relied on invalid escapes.
    output_dir = os.path.join(TRAINING_HISTORY_PATH, str(data_fraction))
    os.makedirs(output_dir, exist_ok=True)

    for metric in ('accuracy', 'loss'):
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        # The second curve is the validation metric; the 'test' legend label is
        # kept so regenerated figures match the ones produced earlier.
        plt.legend(['train', 'test'], loc='upper left')
        file_name = "DataFraction_{}%_batch_{}_type_{}_metric_{}.png".format(
            data_fraction, batch_number, train_type, metric)
        plt.savefig(os.path.join(output_dir, file_name))
        # Start the next metric from an empty figure.
        plt.clf()
    plt.close()

In [5]:
def save_projection_viz(data_fraction,batch_number,model):
    """Save 2D/3D PCA and UMAP visualisations of the test-set projections.

    A sub-model is built from ``model.layers[3:7]`` (fed with
    ``resnet_v2``-preprocessed inputs), its outputs on ``ds_test`` are scaled by
    their per-sample root-mean-square, and the scaled vectors are reduced to
    three components with PCA and with UMAP. For each method a 2D KDE plot
    (PNG) and a 3D scatter plot (HTML) are written to
    ``PROJECTIONS_PATH/<data_fraction>/``.

    Args:
        data_fraction: Data fraction of the run; used as the sub-directory
            name and in the file names.
        batch_number: Index of the run within the data fraction.
        model: Trained Keras model whose layers are reused for the projection.
    """
    idx = 6  # index of desired layer
    inputs = tf.keras.layers.Input((IMG_SIZE, IMG_SIZE, IMG_CH))
    x = tf.keras.applications.resnet_v2.preprocess_input(inputs)
    # Layers 0-2 are skipped -- presumably the model's own input/preprocessing
    # layers; verify against the network definition.
    for layer in model.layers[3:idx + 1]:
        x = layer(x)
    new_model = tf.keras.Model(inputs, x)
    proj_testset = new_model.predict(ds_test.map(resize_image).batch(BATCH_SIZE))

    n_components = 3
    # Per-sample root-mean-square; it differs from the L2 norm only by a
    # constant factor, so the direction of every vector is preserved.
    rms = np.square(proj_testset).mean(axis=-1, keepdims=True) ** 0.5
    proj_normed = proj_testset / rms

    # os.path.join keeps the paths portable; the previous "\{}\..." literals
    # only produced sub-directories on Windows.
    output_dir = os.path.join(PROJECTIONS_PATH, str(data_fraction))
    os.makedirs(output_dir, exist_ok=True)

    def _save_plots(embedding, method):
        """Write the 2D KDE plot and the 3D scatter plot of one embedding."""
        cols = ['{}{}'.format(method, i + 1) for i in range(n_components)]
        df = pd.DataFrame(embedding, columns=cols)
        df['targets'] = test_targets
        sns_plot = sns.kdeplot(x=cols[0], y=cols[1], data=df, hue='targets',
                               palette=sns.color_palette()[0:4])
        fig = sns_plot.get_figure()
        fig.savefig(os.path.join(
            output_dir,
            "Projections_{}_2D_DataFraction_{}%_batch_{}.png".format(method, data_fraction, batch_number)))
        fig = px.scatter_3d(df, x=cols[0], y=cols[1], z=cols[2], color='targets')
        plotly.offline.plot(fig, filename=os.path.join(
            output_dir,
            "Projections_{}_3D_DataFraction_{}%_batch_{}.html".format(method, data_fraction, batch_number)))
        # Close the matplotlib figure so the next KDE plot starts on a fresh one.
        plt.close()

    proj_normed_pcas = PCA(n_components=n_components).fit_transform(proj_normed)
    _save_plots(proj_normed_pcas, 'PCA')

    reducer = umap.UMAP(n_components=n_components, metric='cosine', n_neighbors=50)
    proj_normed_umap = reducer.fit_transform(proj_normed)
    _save_plots(proj_normed_umap, 'UMAP')

In [6]:
def explain_predictions(data_fraction,batch_number,image_list,label_list,model,layer_name='conv2d_2'):
    """Save Grad-CAM and occlusion-sensitivity figures for a list of images.

    For every image a three-panel figure (resized image, Grad-CAM overlay,
    occlusion-sensitivity map) is written to ``GRADCAM_PATH/<data_fraction>/``.
    Both explanations are computed for the TRUE class of the image.

    Args:
        data_fraction: Data fraction of the run; used as the sub-directory
            name and in the file names.
        batch_number: Index of the run within the data fraction.
        image_list: Images to explain; each is resized to IMG_SIZE x IMG_SIZE.
        label_list: Integer class labels aligned with ``image_list``; each
            item must expose ``.numpy()``.
        model: Trained Keras model to explain.
        layer_name: Name of the layer Grad-CAM takes its activations from.
    """
    # os.path.join keeps the paths portable; the previous "\{}\..." literal
    # only produced sub-directories on Windows.
    output_dir = os.path.join(GRADCAM_PATH, str(data_fraction))
    os.makedirs(output_dir, exist_ok=True)

    # The explainers are built once and reused for every image.
    explainer = tf_explain.core.grad_cam.GradCAM()
    explainer_occ = tf_explain.core.occlusion_sensitivity.OcclusionSensitivity()

    for i, image in enumerate(image_list):
        label = label_list[i]
        # A plain int works both as a list index and as tf-explain's class_index.
        class_index = int(label.numpy())
        label_name = label_names[class_index]
        resized_img = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
        resized_img = tf.keras.preprocessing.image.img_to_array(resized_img)
        expanded_img = np.expand_dims(resized_img, axis=0)

        # One forward pass provides both the predicted class and its score.
        scores = model.predict(expanded_img)
        prediction = np.argmax(scores)
        prediction_acc = np.max(scores)
        predicted_label = label_names[prediction]

        data = ([resized_img.astype('uint8')], None)
        grid = explainer.explain(data, model, class_index=class_index, layer_name=layer_name, image_weight=0.9)
        grid_occ = explainer_occ.explain(data, model, class_index=class_index, patch_size=4)

        f, ax = plt.subplots(1, 3, figsize=(8, 8))
        # "Predicted Accuracy" is the maximum model output for this image.
        f.suptitle("True label: " + label_name+", "+"Predicted label: " + predicted_label+", "+"Predicted Accuracy: " + str(prediction_acc), fontsize=15)
        ax[0].set_title("Original Image")
        ax[0].imshow(resized_img.astype('uint8'))
        ax[1].set_title("Grad-CAM")
        ax[1].imshow(grid)
        ax[2].set_title("Occlusion Sensitivity")
        ax[2].imshow(grid_occ)
        plt.tight_layout()
        plt.subplots_adjust(top=1.5)
        file_name = "GradCAM_IMG_{}_DataFraction_{}%_batch_{}.png".format(i, data_fraction, batch_number)
        plt.savefig(os.path.join(output_dir, file_name))
        plt.close()

### Constants

In [7]:
# --- Model / training configuration ---
NCLASS   = 4            # number of classes (one-hot depth and model output size)
IMG_SIZE = 224          # images are resized to IMG_SIZE x IMG_SIZE
IMG_CH = 3              # number of image channels
BATCH_SIZE = 32
EPOCHS = 100            # upper bound per training phase; early stopping may end sooner
TOTAL_ITERATIONS = 5    # number of runs performed per data fraction

# --- Reproducibility ---
# Seeding Python's `random` alone does not cover NumPy or TensorFlow, which
# provide the randomness used during training, so seed all three.
SEED = 7
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Output locations (relative to the parent of the working directory) ---
BASE_PATH = Path(os.getcwd()).parent
RESULTS_PATH = BASE_PATH.joinpath('core', 'experiment_results')
CHECKPOINTS_PATH = str(RESULTS_PATH.joinpath('checkpoints', 'model_weights.h5'))
TRAINING_HISTORY_PATH= str(RESULTS_PATH.joinpath('training_history'))
CONFUSION_MATRIX_PATH = str(RESULTS_PATH.joinpath('confusion_matrix'))
PROJECTIONS_PATH = str(RESULTS_PATH.joinpath('projections'))
GRADCAM_PATH = str(RESULTS_PATH.joinpath('gradcam'))

### Preparing the Data

In [8]:
# Load the test split once; it is reused for evaluation, projections and Grad-CAM.
ds_test, ds_test_info = tfds.load(
    'RetinaDataset',
    split='test',
    shuffle_files=False,
    as_supervised=True,
    with_info=True,
)

# Flatten the per-batch label arrays into one array of test labels.
test_targets = np.hstack([
    label_batch.numpy()
    for label_batch in ds_test.map(lambda img, lb: lb).batch(BATCH_SIZE)
])

# Class names, indexed by integer label.
label_names = ds_test_info.features['label'].names

In [9]:
# The first three test samples are reused for every Grad-CAM figure.
# (`lables_for_gradcam` keeps its original spelling because other cells use it.)
images_for_gradcam = []
lables_for_gradcam = []
for image, label in ds_test.take(3):
    images_for_gradcam.append(image)
    lables_for_gradcam.append(label)

In [10]:
# One row of test metrics is added here for every completed run.
metrics_store = pd.DataFrame()
def get_results_for_data_fraction(SAMPLE_SIZE):
    """Train, fine-tune and evaluate the classifier on slices of the training split.

    ``TOTAL_ITERATIONS`` runs are performed. Each run:

    1. loads its training and validation slices,
    2. computes balanced class weights from the training labels,
    3. trains the model, then fine-tunes it with ``model.layers[3]`` unfrozen,
    4. evaluates on ``ds_test`` and stores a confusion matrix, the metrics row
       (appended to the module-level ``metrics_store``), the model weights,
       the projection plots and the saliency maps.

    Args:
        SAMPLE_SIZE: Percentage of the training split used per run. With 100,
            every run trains on ``train[:98%]`` and validates on
            ``train[-2%:]``. Otherwise run ``k`` trains on
            ``train[k*SAMPLE_SIZE% : (k+1)*SAMPLE_SIZE%]`` and validates on
            ``train[-15%:]``.

    Note:
        When ``SAMPLE_SIZE * TOTAL_ITERATIONS`` exceeds 85 the later training
        slices reach into the validation slice (or past 100%).
    """
    global metrics_store  # the only module-level name reassigned here

    def _ensure_dir(*parts):
        """Join ``parts`` into a directory path, create it if missing, return it."""
        path = os.path.join(*parts)
        os.makedirs(path, exist_ok=True)
        return path

    for ITER_COUNT in range(TOTAL_ITERATIONS):
        print("Execution for batch: "+str(ITER_COUNT))
        if SAMPLE_SIZE==100:
            train_split, val_split = 'train[:98%]', 'train[-2%:]'
        else:
            start_idx = int(ITER_COUNT*SAMPLE_SIZE)
            end_idx = start_idx+SAMPLE_SIZE
            train_split = 'train[{}%:{}%]'.format(start_idx,end_idx)
            val_split = 'train[-15%:]'
        ds_train = tfds.load('RetinaDataset', split=train_split, as_supervised=True)
        ds_val = tfds.load('RetinaDataset', split=val_split, as_supervised=True)
        train_batches = ds_train.map(augment_image).batch(BATCH_SIZE)
        val_batches = ds_val.map(resize_image).batch(BATCH_SIZE)

        print("Computing weights for the classes.")
        # Read the integer labels straight from the raw split; this avoids
        # running the image augmentation just to count classes.
        label_batches = ds_train.map(lambda img, lb: lb).batch(BATCH_SIZE).as_numpy_iterator()
        train_labels = np.concatenate(list(label_batches))
        # Keyword arguments are required by current scikit-learn releases.
        class_weights = compute_class_weight(class_weight='balanced', classes=np.arange(NCLASS), y=train_labels)
        class_weights = {i: w for i, w in enumerate(class_weights)}

        print("Training the classifier.")
        model = resnetconv(input_shape = (IMG_SIZE, IMG_SIZE, IMG_CH), output_shape = (NCLASS,))
        metrics = ['accuracy']
        _ensure_dir(os.path.dirname(CHECKPOINTS_PATH))
        # NOTE(review): the best checkpoint is written to CHECKPOINTS_PATH but is
        # never loaded back, and EarlyStopping does not restore the best weights,
        # so the evaluation below uses the weights of the last epoch.
        # The same callback objects are reused for the fine-tuning phase.
        callbacks = [tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss'),
                     tf.keras.callbacks.ModelCheckpoint(filepath=CHECKPOINTS_PATH,monitor='val_accuracy',save_best_only=True)]
        # `learning_rate` replaces the deprecated `lr` argument.
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=metrics)
        history = model.fit(train_batches, validation_data = val_batches, callbacks = callbacks, class_weight = class_weights, epochs=EPOCHS, verbose=1)
        save_training_history(history,'train',SAMPLE_SIZE,ITER_COUNT)

        print("Fine-tuning the classifier.")
        # Unfreeze layer 3 and continue with a much smaller learning rate.
        model.layers[3].trainable = True
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
        model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=metrics)
        history_finetune = model.fit(train_batches, validation_data = val_batches, callbacks = callbacks, class_weight = class_weights, epochs=EPOCHS, verbose=1)
        save_training_history(history_finetune,'finetune',SAMPLE_SIZE,ITER_COUNT)

        print("Model Evaluation.")
        test_outputs = model.predict(ds_test.map(resize_image).batch(BATCH_SIZE))
        ytest_pred = test_outputs.argmax(axis=-1)

        print("Saving artifacts.")
        fig, ax = plt.subplots()
        sns.heatmap(confusion_matrix(test_targets, ytest_pred),annot=True,xticklabels=label_names,yticklabels=label_names,fmt='g', ax = ax)
        ax.set_xlabel('Predicted labels')
        ax.set_ylabel('True labels')
        ax.set_title('Confusion Matrix')
        # os.path.join keeps the paths portable; the previous "\{}\..." literals
        # only produced sub-directories on Windows.
        confusion_dir = _ensure_dir(CONFUSION_MATRIX_PATH, str(SAMPLE_SIZE))
        fig.savefig(os.path.join(confusion_dir, "ConfusionMatrix_DataFraction_{}%_batch_{}.png".format(SAMPLE_SIZE,ITER_COUNT)))
        plt.close(fig)

        accuracy = accuracy_score(test_targets, ytest_pred)
        precision = precision_score(test_targets, ytest_pred, average='weighted')
        recall = recall_score(test_targets, ytest_pred, average='weighted')
        f1_sc = f1_score(test_targets, ytest_pred, average='weighted')
        curr_metrics = pd.DataFrame({'Data_Fraction':SAMPLE_SIZE,'Batch_Number':ITER_COUNT,'Accuracy':accuracy,'Precision':precision,'Recall':recall,'F1_Score':f1_sc},index=[0])
        # DataFrame.append was removed in pandas 2.0; concatenate instead.
        if metrics_store.empty:
            metrics_store = curr_metrics
        else:
            metrics_store = pd.concat([metrics_store, curr_metrics], ignore_index=True)

        # Saving the Model
        weights_dir = _ensure_dir(str(WEIGHTS_DIRNAME), str(SAMPLE_SIZE))
        model.save_weights(os.path.join(weights_dir, "Supervised_ResNet_DataFraction_{}%_batch_{}_TestAcc_{}.h5".format(SAMPLE_SIZE,ITER_COUNT,int(accuracy*100))))

        print("Saving Projections")
        save_projection_viz(SAMPLE_SIZE,ITER_COUNT,model)

        print("Saving Saliency Maps")
        explain_predictions(SAMPLE_SIZE,ITER_COUNT,images_for_gradcam,lables_for_gradcam,model,layer_name=model.layers[-4].name)

In [11]:
# Training-set percentages to evaluate. Only 10% is enabled in this run;
# the original note also mentions 1% and 5%.
data_fractions_list = [10]
for fraction in data_fractions_list:
    # Uncomment to clear the cell output between fractions:
    # clear_output(wait=True)
    get_results_for_data_fraction(fraction)

Execution for batch: 0
Computing weights for the classes.
Training the classifier.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Fine-tuning the classifier.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/1

In [12]:
# Test metrics collected by get_results_for_data_fraction, one row per run.
metrics_store

Unnamed: 0,Data_Fraction,Batch_Number,Accuracy,Precision,Recall,F1_Score
0,10,0,0.992769,0.992849,0.992769,0.992768
1,10,1,0.994835,0.99489,0.994835,0.994822
2,10,2,0.984504,0.984576,0.984504,0.984504
3,10,3,0.984504,0.985409,0.984504,0.98455
4,10,4,0.983471,0.984001,0.983471,0.983442


In [13]:
# Average the metrics over the runs of each data fraction.
# Batch_Number is averaged too, so that column of the result carries no meaning.
metrics_store.groupby('Data_Fraction').mean()

Unnamed: 0_level_0,Batch_Number,Accuracy,Precision,Recall,F1_Score
Data_Fraction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,2,0.988017,0.988345,0.988017,0.988017


#### END