<a href="https://colab.research.google.com/github/daliaezzat/ASOC_paper_code/blob/main/Phase_3_and_Phase_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
# Warn when the installed TensorFlow is not a 2.x release, then report the version.
tf_version = tf.__version__
is_tf2 = tf_version.startswith('2')
if not is_tf2:
    print('Please install tensorflow 2.0 to run this notebook')
print('Tensorflow version: ', tf_version)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import urllib.request
from sklearn.model_selection import train_test_split
import tensorflow_probability as tfp
%matplotlib inline
# Use matplotlib's stock style for every figure in this notebook.
plt.style.use('default')

# Report the TensorFlow Probability and TensorFlow versions in use.
for lib_label, lib_module in (("TFP Version", tfp), ("TF  Version", tf)):
    print(lib_label, lib_module.__version__)

In [None]:
# Turn off TensorFlow eager execution.
# NOTE(review): presumably needed so that the K.function / K.learning_phase()
# call used later for MC dropout works — confirm.
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

In [None]:
!pip install PyDrive

In [None]:
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
# Authenticate the Colab user, then build a PyDrive client whose credentials
# are the application-default Google credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
# Download the Drive file with this hard-coded ID and save it in the working
# directory as breast_cancer.zip.
download = drive.CreateFile({'id': '1cA_rOXord2kQgzyxCNGfJi4CYOdDvG7F'})
download.GetContentFile('breast_cancer.zip')

In [None]:
import zipfile
import io
# Extract the archive into the current working directory. The context manager
# closes the file handle as soon as extraction finishes (or fails).
with zipfile.ZipFile('breast_cancer.zip', 'r') as data:
    data.extractall()

In [None]:
# Root folder of the image dataset; change it here to relocate all three splits.
# NOTE(review): presumably created by extracting breast_cancer.zip under /content — confirm.
dataset_root = '/content/ratio_IDC3_breasr_cancer'

# One sub-folder per split.
train_dir = f'{dataset_root}/train'
validation_dir = f'{dataset_root}/validation'
test_dir = f'{dataset_root}/test'

In [None]:
# Batch sizes for the training / validation iterators and the square side length
# (in pixels) every image is resized to.
train_batch_size=32
validation_batch_size=32
image_size=280

#----------------------------------------------- data preprocessing ---------------------------------------
# Import from tensorflow.keras (not the standalone keras package) so these classes
# come from the same Keras implementation as the tensorflow.keras model they are
# used with in this notebook.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping

# Training images: multiply pixel values by 1/255 and apply random augmentation
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen=ImageDataGenerator(rescale = 1./255.,
                                 rotation_range = 10,
                                 width_shift_range = 0.2,
                                 height_shift_range = 0.2,
                                 shear_range = 0.3,
                                 zoom_range = 0.25,
                                 horizontal_flip = True,
                                fill_mode='nearest')

# Validation / test images: rescaling only, no augmentation.
test_datagen=ImageDataGenerator(rescale=1./255)
train_generator=train_datagen.flow_from_directory(train_dir,
                                                  target_size=(image_size,image_size),
                                                  batch_size=train_batch_size,
                                                  class_mode='categorical')
validation_generator=test_datagen.flow_from_directory(validation_dir,
                                                      target_size=(image_size,image_size),
                                                      batch_size=validation_batch_size,
                                                      shuffle=True,
                                                      class_mode='categorical')


#----------------------------------------------model------------------------------------------------------
from tensorflow.keras import layers,optimizers
from tensorflow.keras import models
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.callbacks import TensorBoard, LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2



from tensorflow.keras.applications import ResNet101V2

# ImageNet-pretrained ResNet101V2 backbone without its classification head.
backbone_input_shape = (image_size, image_size, 3)
conv_base = ResNet101V2(weights='imagenet',
                        include_top=False,
                        input_shape=backbone_input_shape)
conv_base.summary()

# Print every backbone layer with its position.
for layer_position, backbone_layer in enumerate(conv_base.layers):
   print(layer_position, backbone_layer.name)

# Freeze the backbone and stack a small dense classifier with dropout on top
# (two-way softmax output).
conv_base.trainable = False
model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(100, activation='relu'),
    layers.Dropout(0.263),
    layers.Dense(2, activation='softmax'),
])

# Callbacks, all monitoring validation accuracy: learning-rate reduction,
# best-model checkpointing and early stopping.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',
                              factor=0.1,
                              patience=5,
                              min_lr=1e-5)
checkpointer = ModelCheckpoint(filepath='pretrained.weights.best.hdf5',
                               monitor='val_accuracy',
                               verbose=1,
                               save_best_only=True)
early = EarlyStopping(monitor='val_accuracy',
                      min_delta=0,
                      patience=10,
                      verbose=1,
                      mode='auto')

head_optimizer = optimizers.legacy.Adam(learning_rate=2e-5)
model.compile(optimizer=head_optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

In [None]:
from sklearn.utils import compute_class_weight
import numpy as np

# Integer class index of every training image, as reported by the generator.
train_classes = train_generator.classes
class_ids = np.unique(train_classes)

# "balanced" weights so that the rarer class contributes more to the loss.
balanced_weights = compute_class_weight(
                                        class_weight = "balanced",
                                        classes = class_ids,
                                        y = train_classes
                                     )

# Keras expects a plain {class_index: weight} dict. No trailing comma here:
# a trailing comma would wrap the dict in a 1-tuple.
class_weights = {int(class_id): float(weight)
                 for class_id, weight in zip(class_ids, balanced_weights)}
class_weights

In [None]:
# Train the classifier head on top of the frozen backbone.
# Step counts must be whole numbers of batches; rounding up covers the final
# partial batch. Validation steps use the validation iterator's own batch size.
train_steps = int(np.ceil(train_generator.samples / train_generator.batch_size))
val_steps = int(np.ceil(validation_generator.samples / validation_generator.batch_size))

# Model.fit accepts generators directly; fit_generator is deprecated.
history=model.fit(train_generator,
                  steps_per_epoch=train_steps,
                  epochs=50,
                  validation_data=validation_generator,
                  validation_steps=val_steps,
                  verbose=1,
                  class_weight=class_weights,
                  callbacks=[checkpointer,early,reduce_lr])

In [None]:
# Fine-tuning stage: unfreeze the last 8 backbone layers and continue training
# with a much smaller learning rate.

# NOTE(review): `training` and `_per_input_updates` are not documented Keras
# attributes; these assignments are kept unchanged from the original notebook —
# confirm they have the intended effect.
for layer in conv_base.layers:
     layer.training = False
     if isinstance(layer, (tf.keras.layers.BatchNormalization, tf.keras.layers.Dropout)):
         layer._per_input_updates = {}

# The backbone container itself must be trainable, otherwise it reports no
# trainable weights regardless of the per-layer flags set below.
conv_base.trainable = True

for layer in conv_base.layers[:-8]:
    layer.trainable=False

for layer in conv_base.layers[-8:]:
     layer.trainable=True

# Fresh callbacks for this stage; the checkpoint path is the same file as before.
checkpointer = ModelCheckpoint(filepath='pretrained.weights.best.hdf5',monitor='val_accuracy', verbose = 1, save_best_only=True)
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=10, verbose=1, mode='auto')
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, min_lr=1e-7)

# Re-compile so the changed trainable flags take effect.
model.compile(optimizer=optimizers.legacy.Adam(learning_rate=1e-6),loss='categorical_crossentropy',metrics=['accuracy'])

model.summary()

# Whole-number step counts (rounded up to include the last partial batch);
# validation steps use the validation iterator's own batch size.
train_steps = int(np.ceil(train_generator.samples / train_generator.batch_size))
val_steps = int(np.ceil(validation_generator.samples / validation_generator.batch_size))

# Model.fit accepts generators directly; fit_generator is deprecated.
history=model.fit(train_generator,
                  steps_per_epoch=train_steps,
                  epochs=50,
                  validation_data=validation_generator,
                  validation_steps=val_steps,
                  verbose=1,
                  class_weight=class_weights,
                  callbacks=[checkpointer,early,reduce_lr])

In [None]:
import tensorflow.keras.backend as K

# Backend function mapping (input batch, learning-phase flag) to the model output.
# The flag is supplied by the caller on every call.
mc_function_inputs = [model.input, K.learning_phase()]
mc_function_outputs = [model.output]
model_mc_pred = K.function(mc_function_inputs, mc_function_outputs)

In [None]:
# Unshuffled iterator over the test folder, using the rescale-only generator.
# NOTE(review): batch_size=4163 presumably equals the number of test images so
# that a single batch holds the whole split — confirm.
test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(image_size, image_size),
        batch_size=4163,
        class_mode='categorical',
        shuffle=False)

In [None]:
# Class names indexed by softmax output position.
# NOTE(review): assumes the class folders are named "0" and "1" — confirm
# against test_generator.class_indices.
labels=np.array(["0","1"])

In [None]:
# Store the whole test split in x_test / y_test by draining exactly one pass of
# the generator (len(test_generator) batches).
batch_size=8  # kept for backward compatibility; not used by the loop below
test_generator.reset()

# Collect the batches in lists and concatenate once at the end, which avoids
# re-copying the growing arrays on every iteration.
image_batches, label_batches = [], []
for _ in tqdm(range(len(test_generator))):
  img, label = next(test_generator)
  image_batches.append(img)
  label_batches.append(label)
x_test = np.concatenate(image_batches, axis=0)
y_test = np.concatenate(label_batches, axis=0)
print(x_test.shape, y_test.shape)

In [None]:
# No dropout at test time: learning-phase flag 0, same first test image, five passes.
first_image = x_test[0:1]
for _ in range(5):
  deterministic_output = model_mc_pred([first_image, 0])
  print(deterministic_output[0])

In [None]:
# Dropout at test time: learning-phase flag 1, same first test image, five passes.
first_image = x_test[0:1]
for _ in range(5):
  stochastic_output = model_mc_pred([first_image, 1])
  print(stochastic_output[0])

In [None]:
# Dropout at test time: learning-phase flag 1, test image at index 800, five passes.
image_800 = x_test[800:801]
for _ in range(5):
  stochastic_output = model_mc_pred([image_800, 1])
  print(stochastic_output[0])

In [None]:
# Monte Carlo dropout over the test set. Each image is repeated 50 times in one
# batch and evaluated with learning-phase flag 1; the 50 outputs are summarised
# per image.
num_test_images = len(x_test)
pred_mc = np.zeros((num_test_images, 2))   # mean class probabilities
pred_max_p_mc = np.zeros(num_test_images)  # index of the highest mean probability
pred_std_mc = np.zeros(num_test_images)    # sqrt of the summed per-class variance
entropy_mc = np.zeros(num_test_images)     # entropy of the mean probabilities (log base 2)

for i in tqdm(range(num_test_images)):
  multi_img = np.tile(x_test[i], (50, 1, 1, 1))
  preds = model_mc_pred([multi_img, 1])
  mean_probs = np.mean(preds, axis=1)  # mean over the 50 copies, per class
  pred_mc[i] = mean_probs
  pred_max_p_mc[i] = np.argmax(mean_probs)
  pred_std_mc[i] = np.sqrt(np.sum(np.var(preds, axis=1)))
  # The small constant keeps log2 finite when a probability is exactly 0.
  entropy_mc[i] = -np.sum(pred_mc[i] * np.log2(pred_mc[i] + 1E-14))

# Predicted class name, its mean probability, and the negative log of that probability.
top_class_mc = [np.argmax(mean_row) for mean_row in pred_mc]
pred_labels_mc = np.array([labels[class_pos] for class_pos in top_class_mc])
pred_mc_mean_max_p = np.array([mean_row[class_pos]
                               for mean_row, class_pos in zip(pred_mc, top_class_mc)])
nll_mc = -np.log(pred_mc_mean_max_p)

In [None]:
# Display the per-image entropy values computed from the MC-dropout mean probabilities.
entropy_mc

In [None]:
# Mean entropy over all test images.
sum(entropy_mc) / len(entropy_mc)

In [None]:
# Display the per-image standard-deviation measure from the MC-dropout passes.
pred_std_mc

In [None]:
# Mean of the standard-deviation measure over all test images.
sum(pred_std_mc) / len(pred_std_mc)

In [None]:
# Class index of every test image, as reported by the generator.
true_labels= test_generator.classes

In [None]:
# Invert class_indices (class name -> index) into an index -> class name lookup.
l = dict((v,k) for k,v in test_generator.class_indices.items())

# Map from the generator's own index array rather than from the previous value of
# true_labels, so this cell can be re-run without a KeyError on already-mapped names.
true_labels = np.array([l[k] for k in test_generator.classes])

In [None]:
# Show the ground-truth label array.
print(true_labels)

In [None]:
# Show the MC-dropout predicted label array.
print(pred_labels_mc)

In [None]:
# Fraction of test images whose MC-dropout label equals the ground-truth label.
label_matches_mc = true_labels == pred_labels_mc
test_acc_all_mc = np.mean(label_matches_mc)
test_acc_all_mc

In [None]:
# Split the test indices into correctly and incorrectly classified images.
is_correct_mc = pred_labels_mc == true_labels
is_incorrect_mc = pred_labels_mc != true_labels
correct_indices = np.flatnonzero(is_correct_mc)
incorrect_indices = np.flatnonzero(is_incorrect_mc)

num_correct = len(correct_indices)
num_incorrect = len(incorrect_indices)
print(num_correct," classified correctly")
print(num_incorrect," classified incorrectly")

In [None]:
# Show up to six misclassified test images, one per row of a single figure, each
# titled with its prediction, ground truth and uncertainty measures.
plt.rcParams['figure.figsize'] = (7,14)
shown_incorrect = incorrect_indices[:6]

# Size the figure by the number of images actually shown (at least one row tall).
figure_evaluation = plt.figure(figsize=(7, 4 * max(len(shown_incorrect), 1)))

# plot incorrect predictions
for row, incorrect in enumerate(shown_incorrect, start=1):
    ax = figure_evaluation.add_subplot(len(shown_incorrect), 1, row)
    # x_test images are already (height, width, 3) RGB arrays, so they are drawn
    # as-is; no hard-coded reshape and no colormap (ignored for RGB data).
    ax.imshow(x_test[incorrect], interpolation='none')
    ax.set_title(
      "Predicted {}, Truth: {}, entropy (uncertainty): {}, prob: {}, , std: {}".format(pred_labels_mc[incorrect],
                                                        true_labels[incorrect],
                                                        entropy_mc[incorrect],
                                                        pred_mc[incorrect],
                                                        pred_std_mc[incorrect]),
      fontsize=8, wrap=True)
    ax.set_xticks([])
    ax.set_yticks([])

figure_evaluation.subplots_adjust(hspace=0.4)
# Show once, after all panels are drawn, so they stay in one figure.
plt.show()

In [None]:
# Show up to six correctly classified test images, one per row of a single figure,
# each titled with its prediction, ground truth and uncertainty measures.
plt.rcParams['figure.figsize'] = (7,14)
shown_correct = correct_indices[:6]

# Size the figure by the number of images actually shown (at least one row tall).
figure_evaluation = plt.figure(figsize=(7, 4 * max(len(shown_correct), 1)))

# plot correct predictions
for row, correct in enumerate(shown_correct, start=1):
    ax = figure_evaluation.add_subplot(len(shown_correct), 1, row)
    # x_test images are already (height, width, 3) RGB arrays, so they are drawn
    # as-is; no hard-coded reshape and no colormap (ignored for RGB data).
    ax.imshow(x_test[correct], interpolation='none')
    ax.set_title(
      "Predicted {}, Truth: {}, entropy (uncertainty): {}, prob: {}, , std: {}".format(pred_labels_mc[correct],
                                                        true_labels[correct],
                                                        entropy_mc[correct],
                                                        pred_mc[correct],
                                                        pred_std_mc[correct]),
      fontsize=8, wrap=True)
    ax.set_xticks([])
    ax.set_yticks([])

figure_evaluation.subplots_adjust(hspace=0.4)
# Show once, after all panels are drawn, so they stay in one figure.
plt.show()

In [None]:
# Confusion matrix and classification report for the MC-dropout predictions
from sklearn.metrics import classification_report, confusion_matrix

print('Confusion Matrix')
confusion_mc = confusion_matrix(y_true=true_labels, y_pred=pred_labels_mc)
print(confusion_mc)

print('Classification Report')
target_names = ['0', '1']
report_mc = classification_report(y_true=true_labels,
                                  y_pred=pred_labels_mc,
                                  target_names=target_names)
print(report_mc)

In [None]:
# Indices of misclassified test images, reported against the total sample count.
errors = np.flatnonzero(pred_labels_mc != true_labels)
num_errors = len(errors)
num_samples = test_generator.samples
print("No of errors = {}/{}".format(num_errors, num_samples))

In [None]:
# Convert the string class names to plain integers.
y_true = list(map(int, true_labels))
y_pred = list(map(int, pred_labels_mc))

In [None]:
import sklearn.metrics as metrics

# Score for the ROC curve: the MC-dropout mean probability of output position 1,
# which the labels array maps to class "1". Continuous scores let roc_curve sweep
# every threshold; hard 0/1 predictions would give only a single operating point.
# NOTE(review): assumes output position 1 is the positive class, i.e. the class
# folders "0" and "1" map to indices 0 and 1 — confirm via class_indices.
y_score = pred_mc[:, 1]
fpr, tpr, tresholds = metrics.roc_curve(y_true, y_score)

In [None]:
# Area under the ROC curve, then the curve itself against the chance diagonal.
roc_auc = metrics.auc(fpr, tpr)
# method I: plt
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (5,5)
figure_evaluation = plt.figure()
roc_ax = figure_evaluation.gca()
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
roc_ax.plot(fpr, tpr, 'b', label='ROC curve(area= %0.2f)' % roc_auc)
roc_ax.legend(loc='lower right')
# Dashed red diagonal from (0, 0) to (1, 1).
roc_ax.plot([0, 1], [0, 1], 'r--')
roc_ax.set_xlim([0, 1])
roc_ax.set_ylim([0, 1])
roc_ax.set_ylabel('True Positive Rate (TPR)')
roc_ax.set_xlabel('False Positive Rate (FPR)')
plt.show()