In [None]:
import json
import math
import os
import os.path

import cv2
from PIL import Image
import numpy as np
from keras import layers
#from keras.applications import ResNet50,MobileNet, DenseNet201, InceptionV3, NASNetLarge, InceptionResNetV2, NASNetMobile
from tensorflow.keras.applications import ResNet50,ResNet50V2, MobileNet, DenseNet201, InceptionV3, NASNetLarge, InceptionResNetV2, NASNetMobile
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, TensorBoard, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
#from keras.optimizers import Adam
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
import scipy
from tqdm import tqdm
import tensorflow as tf
from keras import backend as K
import gc
from functools import partial
from sklearn import metrics
from collections import Counter
import json
import itertools

In [None]:
####################################################
# Base directory
####################################################

from google.colab import drive

# Mount Google Drive so the dataset folders below are reachable from Colab.
drive.mount('/content/drive/',  force_remount=True)

base_path = '/content/drive/My Drive/cancer_detection/2022/datasets/2022_2/224px_level0_full/'

# base_path already ends with '/', so build sub-paths with os.path.join
# instead of string concatenation to avoid doubled separators
# ('.../224px_level0_full//patch_train').
train_dir = os.path.join(base_path, 'patch_train')
val_dir = os.path.join(base_path, 'patch_val')
test_dir = os.path.join(base_path, 'patch_test')

# Per-class sub-folders: '0' holds the negative patches, '1' the positive ones.
negative_train = os.path.join(train_dir, '0')
positive_train = os.path.join(train_dir, '1')
negative_val = os.path.join(val_dir, '0')
positive_val = os.path.join(val_dir, '1')

In [None]:
# Side length (in pixels) of the square image patches; used as the generators'
# target_size and as the backbones' input_shape.
patchSize = 224

In [None]:
####################################################
# Image Generator and Data Augmentation Configs
####################################################

# Random rotations (up to 90 degrees) and horizontal/vertical flips.
#
# preprocessing_function: the ImageNet-pretrained ResNet50V2 backbone that the
# model is built on expects inputs passed through resnet_v2.preprocess_input
# (pixels scaled to [-1, 1]); without it the network receives raw 0-255
# values. The function runs after the augmentation on every image.
# NOTE(review): weights trained without this preprocessing are not compatible
# with it, and a different backbone (ResNet50, DenseNet201) needs its own
# preprocess_input.
data_augmentation = ImageDataGenerator(
    rotation_range=90,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=tf.keras.applications.resnet_v2.preprocess_input,
)

In [None]:
# Stream augmented training patches from train_dir. Labels are derived from
# the class sub-folders (see train_generator.class_indices, printed in the
# next cell).
train_generator = data_augmentation.flow_from_directory(
    directory=train_dir,
    target_size=(patchSize, patchSize),
    color_mode="rgb",
    batch_size=224,  # NOTE(review): must stay equal to `batch_size`, which is defined later and used for steps_per_epoch
    class_mode="binary",
    shuffle=True,
    seed=None  # no fixed seed: shuffling/augmentation differ between runs
)

print(train_generator)

In [None]:
# Sanity check: print how the training iterator was configured, one attribute
# per line.
for attribute in ("class_indices", "class_mode", "classes",
                  "num_classes", "total_batches_seen"):
    print(getattr(train_generator, attribute))

In [None]:
# Validation patches must come out in a fixed order: the evaluation cells pair
# the model's predictions with val_generator.classes, which only lines up when
# the iterator does not shuffle. Shuffling brings no benefit for validation.
val_generator = data_augmentation.flow_from_directory(
    directory=val_dir,
    target_size=(patchSize, patchSize),
    color_mode="rgb",
    batch_size=224,
    class_mode="binary",
    shuffle=False,
    seed=None
)

print(val_generator)

In [None]:
# The test patches live under a different dataset root than train/val; this
# overrides the test_dir defined in the base-directory cell.
base_path_test = '/content/drive/My Drive/cancer_detection/2022/datasets/2022_2/224px_level0_complete/'

test_dir = base_path_test + '/patch_test'

# shuffle=False keeps the iteration order fixed, so the predictions can be
# matched with test_generator.classes in the evaluation cells. The iterator is
# still built from data_augmentation, so each pass applies random
# flips/rotations.
test_generator = data_augmentation.flow_from_directory(
    directory=test_dir,
    target_size=(patchSize, patchSize),
    color_mode="rgb",
    batch_size=224,
    class_mode="binary",
    shuffle=False,
    seed=None
)

print(test_generator)

In [None]:
def build_model(modelBase, lr):
    """Stack a binary-classification head on top of a convolutional backbone.

    The head is GlobalAveragePooling2D -> Dropout(0.5) -> BatchNormalization
    -> Dense(1, sigmoid), compiled with binary cross-entropy and accuracy.

    Args:
        modelBase: Keras model used as feature extractor. Its output goes
            straight into GlobalAveragePooling2D (in this notebook the
            backbones are created with include_top=False).
        lr: Learning rate for the Adam optimizer.

    Returns:
        The compiled Sequential model; it outputs one score per image.
    """
    model = Sequential()
    model.add(modelBase)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.5))
    model.add(layers.BatchNormalization())
    # Sigmoid, not softmax: softmax over a single unit is always exactly 1.0,
    # so the model could never predict the negative class and the loss would
    # carry no gradient towards it.
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        # `lr` is a deprecated alias; `learning_rate` is the supported name.
        optimizer=Adam(learning_rate=lr),
        metrics=['accuracy']
    )

    return model

In [None]:
# Drop any previous Keras graph/state and free memory before instantiating
# the backbones.
K.clear_session()
gc.collect()

# All backbones share the same construction options: ImageNet weights, no
# classification head, and patchSize x patchSize RGB input.
backbone_options = dict(
    weights='imagenet',
    include_top=False,
    input_shape=(patchSize, patchSize, 3),
)

denseNet = DenseNet201(**backbone_options)
resnet = ResNet50(**backbone_options)
resnetv2 = ResNet50V2(**backbone_options)

In [None]:
#######################################################################
# FREEZING - mark every layer of resnetv2 as non-trainable
#######################################################################

for layer in resnetv2.layers:
    layer.trainable = False

# Print the names of the frozen layers as a visual check.
for layer in resnetv2.layers:
    if not layer.trainable:
        print(layer.name)

In [None]:
#######################################################################
# UNFREEZING - re-enable training for one resnetv2 layer, chosen by name
#######################################################################

for layer in resnetv2.layers:
    if layer.name == 'conv5_block3_3_conv':
        layer.trainable = True

# Print the names of the layers that will be updated during training.
for layer in resnetv2.layers:
    if layer.trainable:
        print(layer.name)

In [None]:
# Training hyper-parameters.
# batch_size is used to derive steps_per_epoch / validation_steps in the
# training cell; the generators hard-code the same value (224).
batch_size = 224
num_epochs = 50
learning_rate = 1e-4
# Build and compile the classifier on top of the resnetv2 backbone.
model = build_model(resnetv2, learning_rate)

In [None]:
####################################################
# SET FILEPATH TO SAVE
####################################################

model_dir = base_path
model_name = 'model_resnet50_224_level0_009.h5'

# Create the checkpoint folder up front so that saving to `filepath` cannot
# fail because the directory is missing.
checkpoint_dir = os.path.join(model_dir, 'checkpoint')
os.makedirs(checkpoint_dir, exist_ok=True)

# Target file of the ModelCheckpoint callback; also read back by
# model.load_weights before evaluation.
filepath = os.path.join(checkpoint_dir, model_name)

print(filepath)

In [None]:
#######################################################################
# CALLBACKS - Learning Rate Reducer & ModelCheckpoint & EarlyStopping
#######################################################################

# ReduceLROnPlateau: once the monitored metric has stopped improving for
# `patience` epochs, multiply the learning rate by `factor` (never going
# below `min_lr`). Models often benefit from a 2-10x reduction when learning
# stagnates.
learn_control = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.2,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

# ModelCheckpoint: training is long, so keep a copy of the model on disk and
# overwrite it only when an epoch ends with a better (higher) val_accuracy.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# EarlyStopping: too many epochs can overfit the training set, too few can
# underfit. Allow a generous number of epochs and stop once val_loss has not
# improved for `patience` epochs. The weights of the best epoch are NOT
# restored here (restore_best_weights=False).
early_stopping = EarlyStopping(
    monitor="val_loss",
    mode="auto",
    min_delta=0,
    patience=25,
    baseline=None,
    restore_best_weights=False,
    verbose=1,
)

In [None]:
####################################################
# TRAINING
####################################################

# Number of full batches per epoch; the integer division leaves out a
# trailing partial batch.
train_steps = train_generator.n // batch_size
val_steps = val_generator.n // batch_size

model_history = model.fit(
    train_generator,
    epochs=num_epochs,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    # Loss contributions of class 0 samples are weighted 5x those of class 1.
    class_weight={0: 5, 1: 1},
    callbacks=[learn_control, checkpoint, early_stopping],
)

In [None]:
####################################################
# Saving Model
####################################################

# Save the current state of `model` into the same 'checkpoint' folder as the
# ModelCheckpoint file, under a different file name ('save_' prefix) so that
# the checkpoint is not overwritten.
# Note: this rebinds model_dir/model_name; `filepath` keeps pointing at the
# ModelCheckpoint file.
model_dir = base_path
model_name = 'save_model_resnet50_224_level0_009.h5'
model.save(os.path.join(model_dir, 'checkpoint', model_name))

In [None]:
# Plot training vs. validation accuracy, one point per epoch.
history_df = pd.DataFrame(model_history.history)
history_df[['accuracy', 'val_accuracy']].plot()

In [None]:
# Plot training vs. validation loss, one point per epoch.
history_df = pd.DataFrame(model_history.history)
history_df[['loss', 'val_loss']].plot()

In [None]:
####################################################
# LOAD BEST WEIGHTS FOR THE RESULTS
####################################################

# `filepath` is the file written by the ModelCheckpoint callback
# (save_best_only=True, monitored on val_accuracy).
model.load_weights(filepath)

In [None]:
# Predict in a fixed order. A shuffling iterator draws a new index order
# between passes, so its index_array read after predict() no longer describes
# the order the predictions were produced in. Turning shuffling off and
# rebuilding the index makes prediction i belong to classes[i].
val_generator.shuffle = False
val_generator.on_epoch_end()

val_pred = model.predict(val_generator)

val_true = val_generator.classes
# The model ends in Dense(1), so val_pred has one score per image; argmax
# over axis 1 would always return 0. Threshold the score at 0.5 instead.
val_predict = (np.ravel(val_pred) > 0.5).astype(int)

print(val_true)
print(val_predict)

In [None]:
# Fraction of validation patches whose predicted label equals the true label.
accuracy_score(val_true, val_predict)

In [None]:
# Predict in a fixed order. A shuffling iterator draws a new index order
# between passes, so its index_array read after predict() no longer describes
# the order the predictions were produced in. Turning shuffling off and
# rebuilding the index makes prediction i belong to classes[i].
test_generator.shuffle = False
test_generator.on_epoch_end()

test_pred = model.predict(test_generator)

test_true = test_generator.classes
# The model ends in Dense(1), so test_pred has one score per image; argmax
# over axis 1 would always return 0. Threshold the score at 0.5 instead.
test_predict = (np.ravel(test_pred) > 0.5).astype(int)

print(test_true)
print(test_predict)

In [None]:
# Test-time augmentation: predict the test set several times under random
# augmentation and average the scores.
tta_steps = 10
predictions = []

# ImageDataGenerator.flow() expects in-memory image arrays, so it cannot wrap
# test_generator. It is not needed either: test_generator was created from
# data_augmentation, so every pass over it already applies fresh random
# flips/rotations.
# The passes must visit the images in the same order, otherwise averaging
# would mix scores of different images.
test_generator.shuffle = False
test_generator.on_epoch_end()

for i in tqdm(range(tta_steps)):
    # len(test_generator) is the number of batches needed for one full pass.
    preds = model.predict(test_generator, steps=len(test_generator))

    predictions.append(preds)
    gc.collect()

# Mean score per image over the tta_steps augmented passes.
labels_test_pred_tta = np.mean(predictions, axis=0)

In [None]:
####################################################
# CONFUSION MATRIX
####################################################

from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm, classes, normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: 2-D confusion matrix; row i is drawn at y position i, column j at
            x position j.
        classes: Tick labels, used for both axes.
        normalize: If True, each row is divided by its sum before the matrix
            is printed and plotted.
        title: Title of the figure.
        cmap: Matplotlib colormap used for the heat map.
    """
    if not normalize:
        print('Confusion matrix, sem normalizacao')
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=55)
    plt.yticks(positions, classes)

    # Two decimals for normalized values, plain integers for raw counts.
    cell_format = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for readability.
    half_max = cm.max() / 2.
    for row, col in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        text_color = "white" if cm[row, col] > half_max else "black"
        plt.text(col,
                 row,
                 format(cm[row, col], cell_format),
                 horizontalalignment="center",
                 color=text_color)

    plt.ylabel('Real (rotulado)')
    plt.xlabel('Previsto (pelo modelo)')
    plt.tight_layout()

# Confusion matrix of the true test labels vs. the predicted test labels.
cm = confusion_matrix(test_true, test_predict)

# Tick labels for class 0 and class 1, in that order.
# NOTE(review): assumes folder '0' maps to class index 0 (negative) and '1' to
# class index 1 (positive) - confirm against test_generator.class_indices.
cm_plot_label =['Negativo', 'Positivo']
plot_confusion_matrix(cm, cm_plot_label, title ='Matriz de confusão para metastase em Câncer de mama')

In [None]:
####################################################
# Classification report
####################################################

from sklearn.metrics import classification_report

# Text report built from the true and predicted test labels.
c = classification_report(test_true, test_predict)
print(c)

In [None]:
####################################################
# ROC CURVE
####################################################

from sklearn.metrics import roc_auc_score, auc
from sklearn.metrics import roc_curve

# ROC/AUC need the continuous scores, not thresholded 0/1 labels: with hard
# labels roc_curve only sees a single operating point and the "curve"
# collapses to one corner. test_pred holds the model's raw outputs, one per
# test image.
test_scores = np.ravel(test_pred)

roc_log = roc_auc_score(test_true, test_scores)
false_positive_rate, true_positive_rate, threshold = roc_curve(test_true, test_scores)
area_under_curve = auc(false_positive_rate, true_positive_rate)

# Dashed red diagonal = chance level.
plt.plot([0, 1], [0, 1], 'r--')
plt.plot(false_positive_rate, true_positive_rate, label='AUC = {:.3f}'.format(area_under_curve))
plt.xlabel('Falso positivo rate')
plt.ylabel('Verdadeiro positivo rate')
plt.title('Curva ROC')
plt.legend(loc='best')
plt.show()
plt.close()