---
# Import Libraries

In [4]:
import os
import csv
import tensorflow as tf
import random

import numpy as np
import cv2

from sklearn.utils import class_weight
from tensorflow.keras import Model
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import plot_model

from collections import Counter

# Seed every random number generator the notebook relies on so that a
# "Restart & Run All" draws the same random crops (`random`), the same
# generator shuffling / flips (NumPy global state) and the same initial
# network weights (TensorFlow). GPU kernels may still add small run-to-run
# differences.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Show NumPy floats with five decimals when arrays are printed.
np.set_printoptions(precision = 5)

---
# Read Images and Augment Training Data

In [5]:
def random_crop(img, crop_size = 224):
    """Cut a randomly positioned square window out of an image.

    The window position is drawn uniformly from every offset at which the
    window still fits inside the image, so the function works for any image
    size (for a 300x300 image and the default `crop_size` this is the
    0..76 range that used to be hard-coded).

    Args:
        img: image array indexed as [row, column, ...].
        crop_size: side length of the window in pixels.

    Returns:
        The slice of `img` with `crop_size` rows and `crop_size` columns.

    Raises:
        ValueError: if `img` has fewer than `crop_size` rows or columns.
    """
    height, width = img.shape[:2]
    if height < crop_size or width < crop_size:
        raise ValueError(
            'random_crop: image of size %dx%d is smaller than the %dx%d crop'
            % (width, height, crop_size, crop_size)
        )
    # Column offset is drawn first, then the row offset (same order as before,
    # so a seeded run yields the same crops on 300x300 images).
    randx = random.randint(0, width - crop_size)
    randy = random.randint(0, height - crop_size)
    return img[randy:randy + crop_size, randx:randx + crop_size]

def read_data(folder_path, augment = False, input_size = (224,224)):
    """Load all images below a folder and label them from their file names.

    A file whose name contains 'malignant' gets label 1, every other file
    gets label 0. Each image is resized to `input_size`. With `augment`
    enabled, seven extra samples are added per image, in this order: the
    image rotated by 90 degrees clockwise, 90 degrees counter-clockwise and
    180 degrees (each resized), followed by one random crop of the original
    and of each of the three rotations.

    Args:
        folder_path: directory that is walked recursively.
        augment: when True, add the seven rotated / cropped variants.
        input_size: target size handed to `cv2.resize`, i.e. (width, height).

    Returns:
        Tuple `(x, y)` of NumPy arrays holding the images and their labels.
    """
    import warnings

    def fit(image):
        # Bring an image to the requested network input size.
        return cv2.resize(image, input_size, interpolation = cv2.INTER_LANCZOS4)

    def fit_crop(crop):
        # Crops that already have the target size are kept untouched; any other
        # crop is resized so that all samples share one shape and `np.asarray`
        # can stack them into a regular array.
        if (crop.shape[1], crop.shape[0]) == tuple(input_size):
            return crop
        return fit(crop)

    rotations = (
        cv2.ROTATE_90_CLOCKWISE,
        cv2.ROTATE_90_COUNTERCLOCKWISE,
        cv2.ROTATE_180,
    )

    x = []
    y = []
    for root, dirs, files in os.walk(folder_path, topdown = False):
        # Sorted so the order inside a directory does not depend on the file system.
        for name in sorted(files):
            pth = os.path.join(root, name)
            img = cv2.imread(pth)
            if img is None:
                # cv2.imread signals an unreadable / non-image file with None
                # instead of raising; skip it rather than crash in cv2.resize.
                warnings.warn('read_data: could not read image %s, skipping it' % pth)
                continue

            label = 1 if 'malignant' in name else 0
            x.append(fit(img))
            y.append(label)

            if augment:
                rotated = [cv2.rotate(img, code) for code in rotations]
                x.extend(fit(view) for view in rotated)
                x.extend(fit_crop(random_crop(view)) for view in [img] + rotated)
                # Three rotations plus four crops share the label of the source image.
                y.extend([label] * 7)
    return np.asarray(x), np.asarray(y)

In [3]:
# Training crops are augmented (rotations + random crops); validation crops
# are loaded as they are.
x_train, y_train = read_data('crops_train/', augment = True)
x_val, y_val = read_data('crops_val/')

# Class balance of each split (label 1 = file name contains 'malignant').
for split_labels in (y_train, y_val):
    print(Counter(split_labels))

Counter({0: 20360, 1: 6288})
Counter({0: 432, 1: 318})


In [4]:
# Constants used to standardise the pixel intensities of both splits.
# NOTE(review): presumably the mean / standard deviation of the training
# pixels - confirm where these values were computed.
PIXEL_MEAN = 100.090965
PIXEL_STD = 64.16994

# This cell overwrites x_train / x_val, so run it exactly once after loading;
# running it again would standardise already standardised data.
x_train = (x_train.astype('float32') - PIXEL_MEAN) / PIXEL_STD
x_val = (x_val.astype('float32') - PIXEL_MEAN) / PIXEL_STD

---
# Classification Model

In [6]:
def build_model(input_shape = (224, 224, 3), weights = None, backbone = 'ResNet101V2'):
    """Build a binary patch classifier on top of a Keras application backbone.

    The backbone is created without its classification head; its output is
    reduced with global average pooling and fed into a single sigmoid unit.

    Args:
        input_shape: shape of one input image, passed to the backbone.
        weights: `weights` argument of the backbone constructor
            (None means random initialisation).
        backbone: which architecture to use; one of 'VGG16', 'VGG19',
            'InceptionV3', 'ResNet50V2' or 'ResNet101V2' (the default, which
            is the architecture this notebook has always trained).

    Returns:
        An uncompiled `Model` mapping an image to one sigmoid output.

    Raises:
        ValueError: if `backbone` is not one of the supported names.
    """
    applications = tf.keras.applications
    backbones = {
        'VGG16': applications.vgg16.VGG16,
        'VGG19': applications.vgg19.VGG19,
        'InceptionV3': applications.inception_v3.InceptionV3,
        'ResNet50V2': applications.resnet_v2.ResNet50V2,
        'ResNet101V2': applications.resnet_v2.ResNet101V2,
    }
    if backbone not in backbones:
        raise ValueError(
            'Unknown backbone %r, expected one of %s' % (backbone, sorted(backbones))
        )

    model = backbones[backbone](weights = weights, include_top = False, input_shape = input_shape)

    # Output of the last backbone layer -> pooled feature vector -> sigmoid unit.
    x = model.layers[-1].output
    x = GlobalAveragePooling2D()(x)
    predictions = Dense(1, activation = 'sigmoid')(x)

    patch_model = Model(inputs = model.input, outputs = predictions)

    return patch_model

In [7]:
# Randomly initialised classifier for 224x224 three-channel patches
# (the arguments spell out build_model's defaults).
patch_model = build_model(input_shape = (224, 224, 3), weights = None)

## Model Visualization

In [8]:
# Print the layer-by-layer table (output shapes, parameter counts, connections).
patch_model.summary()

relu[0][0]       
__________________________________________________________________________________________________
conv4_block18_out (Add)         (None, 14, 14, 1024) 0           conv4_block17_out[0][0]          
                                                                 conv4_block18_3_conv[0][0]       
__________________________________________________________________________________________________
conv4_block19_preact_bn (BatchN (None, 14, 14, 1024) 4096        conv4_block18_out[0][0]          
__________________________________________________________________________________________________
conv4_block19_preact_relu (Acti (None, 14, 14, 1024) 0           conv4_block19_preact_bn[0][0]    
__________________________________________________________________________________________________
conv4_block19_1_conv (Conv2D)   (None, 14, 14, 256)  262144      conv4_block19_preact_relu[0][0]  
___________________________________________________________________________________________

## Data Augmentation

In [7]:
# Online augmentation applied while batches are drawn: images are randomly
# mirrored horizontally and vertically.
flip_options = {'horizontal_flip': True, 'vertical_flip': True}
datagen = ImageDataGenerator(**flip_options)

## Callbacks

In [8]:
# Both callbacks watch the validation AUC, where larger is better.
monitor_settings = {'monitor': 'val_auc', 'mode': 'max'}

# Stop once the validation AUC has not improved for three epochs.
early_stopping = EarlyStopping(patience = 3, **monitor_settings)

# Keep only the weights of the best epoch so far on disk.
model_checkpoint = ModelCheckpoint(
    filepath = 'checkpoint.h5',
    save_best_only = True,
    verbose = 1,
    **monitor_settings
)

## Compile Model

In [14]:
# Binary cross-entropy on the sigmoid output, optimised with Adam at a small
# learning rate.
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-5)

# The AUC metric is named 'auc' so that its validation value is logged as
# 'val_auc', the quantity the callbacks monitor.
tracked_metrics = ['accuracy', tf.keras.metrics.AUC(name = 'auc')]

patch_model.compile(loss = loss_fn, optimizer = optimizer, metrics = tracked_metrics)

## Class Weights

In [10]:
# 'balanced' weights are inversely proportional to the class frequencies, so
# the rarer class contributes more to the loss.
# The arguments are passed by keyword: recent scikit-learn releases no longer
# accept `classes` and `y` positionally.
classes = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight(
    class_weight = 'balanced', classes = classes, y = y_train
)

# Map each actual label to its weight (instead of assuming the labels are
# exactly 0..n-1) and use plain Python numbers as keys / values.
class_weights = {
    int(label): float(weight) for label, weight in zip(classes, balanced_weights)
}
print(class_weights)

{0: 0.6547244094488189, 1: 2.1157760814249365}


## Train Model

In [15]:
batch_size = 32
epochs = 50

# Batches of (flipped) training images; the generator shuffles by default.
augmented_train = datagen.flow(
    x_train, y_train, batch_size = batch_size
)

# Keep the History object: it holds the per-epoch metrics for later inspection
# or plotting, and assigning it avoids a bare object repr as the cell output.
# `epochs` is only an upper bound - early stopping ends training once the
# validation AUC stops improving.
# NOTE(review): the training log stored with this notebook reports 832 steps
# and 765 validation samples, which does not match the label counts printed by
# the loading cell (26648 training / 750 validation samples); re-run the
# notebook top to bottom to refresh the outputs.
history = patch_model.fit(
    augmented_train,
    validation_data = (x_val, y_val),
    epochs = epochs,
    class_weight = class_weights,
    callbacks = [early_stopping, model_checkpoint],
    verbose = 2
)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 832 steps, validate on 765 samples
Epoch 1/50

Epoch 00001: val_auc improved from -inf to 0.89080, saving model to checkpoint.h5
832/832 - 483s - loss: 0.3090 - accuracy: 0.8643 - auc: 0.9440 - val_loss: 0.7949 - val_accuracy: 0.8405 - val_auc: 0.8908
Epoch 2/50

Epoch 00002: val_auc did not improve from 0.89080
832/832 - 461s - loss: 0.1438 - accuracy: 0.9492 - auc: 0.9873 - val_loss: 1.3148 - val_accuracy: 0.8209 - val_auc: 0.8894
Epoch 3/50

Epoch 00003: val_auc improved from 0.89080 to 0.89895, saving model to checkpoint.h5
832/832 - 465s - loss: 0.0965 - accuracy: 0.9682 - auc: 0.9940 - val_loss: 1.3082 - val_accuracy: 0.8170 - val_auc: 0.8989
Epoch 4/50

Epoch 00004: val_auc improved from 0.89895 to 0.90959, saving model to checkpoint.h5
832/832 - 466s - loss: 0.0698 - accuracy: 0.9772 - auc: 0.9965 - val_loss: 0.9724 - val_accuracy: 0.8235 - val_auc: 0.9096
Epoch 5/50

Epoch 00005: val_auc improved from 0.90959 to 0.923

<tensorflow.python.keras.callbacks.History at 0x7f3c3471db90>