<a href="https://colab.research.google.com/github/jaden-2/Assignment/blob/master/RestNet_and_VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import gc
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split, StratifiedKFold
from tqdm import tqdm

from keras import optimizers
import keras.backend as K
from keras.callbacks import *
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import *
from keras.models import Model, load_model, save_model
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.applications.vgg16 import VGG16


%matplotlib inline

## Problem 1: Code review

<h2>Difference between RestNet and Unet</h3>
   <ul>
   <li>The RestNet was designed primarily for image classification tasks.</li>
   <li>Composed of convolutional layers with skip connections (or residual connections) that allow the network to learn residual functions with reference to the layer inputs, which helps in training very deep networks.</li>
   </ul>


<h2> How is transfer learning done?<h2>

<ol>Transfer learning can be done in the following ways:
<li>Transfer learning is done by using a pre-trained ResNet model from a library like TensorFlow, PyTorch, or Keras</li>
<li>
Freeze the weights of the initial layers to retain the pre-trained features. This prevents the weights from being updated during the training process.
</li>
</ol>


#### Losses

In [2]:
from keras.losses import binary_crossentropy

def dice_loss(y_true, y_pred):
    smooth = 1.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = y_true_f * y_pred_f
    score = (2. * K.sum(intersection) + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return 1. - score


def bce_dice_loss(y_true, y_pred):
    return binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)


# IoU metric for observation during training
# https://www.kaggle.com/cpmpml/fast-iou-metric-in-numpy-and-tensorflow
def get_iou_vector(A, B):
    # Numpy version
    batch_size = A.shape[0]
    metric = 0.0
    for batch in range(batch_size):
        t, p = A[batch], B[batch]
        true = np.sum(t)
        pred = np.sum(p)

        # deal with empty mask first
        if true == 0:
            metric += (pred == 0)
            continue

        # non empty mask case.  Union is never empty
        # hence it is safe to divide by its number of pixels
        intersection = np.sum(t * p)
        union = true + pred - intersection
        iou = intersection / union

        # iou metrric is a stepwise approximation of the real iou over 0.5
        iou = np.floor(max(0, (iou - 0.45)*20)) / 10

        metric += iou

    # teake the average over all images in batch
    metric /= batch_size
    return metric


def my_iou_metric(label, pred):
    return tf.numpy_function(get_iou_vector, [label, pred>0.5], tf.float64)


# For Lovash loss
def my_iou_metric_2(label, pred):
    return tf.numpy_function(get_iou_vector, [label, pred >0], tf.float64)

### Decoder

In [3]:
# Basic decoder block with Conv, BN and PReLU activation.
def decoder_block_simple(
        layer_name, block_name,
        num_filters=32,
        conv_dim=(3, 3)):

    x_dec = Conv2D(
        num_filters, conv_dim,
        padding='same',
        name='{}_conv'.format(block_name))(layer_name)
    x_dec = BatchNormalization(
        name='{}_bn'.format(block_name))(x_dec)
    x_dec = PReLU(
        name='{}_activation'.format(block_name))(x_dec)

    return x_dec

# Decoder block with bottleneck architecture, where middle conv layer
# is half the size of first and last, in order to compress representation.
# This type of architecture is supposed to retain most useful information.
def decoder_block_bottleneck(
        layer_name, block_name,
        num_filters=32,
        conv_dim=(3, 3),
        dropout_frac=0.2):

    x_dec = Conv2D(
        num_filters, conv_dim,
        padding='same',
        name='{}_conv1'.format(block_name))(layer_name)
    x_dec = BatchNormalization(
        name='{}_bn1'.format(block_name))(x_dec)
    x_dec = PReLU(
        name='{}_activation1'.format(block_name))(x_dec)
    x_dec = Dropout(dropout_frac)(x_dec)

    x_dec2 = Conv2D(
        num_filters // 2, conv_dim,
        padding='same',
        name='{}_conv2'.format(block_name))(x_dec)
    x_dec2 = BatchNormalization(
        name='{}_bn2'.format(block_name))(x_dec2)
    x_dec2 = PReLU(
        name='{}_activation2'.format(block_name))(x_dec2)
    x_dec2 = Dropout(dropout_frac)(x_dec2)

    x_dec2 = Conv2D(
        num_filters, conv_dim,
        padding='same',
        name='{}_conv3'.format(block_name))(x_dec2)
    x_dec2 = BatchNormalization(
        name='{}_bn3'.format(block_name))(x_dec2)
    x_dec2 = PReLU(
        name='{}_activation3'.format(block_name))(x_dec2)
    x_dec2 = Dropout(dropout_frac)(x_dec2)

    x_dec2 = Add()([x_dec, x_dec2])

    return x_dec2

## Problem 2: Code rewriting

### Model: VGG16

In [60]:

def unet_vgg(input_size, decoder_block,
             weights='imagenet',
             loss_func='binary_crossentropy',
             metrics_list=[my_iou_metric],
             use_lovash=False):

    inputs = Input(input_size)

    # Base model - encoder
    base_model = VGG16(
        input_tensor=inputs,
        include_top=False,
        weights=weights
    )

    # Layers for feature extraction in the encoder part
    encoder1 = base_model.get_layer('block1_conv2').output
    encoder2 = base_model.get_layer('block2_conv2').output
    encoder3 = base_model.get_layer('block3_conv3').output
    encoder4 = base_model.get_layer('block4_conv3').output
    encoder5 = base_model.get_layer('block5_conv3').output

    # Center block
    center = decoder_block(
        encoder5, 'center', num_filters=512)
    concat5 = concatenate([center, encoder5], axis=-1)

    # Decoder part.
    # Every decoder block processes concatenated output from encoder and decoder part.
    # This creates skip connections.
    # Afterwards, decoder output is upsampled to dimensions equal to encoder output part.
    decoder4 = decoder_block(
        concat5, 'decoder4', num_filters=256)
    concat4 = concatenate([UpSampling2D(size=(2, 2))(decoder4), encoder4], axis=-1)

    decoder3 = decoder_block(
        concat4, 'decoder3', num_filters=128)
    concat3 = concatenate([UpSampling2D(size=(2, 2))(decoder3), encoder3], axis=-1)

    decoder2 = decoder_block(
        concat3, 'decoder2', num_filters=64)
    concat2 = concatenate([UpSampling2D(size=(2, 2))(decoder2), encoder2], axis=-1)

    decoder1 = decoder_block(
        concat2, 'decoder1', num_filters=64)
    concat1 = concatenate([UpSampling2D(size=(2, 2))(decoder1), encoder1], axis=-1)

    # Final upsampling and decoder block for segmentation.
    output = UpSampling2D(size=(2, 2))(concat1)
    output = decoder_block(
        output, 'decoder_output', num_filters=32)
    output = Conv2D(
        1, (1, 1), activation=None, name='prediction')(output)

    # Upsample the output to match the input size
    #output = UpSampling2D(size=(2, 2))(output)  # Add this line
    output = MaxPooling2D(pool_size=(2, 2))(output)  # Replace the upsampling line with this

    if not use_lovash:
        output = Activation('sigmoid')(output)

    model = Model(inputs, output)
    model.compile(loss=loss_func, optimizer='adam', metrics=metrics_list)

    return model


### Model: RestNet

In [61]:
# Model is parametrized in a way to enable easy change of decoder_block type,
# as this is an argument that can be given a function, like decoder_block_simple.
def unet_restnet(input_size, decoder_block,
                weights='imagenet',
                loss_func='binary_crossentropy',
                metrics_list=[my_iou_metric],
                use_lovash=False):

    # Base model - encoder
    base_model = ResNet50(
        input_shape=input_size,
        include_top=False,
        weights=weights)


    # Layers for feature extraction in the encoder part
    encoder1 = base_model.get_layer('conv1_relu').output # activation_1
    encoder2 = base_model.get_layer('conv2_block3_out').output # activation_10
    encoder3 = base_model.get_layer('conv3_block4_out').output  # activation_22
    encoder4 = base_model.get_layer('conv4_block6_out').output  # activation_40
    encoder5 = base_model.get_layer('conv5_block3_out').output # activation_40

    # Center block
    center = decoder_block(
        encoder5, 'center', num_filters=512)
    concat5 = concatenate([center, encoder5], axis=-1)

    # Decoder part.
    # Every decoder block processed concatenated output from encoder and decoder part.
    # This creates skip connections.
    # Afterwards, decoder output is upsampled to dimensions equal to encoder output part.
    decoder4 = decoder_block(
        concat5, 'decoder4', num_filters=256)
    concat4 = concatenate([UpSampling2D()(decoder4), encoder4], axis=-1)

    decoder3 = decoder_block(
        concat4, 'decoder3', num_filters=128)
    concat3 = concatenate([UpSampling2D()(decoder3), encoder3], axis=-1)

    decoder2 = decoder_block(
        concat3, 'decoder2', num_filters=64)
    concat2 = concatenate([UpSampling2D()(decoder2), encoder2], axis=-1)

    decoder1 = decoder_block(
        concat2, 'decoder1', num_filters=64)
    concat1 = concatenate([UpSampling2D()(decoder1), encoder1], axis=-1)

    # Final upsampling and decoder block for segmentation.
    output = UpSampling2D()(concat1)
    output = decoder_block(
        output, 'decoder_output', num_filters=32)
    output = Conv2D(
        1, (1, 1), activation=None, name='prediction')(output)
    if not use_lovash:
        output = Activation('sigmoid')(output)

    model = Model(base_model.input, output)
    model.compile(loss=loss_func, optimizer='adam', metrics=metrics_list)

    return model

## Problem 3: Training and comparison

### Loading train data

In [5]:
from google.colab import drive

drive.mount("/content/drive")

%cd drive/MyDrive/project/train

Mounted at /content/drive
/content/drive/MyDrive/project/train


In [6]:
image_path_list = glob.glob("images/*")
X_train=[]
y_train=[]

for image_path in image_path_list[:100]:
    image = cv2.resize(cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)/255, (256, 256))
    X_train.append(image)
    mask = cv2.resize(cv2.imread(image_path.replace("images","masks"), cv2.IMREAD_GRAYSCALE)/255, (256, 256))
    y_train.append(mask)
X_train=np.array(X_train)
y_train=np.array(y_train)
X_train = np.repeat(np.expand_dims(X_train, axis=-1), 3, -1)
y_train = np.expand_dims(y_train, axis=-1)



# Training

In [7]:
input_size = (256, 256, 3)


### RestNet training

In [62]:
model_restnet = unet_restnet(
    input_size, decoder_block_bottleneck, weights='imagenet',
    loss_func=bce_dice_loss, metrics_list=[my_iou_metric],
    use_lovash=False)
print(model_restnet.summary())


model_checkpoint = ModelCheckpoint(
    'unet_resnet.h5' ,monitor='val_my_iou_metric', mode='max',
    save_best_only=True, save_weights_only=True, verbose=1)

reduce_lr = ReduceLROnPlateau(
    monitor='val_my_iou_metric',
    mode='max',
    factor=0.5,
    patience=5,
    min_lr=0.0001,
    verbose=1)

model_restnet.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
epochs = 10  # 25
batch_size = 16

history = model_restnet.fit(X_train, y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[model_checkpoint,reduce_lr],
                    verbose=1)

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_27 (InputLayer)       [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 262, 262, 3)          0         ['input_27[0][0]']            
                                                                                                  
 conv1_conv (Conv2D)         (None, 128, 128, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 128, 128, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                       



Epoch 2/10



Epoch 3/10



Epoch 4/10



Epoch 5/10



Epoch 6/10



Epoch 7/10



Epoch 8/10



Epoch 9/10



Epoch 10/10





### VGG training


In [65]:
K.clear_session()
model_vgg = unet_vgg(
    input_size, decoder_block_bottleneck, weights='imagenet',
    loss_func=bce_dice_loss, metrics_list=[my_iou_metric],
    use_lovash=False)
print(model_vgg.summary())


model_checkpoint = ModelCheckpoint(
    'unet_resnet.h5' ,monitor='val_my_iou_metric', mode='max',
    save_best_only=True, save_weights_only=True, verbose=1)

reduce_lr = ReduceLROnPlateau(
    monitor='val_my_iou_metric',
    mode='max',
    factor=0.5,
    patience=5,
    min_lr=0.0001,
    verbose=1)

model_vgg.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
epochs = 10  # 25
batch_size = 6

history = model_vgg.fit(X_train, y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[model_checkpoint,reduce_lr],
                    verbose=1)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 block1_conv1 (Conv2D)       (None, 256, 256, 64)         1792      ['input_1[0][0]']             
                                                                                                  
 block1_conv2 (Conv2D)       (None, 256, 256, 64)         36928     ['block1_conv1[0][0]']        
                                                                                                  
 block1_pool (MaxPooling2D)  (None, 128, 128, 64)         0         ['block1_conv2[0][0]']        
                                                                                              



Epoch 2/10



Epoch 3/10



Epoch 4/10



Epoch 5/10



Epoch 6/10



Epoch 7/10



Epoch 8/10



Epoch 9/10



Epoch 10/10





## Performance report

<table>
<th>
<td>Model</td>
<td>Accuracy</td>
<td>Batch size</td>
<td>Comment</td>
</th>

<tr>
<td>1</td>
<td>RestNet50</td>
<td>90.70%</td>
<td>10</td>
<td>This is a smaller network, it trained faster and out performed the VGG 16</td>
</tr>

<tr>
<td>2</td>
<td>VGG16</td>
<td>83.33%</td>
<td>6</td>
<td>Compared to the RestNet this a large model. I trained slowly and kept encountering Graph execution error. As a result I had to reduce the bach size</td>
</tr>
</table>