# RefineNet for image segmentation, applied to the Kaggle Data Science Bowl 2018 competition
### Paper: https://arxiv.org/pdf/1611.06612.pdf
### Competition: https://www.kaggle.com/c/data-science-bowl-2018

In [1]:
import ref_net as rf

import os
import sys
import random
import warnings

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import chain
import skimage
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

from keras.models import Model, load_model

from keras.layers import Input, Activation, ZeroPadding2D, Add
from keras.layers.core import Dropout, Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose, ZeroPadding2D

from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.initializers import glorot_uniform
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

from keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf

# Set some parameters
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 3
TRAIN_PATH = 'dataset/stage1_train'
TEST_PATH = 'dataset/stage1_test'

# Silence the UserWarnings raised from within skimage.
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# Seed the Python and NumPy global RNGs.
# The seed functions have to be *called*: assigning to them (`random.seed = seed`)
# replaces the function with an int, seeds nothing and breaks later seed() calls.
seed = 17
random.seed(seed)
np.random.seed(seed)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Creating building blocks for RefineNet

## ResNet block for encoding
#### X_prev -> Conv 1x1 (projection to `filters` channels) -> X_0; then X_0 -> BN -> ReLU -> Conv 3x3 (pad = same) -> BN -> ReLU -> Conv 3x3 (pad = same) -> X; output = Add(X_0, X)

In [2]:
def ResNetBlock(X_prev, filters):
    """
        Pre-activation residual block used by the encoder.

        The input is first projected to `filters` channels with a 1x1 convolution.
        That projection is both the shortcut and the input of the residual branch:
        (BN -> ReLU -> Conv 3x3, pad = same) applied twice, then an element-wise Add.

        X_prev -- tensor produced by the previous layer (BatchNormalization runs on axis 3)
        filters -- python integer, number of channels produced by every convolution here

        returns:
        X -- sum of the residual branch and the projected shortcut
    """
    conv_kwargs = dict(strides=(1,1), kernel_initializer='he_normal')

    # 1x1 projection so that the shortcut always has `filters` channels
    shortcut = Conv2D(filters, kernel_size=(1,1), **conv_kwargs)(X_prev)

    branch = shortcut
    for _ in range(2):
        branch = BatchNormalization(axis=3)(branch)
        branch = Activation('relu')(branch)
        branch = Conv2D(filters, kernel_size=(3,3), padding='same', **conv_kwargs)(branch)

    # shortcut connection
    return Add()([branch, shortcut])

### The structure of net
#### RCU -> Multi-resolution Unit -> Chained Residual Pooling

In [3]:
def RCU(X_prev, filters):
    """
            Residual Conv Unit (with bottleneck)

        The input is projected to `filters` channels by a 1x1 convolution; the
        projection is kept as the shortcut. The residual branch then applies twice:
            ReLU -> Conv 1x1 (filters/4 channels) -> Conv 3x3 (filters channels, pad = same)
        and the result is added to the shortcut.

        X_prev -- input tensor of the unit
        filters -- python integer, number of output channels

        returns the tensor produced by the final Add
    """
    shortcut = Conv2D(filters, kernel_size=(1,1), strides=(1,1), kernel_initializer='he_normal')(X_prev)

    # channel count of the 1x1 bottleneck convolutions
    squeezed = np.int32(filters/4)

    branch = shortcut
    for _ in range(2):
        branch = Activation('relu')(branch)
        branch = Conv2D(squeezed, (1,1), kernel_initializer='he_normal')(branch)
        branch = Conv2D(filters, kernel_size=(3,3), strides=(1,1), kernel_initializer='he_normal', padding='same')(branch)

    return Add()([branch, shortcut])

In [26]:
def multi_resolution(X_prev, filters, upsample=True, up=2):
    """
        One input path of the Multi-resolution Unit.

        Conv 1x1 (filters/4 channels) -> Conv 3x3 (filters channels, pad = same),
        optionally followed by a transposed convolution whose kernel and stride are
        both `up`, i.e. the spatial size is multiplied by `up`.

        Parameters:
        X_prev -- output from RCU
        filters -- python integer, number of output channels
        upsample -- boolean, whether the transposed convolution is applied
        up -- upsampling factor used when `upsample` is True
    """
    fused = Conv2D(np.int32(filters/4), (1,1), kernel_initializer='he_normal')(X_prev)
    fused = Conv2D(filters, kernel_size=(3,3), strides=(1,1), padding='same', kernel_initializer='he_normal')(fused)

    # explicit comparison kept on purpose: only a value equal to True triggers upsampling
    if upsample == True:
        fused = Conv2DTranspose(filters, (up,up), strides=(up,up), padding = 'same')(fused)

    return fused
        

In [5]:
def add_multi_resolution(X1, X2):
    """
        Fuse two paths of the Multi-resolution Unit by element-wise addition.

        X1, X2 -- tensors to be summed by a Keras Add layer
    """
    return Add()([X1, X2])

In [6]:
def chain_pool(X_prev, filters):
    """
        Chained residual pooling unit (single pooling stage)
           ReLU(X_prev) -> 5x5 MaxPool (stride 1) -> Conv 1x1 (filters/4) -> Conv 3x3 == X
           output = Sum(ReLU(X_prev), X)

        Only one pool/conv stage is applied; a second stage is not part of this unit.

        X_prev -- input tensor
        filters -- python integer, number of channels of the 3x3 convolution
    """
    activated = Activation('relu')(X_prev)

    pooled = MaxPooling2D((5,5), strides=(1,1), padding = 'same')(activated)
    pooled = Conv2D(np.int32(filters/4), (1,1), kernel_initializer='he_normal')(pooled)
    pooled = Conv2D(filters, kernel_size=(3,3), strides=(1,1), padding='same', kernel_initializer='he_normal')(pooled)

    return Add()([pooled, activated])

In [27]:
def _refine_stage(coarse, fine, filters):
    """
        One two-input RefineNet block.

        coarse -- lower-resolution tensor (output of the previous refine stage)
        fine -- higher-resolution tensor (skip connection from the encoder)
        filters -- python integer, number of channels used inside the block

        Both inputs go through an adaptive RCU; the coarse path is upsampled by 2
        inside the multi-resolution unit, the paths are summed, and the sum goes
        through chained pooling and an output RCU.
    """
    # ADAPTIVE CONV RCU
    rcu_coarse = RCU(coarse, filters)
    rcu_fine = RCU(fine, filters)

    # MULTIRESOLUTION UNIT: only the coarse path is upsampled
    up_coarse = multi_resolution(rcu_coarse, filters, upsample=True)
    same_fine = multi_resolution(rcu_fine, filters, upsample=False)

    # ADDING ML OUTPUTS
    fused = add_multi_resolution(up_coarse, same_fine)

    # CHAIN POOLING
    pooled = chain_pool(fused, filters)

    # OUTPUT CONV RCU
    return RCU(pooled, filters)


def RefNet(input_shape=(256,256,3), verbose=1):
    """
        Build the cascaded RefineNet model on top of a small ResNet-style encoder.

        input_shape -- shape of one input image, default (256,256,3)
        verbose -- when equal to 1 the model summary is printed

        returns:
        model -- uncompiled Keras Model with a single-channel sigmoid output

        Spatial sizes quoted in the comments assume the default 256x256 input.
    """
    # Number of channels used by every refine block
    F = 256

    X_input = Input(input_shape)
    s = Lambda(lambda x: x / 255) (X_input)

    ################# STAGE OF ENCODING
    # Stage 1: two plain 3x3 convolutions, then 1/2 downsampling
    c1_1 = Conv2D(8, (3,3), strides=(1,1), padding='same', kernel_initializer='he_normal', activation='relu')(s)
    c1_2 = Conv2D(8, (3,3), strides=(1,1), padding='same', kernel_initializer='he_normal', activation='relu')(c1_1)
    m1 = MaxPooling2D((2,2))(c1_2)

    # Stage 2-6: ResNet blocks, each followed by 2x2 max pooling (except the last)
    rs_2 = ResNetBlock(m1, 16)      # 1/2
    m2 = MaxPooling2D((2,2))(rs_2)

    rs_3 = ResNetBlock(m2, 32)      # 1/4
    m3 = MaxPooling2D((2,2))(rs_3)

    rs_4 = ResNetBlock(m3, 64)      # 1/8
    m4 = MaxPooling2D((2,2))(rs_4)

    rs_5 = ResNetBlock(m4, 128)     # 1/16
    m5 = MaxPooling2D((2,2))(rs_5)

    rs_6 = ResNetBlock(m5, 256)     # 1/32

    ################# END STAGE OF ENCODING, GO TO REFNET

    ### REFNET-4, SINGLE INPUT: 1/32 (rs_6)
    # With a single input there is nothing to fuse, so the multi-resolution
    # unit is skipped: adaptive RCU -> chained pooling -> output RCU.
    rcu_6_1 = RCU(rs_6, F)
    ch_res_pool_6 = chain_pool(rcu_6_1, F)
    out_conv_6 = RCU(ch_res_pool_6, F)
    #### END REFNET-4 OUT SHAPE: 8 X 8 X 256

    ### REFNET-3 MULTIPLE INPUT: 1/32(out_conv_6), 1/16(rs_5)
    out_conv_5 = _refine_stage(out_conv_6, rs_5, F)
    #### END REFNET-3 OUT SHAPE: 16 X 16 X 256

    ### REFNET-2 MULTIPLE INPUT: 1/16(out_conv_5), 1/8(rs_4)
    out_conv_4 = _refine_stage(out_conv_5, rs_4, F)
    #### END REFNET-2 OUT SHAPE: 32 X 32 X 256

    ### REFNET-1 MULTIPLE INPUT: 1/8(out_conv_4), 1/4(rs_3)
    out_conv_3 = _refine_stage(out_conv_4, rs_3, F)
    #### END REFNET-1 OUT SHAPE: 64 X 64 X 256

    # UPSAMPLING THE OUT CONV (1/4) BY 4 TO THE ORIG SHAPE OF INPUT
    up_to_orig = Conv2DTranspose(64, kernel_size=(4,4), strides=(4,4), padding='same',
                                 kernel_initializer='he_normal')(out_conv_3)

    outputs = Conv2D(1, (1,1), activation='sigmoid')(up_to_orig)

    model = Model(inputs=[X_input], outputs=[outputs])

    if verbose==1:
        model.summary()
    return model

# Single RefineNet

In [30]:
def SingleRefNet(input_shape=(256,256,3), verbose=1):
    """
        Build a model with a single four-input RefineNet block on a small encoder.

        input_shape -- shape of one input image, default (256,256,3)
        verbose -- when equal to 1 the model summary is printed

        returns:
        model -- uncompiled Keras Model with a single-channel sigmoid output

        Shapes quoted in the comments assume the default 256x256 input.
    """
    # Number of channels used inside the refine block
    F = 64

    X_input = Input(input_shape)
    scaled = Lambda(lambda x: x / 255) (X_input)

    ################# ENCODER
    # Stage 1: two plain 3x3 convolutions at full resolution
    stem = Conv2D(16, (3,3), strides=(1,1), padding='same', kernel_initializer='he_normal', activation='relu')(scaled)
    c1_2 = Conv2D(16, (3,3), strides=(1,1), padding='same', kernel_initializer='he_normal', activation='relu')(stem)

    # Stage 2-4: 2x2 max pooling followed by a ResNet block
    rs_2 = ResNetBlock(MaxPooling2D((2,2))(c1_2), 32)
    rs_3 = ResNetBlock(MaxPooling2D((2,2))(rs_2), 64)
    rs_4 = ResNetBlock(MaxPooling2D((2,2))(rs_3), 128)

    ################# SINGLE REFINE BLOCK
    # Inputs:  c1_2 (256,256,16) | rs_2 (128,128,32) | rs_3 (64,64,64) | rs_4 (32,32,128)

    # RESIDUAL CONV UNITS: two per downsampled input, one for the full-resolution input
    path_32 = RCU(RCU(rs_4, F), F)       # (32,32,64)
    path_64 = RCU(RCU(rs_3, F), F)       # (64,64,64)
    path_128 = RCU(RCU(rs_2, F), F)      # (128,128,64)
    path_256 = RCU(c1_2, F)              # (256,256,64)

    # MULTI-RESOLUTION UNIT: bring every path to (256,256,64)
    up_32 = multi_resolution(path_32, F, upsample=True, up=8)
    up_64 = multi_resolution(path_64, F, upsample=True, up=4)
    up_128 = multi_resolution(path_128, F, upsample=True, up=2)
    keep_256 = multi_resolution(path_256, F, upsample=False)

    # Pairwise sums, then the sum of the two partial results
    coarse_sum = add_multi_resolution(up_32, up_64)
    fine_sum = add_multi_resolution(up_128, keep_256)
    ml_out = add_multi_resolution(coarse_sum, fine_sum)

    # CHAINED POOLING followed by the output RCU
    refined = RCU(chain_pool(ml_out, F), F)

    ################# PREDICTION: 1x1 convolution with sigmoid
    outputs = Conv2D(1, (1,1), activation='sigmoid')(refined)

    model = Model(inputs=[X_input], outputs=[outputs])

    if verbose==1:
        model.summary()
    return model

In [31]:
# Build the single-block RefineNet with its defaults: (256,256,3) input, and
# verbose=1, which prints the layer summary shown below.
model = SingleRefNet()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 256, 256, 3)  0           input_12[0][0]                   
__________________________________________________________________________________________________
conv2d_109 (Conv2D)             (None, 256, 256, 16) 448         lambda_3[0][0]                   
__________________________________________________________________________________________________
conv2d_110 (Conv2D)             (None, 256, 256, 16) 2320        conv2d_109[0][0]                 
__________________________________________________________________________________________________
max_poolin

# Import data
# Define IoU and run the model

In [8]:
def mean_iou(y_true, y_pred):
    """
        Keras metric: mean IoU averaged over a range of binarization thresholds.

        y_true -- ground-truth mask tensor
        y_pred -- predicted tensor; it is binarized with `y_pred > t` for each threshold

        For every threshold t in np.arange(0.5, 1.0, 0.05) the prediction is
        binarized, a 2-class tf.metrics.mean_iou is built, and the resulting
        scores are stacked and averaged.

        returns the mean of the per-threshold scores
    """
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        # Binarize the prediction at the current threshold
        y_pred_ = tf.to_int32(y_pred > t)
        # tf.metrics.mean_iou yields the metric tensor and the op that updates it
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        # The streaming metric keeps its state in local variables; initialize them
        K.get_session().run(tf.local_variables_initializer())
        # Make reading `score` depend on the update op, so the update runs first
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)

In [10]:
# Load train images, train masks and test images through the project-local `ref_net` module.
# NOTE(review): array shapes/dtypes and the resizing are defined in ref_net.get_data,
# which is not shown here — confirm there.
X_train, Y_train, X_test = rf.get_data()

Getting and resizing train images and masks ... 


100%|██████████| 670/670 [05:30<00:00,  2.03it/s]

Getting and resizing test images ... 



100%|██████████| 65/65 [00:01<00:00, 35.57it/s]

Done!





In [32]:
# Adam optimizer with binary cross-entropy loss; mean_iou is only reported as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[mean_iou])

In [33]:
# The checkpoint is only written at the end of an epoch, so a missing target
# directory would surface after a full epoch of training. Create it up front.
checkpoint_path = 'models/ref_minimized.h5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Stop when the monitored quantity has not improved for 5 epochs, and keep only
# the best model on disk.
earlystopper = EarlyStopping(patience=5, verbose=1)
checkpointer = ModelCheckpoint(checkpoint_path, verbose=1, save_best_only=True)

# 10% of the training data is held out for validation.
results = model.fit(X_train, Y_train, validation_split=0.1, batch_size=16, epochs=50, 
                    callbacks=[earlystopper, checkpointer])

Train on 603 samples, validate on 67 samples
Epoch 1/50

KeyboardInterrupt: 