This notebook demonstrates an implementation of the U-Net architecture as described in the original paper.

In [None]:
import tensorflow as tf
import keras
import os
import random
import numpy as np
from tqdm import tqdm 
from skimage.io import imread, imshow
from skimage.transform import resize
import matplotlib.pyplot as plt

The Basic Unet Architecture

In [None]:
# ---------------------------------------------------------------------------
# Small layer factories shared by the encoder, the bridge and the decoder
# ---------------------------------------------------------------------------

def conv3x3(filters):
    """Return a 3x3 ReLU convolution layer with 'same' padding and He-normal initialised kernels."""
    return tf.keras.layers.Conv2D(
        filters,
        (3, 3),
        activation='relu',
        padding='same',
        kernel_initializer='he_normal',
    )


def double_conv(tensor, filters):
    """Apply two consecutive 3x3 convolutions, each with `filters` output channels."""
    tensor = conv3x3(filters)(tensor)
    return conv3x3(filters)(tensor)


def downsample(tensor):
    """Halve the spatial resolution with a 2x2 max pooling layer."""
    return tf.keras.layers.MaxPooling2D((2, 2))(tensor)


def upsample_and_merge(tensor, skip, filters):
    """Upsample `tensor` with a stride-2 transposed convolution and concatenate it with `skip`."""
    upsampled = tf.keras.layers.Conv2DTranspose(
        filters, (2, 2), strides=(2, 2), padding='same'
    )(tensor)
    return tf.keras.layers.concatenate([upsampled, skip])


# Input layer: 128x128 images with 3 channels
inputs = tf.keras.layers.Input(shape=(128, 128, 3))

# Rescale the pixel values by 1/255 (maps 0-255 inputs to the range 0-1)
inp = tf.keras.layers.Lambda(lambda pixels: pixels / 255)(inputs)

# ---------------------------------------------------------------------------
# Encoder
# Four convolution blocks, each followed by max pooling that downsamples the
# feature map. The kernel weights start from the He-normal initialiser and are
# updated during training. The filter count doubles at every level.
# ---------------------------------------------------------------------------

# First convolution block
c1 = double_conv(inp, 16)
p1 = downsample(c1)

# Second convolution block
c2 = double_conv(p1, 32)
p2 = downsample(c2)

# Third convolution block
c3 = double_conv(p2, 64)
p3 = downsample(c3)

# Fourth convolution block
c4 = double_conv(p3, 128)
p4 = downsample(c4)

# ---------------------------------------------------------------------------
# Bridge: a convolution block (no pooling) that connects the encoder to the decoder
# ---------------------------------------------------------------------------
c5 = double_conv(p4, 256)

# ---------------------------------------------------------------------------
# Decoder
# Every level upsamples, concatenates the result with the output of the
# matching encoder block (skip connection) and convolves the merged tensor.
# The decoder has the same number of blocks as the encoder.
# ---------------------------------------------------------------------------

# First decoding block (skip connection from c4)
u6 = upsample_and_merge(c5, c4, 128)
c6 = double_conv(u6, 128)

# Second decoding block (skip connection from c3)
u7 = upsample_and_merge(c6, c3, 64)
c7 = double_conv(u7, 64)

# Third decoding block (skip connection from c2)
u8 = upsample_and_merge(c7, c2, 32)
c8 = double_conv(u8, 32)

# Fourth decoding block (skip connection from c1)
u9 = upsample_and_merge(c8, c1, 16)
c9 = double_conv(u9, 16)

# Output layer: a 1x1 convolution with a single filter and sigmoid activation,
# producing one mask value per pixel at the same spatial size as the input image.
# A softmax output could be used instead, but it would need extra steps to turn
# the generated mask into a binary one.
output = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)


The model defined above will be trained on a sample dataset that is publicly available on Kaggle; the link to the dataset is in the repo description.

In [None]:
# A fixed seed makes the random operations below repeatable.
# The seeding functions must be *called*: assigning to `np.random.seed` would
# replace the function with an integer and leave the generator unseeded.
# NOTE: TensorFlow's own random generator is not seeded here; call
# tf.random.set_seed(seed) before building the model if the weight
# initialisation has to be repeatable as well.
seed = 42
random.seed(seed)      # stdlib generator, used later to pick sample images
np.random.seed(seed)   # NumPy's legacy global generator

# Target geometry: every image is resized to these dimensions so that it fits the model input
IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS = 128, 128, 3

# Locations of the train and test images, following the folder layout of the dataset
TRAIN_PATH = 'data-science-bowl-2018/stage1_train/'
TEST_PATH = 'data-science-bowl-2018/stage1_test/'

# os.walk yields (dirpath, dirnames, filenames) tuples; the first tuple describes the
# top-level folder, so its dirnames are the immediate sub-folders (one per sample id)
train_ids = next(os.walk(TRAIN_PATH))[1]
test_ids = next(os.walk(TEST_PATH))[1]

# Pre-allocate the training arrays; the loop below fills them one sample at a time.
# X_train has shape (N, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS) and Y_train has shape
# (N, IMG_HEIGHT, IMG_WIDTH, 1), N being the number of training ids.
n_train = len(train_ids)
X_train = np.zeros((n_train, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train = np.zeros((n_train, IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)

print('Resizing training images and masks')
for i, sample_id in enumerate(tqdm(train_ids, total=n_train)):
    sample_dir = TRAIN_PATH + sample_id

    # Keep only the first IMG_CHANNELS channels (presumably this drops an alpha channel)
    image = imread(sample_dir + '/images/' + sample_id + '.png')[:, :, :IMG_CHANNELS]
    X_train[i] = resize(image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)

    # The dataset stores several mask files per image, each one holding a single
    # segmented part. Taking the element-wise maximum merges them into one mask.
    masks_dir = sample_dir + '/masks/'
    combined = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)
    for mask_name in next(os.walk(masks_dir))[2]:
        part = imread(masks_dir + mask_name)
        part = resize(part, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
        combined = np.maximum(combined, np.expand_dims(part, axis=-1))

    # Storing into the boolean array turns every non-zero pixel into True
    Y_train[i] = combined

# Build the test array the same way as the training images (the test set has no masks)
n_test = len(test_ids)
X_test = np.zeros((n_test, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
sizes_test = []  # [height, width] of every test image, recorded before resizing

print('Resizing test images')
for i, sample_id in enumerate(tqdm(test_ids, total=n_test)):
    image = imread(TEST_PATH + sample_id + '/images/' + sample_id + '.png')[:, :, :IMG_CHANNELS]
    sizes_test.append(list(image.shape[:2]))
    X_test[i] = resize(image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)

print('Done!')


# Show a randomly chosen training image together with its combined mask.
# random.randrange excludes the upper bound, so the index is always valid;
# random.randint(0, len(train_ids)) could return len(train_ids) and raise an IndexError.
image_x = random.randrange(len(train_ids))
imshow(X_train[image_x])
plt.show()
imshow(np.squeeze(Y_train[image_x]))  # squeeze drops the trailing channel axis of the mask
plt.show()

Training the model

In [None]:
# Assemble the functional model from the input tensor and the sigmoid mask output defined above
model = tf.keras.Model(inputs=[inputs], outputs=[output])

# The network outputs per-pixel probabilities. tf.keras.metrics.IoU treats y_pred as
# integer class ids, so sigmoid values below 1.0 would all be read as class 0 and the
# metric would not reflect the predicted mask. BinaryIoU thresholds the probabilities
# first; target_class_ids=[1] reports the IoU of the foreground (mask) class.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.BinaryIoU(target_class_ids=[1], threshold=0.5),
    ],
)

# validation_split=0.1 holds out the last 10% of X_train / Y_train for validation
results = model.fit(X_train, Y_train, validation_split=0.1, batch_size=16, epochs=25)


Creating predictions on the images

In [None]:
# random.randrange excludes the upper bound, so the result is always a valid index
# (random.randint(0, len(X_train)) could return len(X_train)).
idx = random.randrange(len(X_train))

# model.fit(..., validation_split=0.1) trained on the first 90% of X_train and
# validated on the last 10%; split_at is the index of the first validation sample.
split_at = int(X_train.shape[0] * 0.9)

preds_train = model.predict(X_train[:split_at], verbose=1)
preds_val = model.predict(X_train[split_at:], verbose=1)
preds_test = model.predict(X_test, verbose=1)

# Binarise the predicted probabilities at 0.5 to obtain 0/1 masks
preds_train_t = (preds_train > 0.5).astype(np.uint8)
preds_val_t = (preds_val > 0.5).astype(np.uint8)
preds_test_t = (preds_test > 0.5).astype(np.uint8)

# Perform a sanity check on a random validation sample: draw the image and
# overlay the predicted mask on top of it with a low alpha.
# randrange keeps ix within range; randint(0, len(preds_val_t)) could raise an IndexError below.
ix = random.randrange(len(preds_val_t))
imshow(X_train[split_at:][ix])
imshow(np.squeeze(preds_val_t[ix]), alpha=0.1)
plt.show()