# Semantic Segmentation of Dogs/Cats Dataset With Numpy

### Gradient Checking: Stanford Course https://cs231n.github.io/neural-networks-3/

To be safe, it is best to use a short burn-in period during which the network is allowed to learn, and to perform the gradient check only after the loss has started to go down.
An incorrect initialization can slow down or even completely stall the learning process. Luckily, this issue can be diagnosed relatively easily. One way to do so is to plot activation/gradient histograms for all layers of the network. Intuitively, it is not a good sign to see any strange distributions - e.g. with tanh neurons we would like to see neuron activations distributed across the full range of [-1,1], instead of seeing all neurons outputting zero, or all neurons being completely saturated at either -1 or 1.

In [None]:
import sys
import os
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.losses import *
from tensorflow.keras.utils import plot_model, to_categorical
from tensorflow.keras import optimizers, models
from tensorflow.keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

from tensorflow.python.ops import nn_ops
from tensorflow.python.keras.utils import conv_utils

from itertools import chain
from skimage.io import imread, imshow #, concatenate_image
from skimage.transform import resize
from skimage.morphology import label
import matplotlib.pyplot as plt
import numpy as np
import glob
import cv2
import random
import itertools
import timeit

import time
import datetime
import imageio
import PIL

import tensorflow_datasets as tfds
tfds.disable_progress_bar()

from IPython.display import clear_output
from IPython import display

In [None]:
# `mixed_precision` is not provided by any of the imports at the top of the
# notebook, so bring it into scope here.
from tensorflow.keras import mixed_precision

# Enable XLA JIT compilation for TensorFlow graphs.
tf.config.optimizer.set_jit(True)

# Enable on-demand GPU memory allocation. TensorFlow requires the setting to be
# the same on every visible GPU, and indexing [0] would crash on CPU-only hosts.
physical_devices = tf.config.list_physical_devices("GPU")
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
if not physical_devices:
    print("No GPU found: running on CPU")

# Compute in float16 while keeping variables in float32.
# NOTE(review): the custom training loop below does not apply loss scaling
# (that code is commented out in train_step), so float16 gradients may
# underflow - confirm this is intended.
policy = mixed_precision.Policy("mixed_float16")
mixed_precision.set_global_policy(policy)
print("Compute dtype: %s" % policy.compute_dtype)
print("Variable dtype: %s" % policy.variable_dtype)

In [None]:
# Root folder of the Oxford-IIIT Pets download. The trailing empty component
# makes os.path.join append the platform's separator, so code that
# concatenates `path + "images"` keeps working on every OS.
path = os.path.join("downloaded_datasets", "oxford_pets", "")
# Seed used when shuffling the image list.
randomseed = 2019
# Spatial size every image and mask is resized to.
img_height = 224
img_width = 224

In [None]:
def get_data(path):
    """Load every ``.jpg`` image under ``path`` together with its trimap mask.

    Images are read from ``<path>/images`` and masks from
    ``<path>/annotations/trimaps`` (same file stem, ``.png`` extension). Both
    are resized to ``(img_height, img_width)``; masks use nearest-neighbour
    interpolation so no new label values are invented.

    Args:
        path: Dataset root directory (with or without a trailing separator).

    Returns:
        Tuple ``(X, y)`` of float32 arrays with shapes
        ``(N, img_height, img_width, 3)`` and ``(N, img_height, img_width)``.
        ``X`` holds the raw pixel values (``preserve_range=True``), ``y`` the
        raw mask values as stored in the trimap files.

    Raises:
        SystemExit: If no ``.jpg`` file is found in the image directory.
    """
    print("Path")

    image_dir = os.path.join(path, "images")
    mask_dir = os.path.join(path, "annotations", "trimaps")

    # Sort before shuffling: directory listing order is OS-dependent, so the
    # seed alone would not give a reproducible order.
    ids = sorted(name for name in os.listdir(image_dir) if name.endswith(".jpg"))

    # Shuffle with a dedicated, seeded NumPy generator. (Seeding the stdlib
    # `random` module has no effect on `np.random.shuffle`.)
    np.random.RandomState(randomseed).shuffle(ids)

    print("Number of images: " + str(len(ids)))
    if not ids:
        print("no image found")
        sys.exit()

    X = np.zeros((len(ids), img_height, img_width, 3), dtype=np.float32)
    y = np.zeros((len(ids), img_height, img_width), dtype=np.float32)
    print(y.shape)

    for n, id_ in enumerate(ids):
        print("\r Loading %s \\ %s " % (n, len(ids)), end='')

        # Load and resize the image, keeping the original value range.
        img = load_img(os.path.join(image_dir, id_))
        x_img = resize(img_to_array(img), (img_height, img_width, 3),
                       mode='constant', preserve_range=True)

        # Load the matching mask. Note cv2.resize takes dsize as (width, height).
        mask_name = os.path.splitext(id_)[0] + ".png"
        mask = img_to_array(load_img(os.path.join(mask_dir, mask_name),
                                     color_mode="grayscale"))
        mask = cv2.resize(mask, (img_width, img_height),
                          interpolation=cv2.INTER_NEAREST)

        X[n] = x_img
        # reshape tolerates both (H, W) and (H, W, 1) results from cv2.resize.
        y[n] = mask.reshape(img_height, img_width)

    print("Done!")
    # Return the buffers directly; wrapping them in np.array() would duplicate
    # the whole dataset in memory.
    return X, y

In [None]:
def expand_mask_channels(y_train, y_test):
    """One-hot encode integer class masks into three channels.

    Values below ``1/255`` (including negatives) are mapped to class 0 and the
    rest are truncated to integers, as in the original implementation. The
    input arrays are left untouched.

    Args:
        y_train: Array-like of training masks, shape ``(N, H, W)``, with
            class ids in ``{0, 1, 2}``.
        y_test: Array-like of test masks, same layout.

    Returns:
        Tuple ``(train_masks, test_masks)`` of float32 arrays with shape
        ``(N, H, W, 3)``.

    Raises:
        IndexError: If a mask contains a class id of 3 or more.
    """
    num_classes = 3
    identity = np.eye(num_classes, dtype=np.float32)

    def _one_hot(masks):
        masks = np.asarray(masks)
        encoded = np.zeros(masks.shape + (num_classes,), dtype=np.float32)
        # Encode one mask at a time to keep temporary memory small.
        for idx, mask in enumerate(masks):
            # np.where builds a new array, so the caller's mask is not modified.
            class_ids = np.where(mask < 1 / 255, 0, mask).astype(np.int64)
            encoded[idx] = identity[class_ids]
        return encoded

    return _one_hot(y_train), _one_hot(y_test)

In [None]:
X_all, y_all = get_data(path)

# Hold out 20% of the samples for testing. `train_test_split` is not imported
# anywhere in this notebook, so the split is done with a seeded NumPy
# permutation instead (reproducible via seed 42).
_split_order = np.random.RandomState(42).permutation(len(X_all))
_n_test = int(np.ceil(0.2 * len(X_all)))
_test_idx, _train_idx = _split_order[:_n_test], _split_order[_n_test:]

# Fancy indexing returns fresh copies, so the in-place scaling below does not
# touch X_all.
X_train, X_test = X_all[_train_idx], X_all[_test_idx]
y_train, y_test = y_all[_train_idx], y_all[_test_idx]

# Scale pixel values to [0, 1].
X_train = X_train.astype(np.float32, copy=False)
X_test = X_test.astype(np.float32, copy=False)
X_train /= 255
X_test /= 255

# Shift the mask values down by one so that class ids start at 0.
y_train = y_train - 1
y_test = y_test - 1

In [None]:
# Sample counts of both splits and the number of mask classes.
TRAIN_LENGTH, TEST_LENGTH = len(X_train), len(X_test)
n_classes = 3

# Replace the integer masks with their one-hot encoded versions.
y_train, y_test = expand_mask_channels(y_train, y_test)

In [None]:
# Show the first training image/mask and the first test image/mask in one row.
plt.figure(figsize=(15, 5))

for _slot, _picture in enumerate((X_train[0], y_train[0], X_test[0], y_test[0]), start=1):
    plt.subplot(1, 4, _slot)
    plt.imshow(_picture)
    plt.gca().axis("off")

plt.tight_layout()
plt.show()


In [None]:
# Reset the Keras backend session state before building the model.
K.clear_session()

In [None]:
def unet_model(input_height=img_height,  input_width=img_width, f_scale = 1, num_classes=3):
    """Build a U-Net style encoder/decoder for per-pixel classification.

    The encoder has four conv stages (16/32/64/128 base filters), each followed
    by 2x2 max pooling, then a 256-filter bottleneck with dropout. The decoder
    upsamples four times, concatenating the matching encoder feature map at
    every level and applying Conv -> BatchNorm -> ReLU blocks.

    Args:
        input_height: Input image height; must be divisible by 16.
        input_width: Input image width; must be divisible by 16.
        f_scale: Multiplier applied to the base filter count of every conv
            layer (may be fractional, e.g. 0.5).
        num_classes: Number of output channels (classes).

    Returns:
        A ``tf.keras.Model`` mapping ``(H, W, 3)`` images to ``(H, W,
        num_classes)`` raw logits (no activation), suitable for a loss built
        with ``from_logits=True``.

    Raises:
        ValueError: If the input size is not divisible by 16.
    """
    if input_height % 16 or input_width % 16:
        # Four 2x poolings followed by four 2x upsamplings only line up with
        # the skip connections when both sizes are multiples of 16.
        raise ValueError("input_height and input_width must be divisible by 16")

    def n_filters(base):
        # Conv2D needs an integer filter count even when f_scale is fractional.
        return max(1, int(round(f_scale * base)))

    def conv_relu(x, base):
        return Conv2D(n_filters(base), 3, padding='same', activation="relu",
                      kernel_initializer='he_normal')(x)

    def conv_bn_relu(x, base):
        x = Conv2D(n_filters(base), 3, padding='same',
                   kernel_initializer='he_normal')(x)
        x = BatchNormalization()(x)
        return Activation("relu")(x)

    img_input = tf.keras.layers.Input(shape=(input_height, input_width, 3))

    # -------------------------- Encoder --------------------------

    c1 = conv_relu(conv_relu(img_input, 16), 16)
    p1 = MaxPooling2D((2, 2))(c1)

    c2 = conv_relu(conv_relu(p1, 32), 32)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = conv_relu(conv_relu(conv_relu(p2, 64), 64), 64)
    p3 = MaxPooling2D((2, 2))(c3)

    c4 = conv_relu(conv_relu(conv_relu(p3, 128), 128), 128)
    p4 = MaxPooling2D((2, 2))(c4)

    # ------------------------ Bottleneck -------------------------

    c5 = conv_relu(conv_relu(conv_relu(p4, 256), 256), 256)
    c5 = Dropout(0.5)(c5)

    # -------------------------- Decoder --------------------------

    # Each block is chained on the previous one. The original second block of
    # this stage read from u6 again, which left the first block unused.
    u6 = concatenate([UpSampling2D((2, 2))(c5), c4])
    c6 = conv_bn_relu(conv_bn_relu(conv_bn_relu(u6, 128), 128), 128)

    u7 = concatenate([UpSampling2D((2, 2))(c6), c3])
    c7 = conv_bn_relu(conv_bn_relu(conv_bn_relu(u7, 64), 64), 64)

    u8 = concatenate([UpSampling2D((2, 2))(c7), c2])
    c8 = conv_bn_relu(conv_bn_relu(u8, 32), 32)

    u9 = concatenate([UpSampling2D((2, 2))(c8), c1])
    c9 = conv_bn_relu(conv_bn_relu(u9, 16), 16)

    # Linear 1x1 projection to class scores: a ReLU here would clamp negative
    # logits to zero. float32 keeps the output numerically stable when a
    # mixed-precision policy is active.
    logits = Conv2D(num_classes, 1, padding='same',
                    kernel_initializer='he_normal', dtype="float32")(c9)

    return tf.keras.Model(inputs=img_input, outputs=logits)

In [None]:
def dice_coef(y_true, y_pred):
    """Mean soft Dice coefficient over the ``n_classes`` channels.

    For each class the Dice score ``(2*|A∩B| + s) / (|A| + |B| + s)`` is
    computed per sample (with smoothing ``s = 1``), averaged over the batch,
    and the per-class results are then averaged.

    Args:
        y_true: One-hot ground truth with classes on the last axis, e.g.
            ``(batch, H, W, n_classes)`` or ``(batch, pixels, n_classes)``.
        y_pred: Predicted per-class scores with the same shape. These should
            be probabilities (e.g. softmax output), not raw logits.

    Returns:
        Scalar tensor with the averaged Dice score.
    """
    smooth = 1.0
    # Use one dtype for both operands (predictions may be float16 under a
    # mixed-precision policy).
    y_true = K.cast(y_true, "float32")
    y_pred = K.cast(y_pred, "float32")

    dice = 0.0
    for i in range(n_classes):
        # `...` selects the class on the LAST axis regardless of rank; the
        # former `[:, :, i]` picked a width column on 4-D inputs.
        true_i = y_true[..., i]
        pred_i = y_pred[..., i]
        # Reduce every non-batch axis so each sample yields one score.
        sample_axes = list(range(1, K.ndim(true_i)))
        intersection = K.sum(true_i * pred_i, axis=sample_axes)
        total = K.sum(true_i + pred_i, axis=sample_axes)
        dice = dice + K.mean((2.0 * intersection + smooth) / (total + smooth))
    return dice / n_classes

In [None]:
# Fixed training example (index 11) used by show_predictions() for previews.
sample_image, sample_mask = X_train[11], y_train[11]

def display(display_list):
    """Plot the given arrays side by side in one row.

    Panels are titled, in order, 'Input Image', 'True Mask' and
    'Predicted Mask', so at most three entries are supported.
    Example: ``display([sample_image, sample_mask])``.
    """
    title = ['Input Image', 'True Mask', 'Predicted Mask']
    panel_count = len(display_list)

    plt.figure(figsize=(15, 15))
    for idx, item in enumerate(display_list):
        plt.subplot(1, panel_count, idx + 1)
        plt.title(title[idx])
        picture = tf.keras.preprocessing.image.array_to_img(item)
        plt.imshow(picture)
        plt.axis('off')
    plt.show()
    
def create_mask(pred_mask):
    """Return the arg-max class map of the first element of ``pred_mask``.

    The arg-max is taken over the last axis and a trailing axis of size one
    is appended before the first entry along the leading axis is returned.
    """
    class_ids = tf.argmax(pred_mask, axis=-1)
    with_channel = class_ids[..., tf.newaxis]
    return with_channel[0]

def show_predictions():
    """Display the sample image, its true mask and the model's predicted mask."""
    single_batch = sample_image[tf.newaxis, ...]
    predicted_mask = create_mask(model.predict(single_batch))
    display([sample_image, sample_mask, predicted_mask])

In [None]:
class DisplayCallback(tf.keras.callbacks.Callback):
    """Keras callback that redraws the sample prediction after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Clear the cell output, plot the prediction and print the epoch number."""
        clear_output(wait=True)
        show_predictions()
        message = '\nSample Prediction after epoch {}\n'.format(epoch + 1)
        print(message)

In [None]:
# Build the network with half the base filter counts.
model = unet_model(
    input_height=img_height,
    input_width=img_width,
    f_scale=0.5,
)
# tf.keras.utils.plot_model(model, show_shapes=True, dpi=64)

In [None]:
# Plot the sample image, its mask and the model's current prediction.
show_predictions()


In [None]:
BATCH_SIZE = 32

# Training pipeline: shuffled with a 1024-element buffer, then batched.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=1024)
    .batch(BATCH_SIZE)
)

# Validation pipeline: batched in the original order.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(BATCH_SIZE)
)

In [None]:
# Adam optimizer with a learning rate of 1e-3.
optimizer = Adam(learning_rate=1e-3)

# Cross-entropy on one-hot targets; the model output is treated as raw logits.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

In [None]:
# Manual ReduceLROnPlateau settings used by the training loop:
# `wait` is compared against `patience`, and the learning rate is
# multiplied by `factor` when it is lowered.
# NOTE(review): `best_dice_coef` is not read by the visible training loop - confirm it is still needed.
wait, best_dice_coef = 0, 0
patience, factor = 5, 0.1

# Running metrics for the training pass.
train_loss = tf.keras.metrics.Mean(name='train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.CategoricalAccuracy()
train_dice_coef = tf.keras.metrics.Mean(name="train_dice_coef", dtype=tf.float32)

# Running metrics for the validation pass.
valid_loss = tf.keras.metrics.Mean(name='val_loss', dtype=tf.float32)
valid_accuracy = tf.keras.metrics.CategoricalAccuracy()
valid_dice_coef = tf.keras.metrics.Mean(name="val_dice_coef", dtype=tf.float32)

In [None]:
def train_step(model, optimizer, x, y):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: Network returning raw logits.
        optimizer: Keras optimizer used to apply the gradients.
        x: Batch of input images.
        y: Batch of one-hot target masks.

    Returns:
        The loss value of this batch.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)

    # Regular-precision update.
    # NOTE: with a float16 policy the loss would normally be scaled before
    # differentiation and the gradients unscaled afterwards; that path is
    # not implemented here.
    gradients = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

    # Update training metrics.
    train_loss(loss_value)
    train_accuracy(y, logits)
    # Dice is only meaningful on probabilities, so convert the logits first.
    probabilities = tf.nn.softmax(tf.cast(logits, tf.float32), axis=-1)
    train_dice_coef(dice_coef(y, probabilities))
    return loss_value

def test_step(model, x, y):
    """Evaluate one validation batch and update the validation metrics.

    Args:
        model: Network returning raw logits.
        x: Batch of input images.
        y: Batch of one-hot target masks.

    Returns:
        The loss value of this batch.
    """
    val_logits = model(x, training=False)
    val_loss_value = loss_fn(y, val_logits)
    valid_accuracy(y, val_logits)
    valid_loss(val_loss_value)
    # Dice is only meaningful on probabilities, so convert the logits first.
    val_probabilities = tf.nn.softmax(tf.cast(val_logits, tf.float32), axis=-1)
    valid_dice_coef(dice_coef(y, val_probabilities))
    return val_loss_value

def progress(status, count, total, value=''):
    """Redraw a one-line, 50-character text progress bar on stdout.

    Args:
        status: Label printed in front of the bar.
        count: Number of steps completed so far.
        total: Total number of steps; a value <= 0 draws an empty bar.
        value: Value shown after ``loss:`` (e.g. the current loss).
    """
    bar_len = 50
    fraction = count / float(total) if total > 0 else 0.0
    # Clamp so the bar never exceeds its width when count overshoots total.
    filled_len = min(bar_len, max(0, int(round(bar_len * fraction))))
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    # '\r' returns to the line start so successive calls overwrite each other.
    print('\r%s: [%s] %s / %s ... loss: %s' % (status, bar, count, total, value), end='')

In [None]:
epochs = 50
# The last batch may be partial, so round up to get the real step counts.
num_steps = int(np.ceil(len(X_train) / BATCH_SIZE))
num_val_steps = int(np.ceil(len(X_test) / BATCH_SIZE))

# Plateau / early-stopping state, initialised here so that re-running this
# cell always starts from a clean state.
best_val_acc = 0.0     # best validation accuracy seen so far
wait = 0               # non-improving epochs since the last improvement or LR cut
total_wait = 0         # non-improving epochs since the last improvement
early_stopping = 15    # stop after this many consecutive non-improving epochs
min_lr = 1e-10         # stop once the learning rate would fall to this value

start_time = time.time()
for epoch in range(epochs):
    epoch_start_time = time.time()
    print("\nStart of epoch %d" % (epoch,))

    # ------------------ TRAIN MODEL ON BATCHES ------------------
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(model, optimizer, x_batch_train, y_batch_train)
        progress("training", step + 1, num_steps, round(float(loss_value), 4))

    # ------------------ VALIDATE MODEL ON BATCHES ------------------
    for val_step, (x_batch_val, y_batch_val) in enumerate(val_dataset):
        val_loss_value = test_step(model, x_batch_val, y_batch_val)
        progress("validation", val_step + 1, num_val_steps, round(float(val_loss_value), 4))

    # Read the epoch metrics before they are reset below.
    val_acc = float(valid_accuracy.result())

    # Report and reset first, so every exit path (including the early-stopping
    # breaks) prints the summary and leaves the metrics clean.
    print()  # end the '\r' progress line
    template = "Epoch {}: Loss: {} , Accuracy: {} , Dice: {} , Val Loss: {} , Val Accuracy: {} , Val Dice: {}"
    print(template.format(epoch+1,
                         round(float(train_loss.result()), 2),
                         round(float(train_accuracy.result()), 2),
                         round(float(train_dice_coef.result()), 2),
                         round(float(valid_loss.result()), 2),
                         round(val_acc, 2),
                         round(float(valid_dice_coef.result()), 2)))

    train_loss.reset_states()
    valid_loss.reset_states()
    train_dice_coef.reset_states()
    train_accuracy.reset_states()
    valid_accuracy.reset_states()
    valid_dice_coef.reset_states()
    print("Time taken: %.2fs" % (time.time() - epoch_start_time))

    # ------------------ PLATEAU HANDLING ------------------
    if val_acc > best_val_acc:
        print("\nval_accuracy increased from %.4f to %.4f" % (best_val_acc, val_acc))
        best_val_acc = val_acc
        wait = 0
        total_wait = 0
        continue

    wait += 1
    total_wait += 1
    if total_wait >= early_stopping:
        print("\n val_accuracy did not increase: early stopping")
        break
    if wait >= patience:
        print("\nval_accuracy did not increase: lowering learning rate")
        lr = float(K.get_value(optimizer.learning_rate))
        new_lr = lr * factor
        if new_lr <= min_lr:
            print("Min learning rate reached: early stopping")
            break
        K.set_value(optimizer.learning_rate, new_lr)
        wait = 0
        print("Epoch %d: Learning rate is: %.10e" % (epoch+1, new_lr))
    else:
        print("\nval_accuracy did not increase")


end_time = time.time()
# True division keeps the fractional minutes that "%.1f" is meant to show.
t_minutes = (end_time - start_time) / 60
print("Training finished in %.1f minutes" % t_minutes)

In [None]:
# Index of the test sample to inspect.
img_num = 25

title = ['Input Image', 'Predicted Mask', 'True Mask']

# Predict on a batch of one, then reduce the class scores to a class-id map.
_single_batch = np.reshape(X_test[img_num], (1, img_width, img_height, 3))
pred = model.predict(_single_batch)
pred = np.argmax(np.reshape(pred, (img_width, img_height, 3)), axis=2)

# Panels from left to right: input image, true mask, predicted mask.
_panels = [
    (title[0], X_test[img_num]),
    (title[2], np.reshape(y_test[img_num], (img_width, img_height, 3))),
    (title[1], pred),
]

plt.figure(figsize=(14, 6))
for _position, (_caption, _picture) in enumerate(_panels, start=1):
    plt.subplot(1, 3, _position)
    plt.title(_caption)
    plt.imshow(_picture)
    plt.gca().axis('off')

plt.tight_layout()
plt.show()