In [None]:
import sys
import random
import numpy as np
from time import time
from pathlib import Path
from matplotlib import colors
import matplotlib.pyplot as plt
from IPython.display import clear_output

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, LambdaCallback
from tensorflow.keras import mixed_precision

from utils.plot_utils import plot_iou_trainId, plot_iou_catId
from utils.data_utils import get_labels, parse_record, get_dataset_from_tfrecord
from models.hrnet_keras import HRNet
from models.u2net import U2NET

# Reset Keras' global state so re-running this cell starts from a clean session.
K.clear_session()
# GPUs visible to TensorFlow; only referenced by the commented-out print below.
physical_devices = tf.config.experimental.list_physical_devices("GPU")

def enable_amp():
    """Set the global Keras mixed-precision policy to "mixed_float16"."""
    mixed_precision.set_global_policy("mixed_float16")
    

print("Tensorflow version: ", tf.__version__)
# print(physical_devices,"\n")
# Mixed precision is currently disabled; uncomment to turn it on.
# enable_amp()

In [None]:
%config InlineBackend.figure_format = 'retina'
# Global matplotlib look for every figure in this notebook.
plt.style.use('ggplot')
# Larger tick labels on both axes.
plt.rc('xtick',labelsize=16)
plt.rc('ytick',labelsize=16)

In [None]:
# echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
# sudo apt-get install apt-transport-https ca-certificates gnupg
# curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
# sudo apt-get update && sudo apt-get install google-cloud-sdk
# gcloud init --console-only

In [None]:
# gsutil cp gs://cl_datasets_01/cityscapes/records/trainIds_train.record /home/ubuntu/cityscapes_cv/records/
# gsutil cp gs://cl_datasets_01/cityscapes/records/trainIds_val.record /home/ubuntu/cityscapes_cv/records/
# gsutil cp gs://cl_datasets_01/cityscapes/records/trainIds_train_extra.record /home/ubuntu/cityscapes_cv/records/

In [None]:
# Distribution strategy used by train_step/test_step and for model creation.
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

In [None]:
# Switch between the two record sets below; also selects dataset lengths,
# weight file names and plot file names further down the notebook.
fine = True

if fine:
    train_tfrecord_dir = "records/trainIds_train.record"
    test_tfrecord_dir = "records/trainIds_val.record"
else:
    # Only a training record here: the validation split is carved out of it
    # with skip/take when the datasets are built, so test_tfrecord_dir is
    # intentionally left undefined in this branch.
    train_tfrecord_dir = "records/trainIds_train_extra.record"

# Network input resolution. The trailing values (512 / 1024) are presumably the
# full-size training resolution - TODO confirm.
img_height = 64 # 512
img_width = 128 # 1024
# Depth of the one-hot masks. Metrics in this notebook skip index 0.
n_classes = 20

labels = get_labels()
# Lookup tables keyed by trainId / categoryId. When several labels share an id,
# the last one in `labels` wins.
trainid2label = { label.trainId : label for label in labels }
catId2label = { label.categoryId : label for label in labels }

In [None]:
@tf.function
def random_crop(image, mask):
    """Cut the same randomly placed, randomly scaled window out of image and mask.

    Inputs: full resolution image and mask.
    A scale is drawn uniformly from a fixed table of values between 0.25 and
    1.0 (steps of 0.0625). The input height and width are multiplied by that
    scale and both tensors are cropped to the resulting size at one shared
    random offset.

    Args:
        image: (H, W, 3) tensor.
        mask: (H, W, 1) tensor. It is concatenated with `image` along the
            channel axis, so it must have the same dtype.

    Returns:
        (image_crop, mask_crop) where image_crop is (h, w, 3) and mask_crop is
        (h, w) - the mask's channel axis is dropped.
    """
    scale_table = [0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625,
                   0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0]
    scales = tf.constant(scale_table, dtype=tf.float32)
    # Upper bound follows the table length, so editing the table cannot
    # silently exclude entries or index out of range.
    pick = tf.random.uniform(shape=[], minval=0, maxval=len(scale_table), dtype=tf.int32)
    scale = scales[pick]

    shape = tf.cast(tf.shape(image), tf.float32)
    h = tf.cast(shape[0] * scale, tf.int32)
    w = tf.cast(shape[1] * scale, tf.int32)

    # Stack image and mask so a single random_crop call keeps them aligned.
    combined_tensor = tf.concat([image, mask], axis=2)
    combined_tensor = tf.image.random_crop(combined_tensor, size=[h, w, 4])
    return combined_tensor[:,:,0:3], combined_tensor[:,:,-1]

@tf.function
def mask_to_categorical(image, mask):
    """One-hot encode a label mask; the image is passed through unchanged.

    All size-1 axes of `mask` are squeezed away, the values are cast to int32
    class ids and expanded to `n_classes` float32 channels.

    Returns:
        (image, one_hot_mask)
    """
    class_ids = tf.cast(tf.squeeze(mask), tf.int32)
    one_hot_mask = tf.cast(tf.one_hot(class_ids, n_classes), tf.float32)
    return image, one_hot_mask


@tf.function
def load_image_train(input_image, input_mask):
    """Augment and preprocess one training example.

    Steps: cast both tensors to uint8, random horizontal flip (p=0.5, applied
    to image and mask together), resize to (img_height, img_width), scale the
    image to [0, 1] floats, random colour jitter (p=0.5), one-hot encode the
    mask.

    Args:
        input_image: image tensor with the channel axis last.
        input_mask: label mask tensor with a trailing channel axis.

    Returns:
        (image, mask): float32 image and float32 one-hot mask with
        `n_classes` channels.
    """
    image = tf.cast(input_image, tf.uint8)
    mask = tf.cast(input_mask, tf.uint8)

    if tf.random.uniform(()) > 0.5:
        image = tf.image.flip_left_right(image)
        mask = tf.image.flip_left_right(mask)

    # Random-crop augmentation is currently disabled:
    # if tf.random.uniform(()) > 0.5:
        # image, mask = random_crop(image, mask)
        # mask = tf.expand_dims(mask, axis=-1)

    image = tf.image.resize(image, (img_height, img_width))
    # Masks hold class ids, not intensities: the default bilinear resize would
    # blend neighbouring ids into values that belong to neither class, which
    # the int cast in mask_to_categorical then truncates to a wrong label.
    mask = tf.image.resize(mask, (img_height, img_width), method="nearest")

    image = tf.cast(image, tf.float32) / 255.0

    if tf.random.uniform(()) > 0.5:
        image = tf.image.random_brightness(image, 0.05)
        image = tf.image.random_saturation(image, 0.6, 1.6)
        image = tf.image.random_contrast(image, 0.7, 1.3)
        image = tf.image.random_hue(image, 0.05)

    image, mask = mask_to_categorical(image, mask)
    mask = tf.squeeze(mask)

    return image, mask


def load_image_test(input_image, input_mask):
    """Preprocess one validation example without augmentation.

    Both tensors are resized to (img_height, img_width); the image is scaled
    to [0, 1] floats and the mask is one-hot encoded.

    Returns:
        (image, mask): float32 image and float32 one-hot mask with
        `n_classes` channels.
    """
    image = tf.image.resize(input_image, (img_height, img_width))
    # Nearest-neighbour keeps every resized pixel a valid class id; the default
    # bilinear method would interpolate between ids at class boundaries.
    mask = tf.image.resize(input_mask, (img_height, img_width), method="nearest")

    image = tf.cast(image, tf.float32) / 255.0
    image, mask = mask_to_categorical(image, mask)
    mask = tf.squeeze(mask)
    return image, mask


def load_image_eval(input_image, input_mask):
    """Preprocess one example for evaluation against the unresized mask.

    Only the image is resized to (img_height, img_width) and scaled to
    [0, 1]; the mask keeps its incoming resolution and is one-hot encoded.

    Returns:
        (image, mask): float32 image and float32 one-hot mask.
    """
    target_size = (img_height, img_width)
    resized = tf.image.resize(input_image, target_size)
    # Mask resizing is deliberately skipped here:
    # input_mask = tf.image.resize(input_mask, (img_height, img_width))
    scaled = tf.cast(resized, tf.float32) / 255.0
    image, one_hot_mask = mask_to_categorical(scaled, input_mask)
    return image, tf.squeeze(one_hot_mask)


def load_image_train_no_aug(input_image, input_mask):
    """Preprocess one training example with no augmentation at all.

    Both tensors are resized to (img_height, img_width); the image is scaled
    to [0, 1] floats and the mask is one-hot encoded.

    Returns:
        (image, mask): float32 image and float32 one-hot mask with
        `n_classes` channels.
    """
    input_image = tf.image.resize(input_image, (img_height, img_width))
    # Nearest-neighbour keeps every resized pixel a valid class id; the default
    # bilinear method would interpolate between ids at class boundaries.
    input_mask = tf.image.resize(input_mask, (img_height, img_width), method="nearest")

    input_image = tf.cast(input_image, tf.float32) / 255.0
    input_image, input_mask = mask_to_categorical(input_image, input_mask)
    input_mask = tf.squeeze(input_mask)
    return input_image, input_mask

In [None]:
# Example counts for the selected record set; TEST_LENGTH is also the number of
# leading examples held out for validation when `fine` is False.
# NOTE(review): presumably these match the record contents - confirm.
if fine:
    TRAIN_LENGTH = 2975
    TEST_LENGTH = 500
else:
    TRAIN_LENGTH = 18000
    TEST_LENGTH = 1998

# Effective batch size per optimizer update when gradients are accumulated.
BATCH_SIZE = 12
# Number of micro-batches accumulated before each optimizer update.
ACCUM_STEPS = 6
# Shuffle buffer size for the training pipeline.
BUFFER_SIZE = 500
# Micro-batch size actually passed to Dataset.batch().
ADJ_BATCH_SIZE = BATCH_SIZE // ACCUM_STEPS

In [None]:
if fine:
    # Separate records for training and validation.
    train_ds = get_dataset_from_tfrecord(train_tfrecord_dir)
    test_ds = get_dataset_from_tfrecord(test_tfrecord_dir)
else:
    # Single record: the first TEST_LENGTH examples become the validation
    # split and the remainder is used for training.
    all_ds = get_dataset_from_tfrecord(train_tfrecord_dir)
    train_ds = all_ds.skip(TEST_LENGTH)
    test_ds = all_ds.take(TEST_LENGTH) 

In [None]:
# Preprocessing: resize, random flip and colour jitter for training (random
# cropping is currently commented out in load_image_train); resize only for
# validation.
# NOTE(review): `train` is rebound by the `def train(...)` cells further down,
# and `eval` shadows the Python builtin - consider renaming both.
train = train_ds.map(load_image_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test = test_ds.map(load_image_test)
eval = test_ds.map(load_image_eval)

In [None]:
# Training pipeline: shuffle within a BUFFER_SIZE window, then form
# micro-batches of ADJ_BATCH_SIZE examples and prefetch.
train_dataset = train.shuffle(BUFFER_SIZE).batch(ADJ_BATCH_SIZE)
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
# Validation pipeline: same micro-batch size, no shuffling.
test_dataset = test.batch(ADJ_BATCH_SIZE)

In [None]:
def label_to_rgb(mask):
    """Colourise a mask of train ids using the colours in `trainid2label`.

    Args:
        mask: numpy array whose first two axes are height and width; any
            size-1 axes are squeezed before the comparison result is used
            as an index.

    Returns:
        (height, width, 3) uint8 array. Pixels whose id is not a key of
        `trainid2label` stay black.
    """
    height, width = mask.shape[0], mask.shape[1]
    rgb = np.zeros((height, width, 3), dtype=np.uint8)
    for train_id, label in trainid2label.items():
        selected = (mask == train_id).squeeze()
        rgb[selected] = label.color
    return rgb


def display(display_list, title=True):
    """Show the given images side by side in a single row.

    Args:
        display_list: images accepted by Keras' `array_to_img`.
        title: when truthy, the panels are captioned 'Input Image',
            'True Mask', 'Predicted Mask' in that order (so at most three
            panels can be captioned).
    """
    captions = ['Input Image', 'True Mask', 'Predicted Mask'] if title else title
    n_panels = len(display_list)

    plt.figure(figsize=(15, 5)) # dpi=200
    for idx, item in enumerate(display_list):
        plt.subplot(1, n_panels, idx + 1)
        if captions:
            plt.title(captions[idx])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(item))
        plt.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# Walk through the first 21 preprocessed training examples and keep the last
# one as the fixed sample used by show_predictions().
for image, mask in train.take(21): # 16
    sample_image, sample_mask = image, mask

# Convert the one-hot mask back to class ids, restore a channel axis and
# colourise it for display.
sample_mask = tf.argmax(sample_mask, axis=-1)
sample_mask = sample_mask[..., tf.newaxis]
sample_mask = label_to_rgb(sample_mask.numpy())
display([sample_image, sample_mask])

In [None]:
# Weight file the training loops save to after every epoch.
# NOTE(review): this reads `model.name`, but no cell above this one creates
# `model` (the visible constructor call sits inside train() further down), so
# on a fresh kernel this cell presumably raises NameError - confirm and move it
# after model creation.
if fine:
    MODEL_PATH = "weights/"+model.name+".h5"
else:
    MODEL_PATH = "weights/"+model.name+"_coarse.h5"

In [None]:
def create_mask(pred_mask):
    """Turn per-class scores into an RGB label image.

    Size-1 axes are squeezed away, the highest-scoring class is picked along
    the last axis, and the resulting ids are colourised with `label_to_rgb`.
    """
    class_ids = tf.argmax(tf.squeeze(pred_mask), axis=-1)
    class_ids = class_ids[..., tf.newaxis]
    return label_to_rgb(class_ids.numpy())


def show_predictions():
    """Display the sample image, its true mask and the model's current prediction.

    Reads the notebook globals `model`, `sample_image` and `sample_mask`.
    For models whose name contains "u2net" only the first of the returned
    outputs is shown.
    """
    single_batch = sample_image[tf.newaxis, ...]
    prediction = model.predict(single_batch)
    if "u2net" in model.name:
        prediction = prediction[0]
    predicted_rgb = create_mask(prediction)
    display([sample_image, sample_mask, predicted_rgb])

        
class DisplayCallback(tf.keras.callbacks.Callback):
    """Keras callback that refreshes the sample prediction after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Clear the cell output, redraw the prediction and report the epoch (1-based)."""
        clear_output(wait=True)
        show_predictions()
        finished_epoch = epoch + 1
        print ('\nSample Prediction after epoch {}\n'.format(finished_epoch))

In [None]:
# Wrap both pipelines so each batch is split across the strategy's replicas.
# This rebinds the names, so the cell is not idempotent: re-running it wraps
# the already distributed datasets again.
train_dataset = strategy.experimental_distribute_dataset(train_dataset)
test_dataset = strategy.experimental_distribute_dataset(test_dataset)

### Custom loop

In [None]:
# CSV file that train() creates if missing and appends one row of per-epoch metrics to.
logger_file = "cityscapes_fine_hrnet.csv"

In [None]:
@tf.function
def iou_macro_coef(y_true, y_pred):
    """Smoothed soft IoU, macro-averaged over classes 1 .. n_classes-1.

    For each class the soft true positives, false negatives and false
    positives are summed over axes 1 and 2, the smoothed ratio
    (tp + 1) / (tp + fn + fp + 1) is averaged over the batch axis, and the
    per-class values are averaged. Channel 0 is skipped.

    Args:
        y_true: rank-4 one-hot targets with classes on the last axis.
        y_pred: rank-4 per-class scores with the same layout.
    """
    smooth = 1
    spatial_axes = (1, 2)
    running_total = 0
    for class_idx in range(1, n_classes):
        truth = y_true[:, :, :, class_idx]
        pred = y_pred[:, :, :, class_idx]
        tp = tf.math.reduce_sum(pred * truth, axis=spatial_axes)
        fn = tf.math.reduce_sum(truth * (1 - pred), axis=spatial_axes)
        fp = tf.math.reduce_sum(pred * (1 - truth), axis=spatial_axes)
        ratio = tf.math.divide_no_nan(tp + smooth, tp + fn + fp + smooth)
        running_total += tf.math.reduce_mean(ratio, axis=0)

    return running_total / (n_classes - 1)


@tf.function
def train_step(x, y):
    """Run one optimisation step on every replica and return the mean loss.

    Reads the notebook globals `model`, `cce_loss`, `optimizer` and
    `strategy`.

    Args:
        x: distributed batch of input images.
        y: distributed batch of one-hot masks.

    Returns:
        Scalar tensor: the loss averaged over replicas.
    """
    def train_fn(x, y):
        with tf.GradientTape() as tape:
            output = model(x, training=True)
            loss = cce_loss(y, output)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))
        return loss

    # The previous version passed an undefined name `opt` into strategy.run
    # (NameError) while the replica function used the global optimizer anyway.
    per_replica_losses = strategy.run(train_fn, args=(x, y))
    mean_loss = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_losses, axis=None)
    return mean_loss


@tf.function
def test_step(x, y) :
    """Evaluate one distributed batch without updating weights.

    Reads the notebook globals `model`, `cce_loss` and `strategy`.

    Returns:
        (mean_loss, mean_miou): loss and `iou_macro_coef`, each averaged
        over replicas.
    """
    def test_fn(inputs, targets):
        predictions = model(inputs, training=False)
        batch_loss = cce_loss(targets, predictions)
        batch_miou = iou_macro_coef(targets, predictions)
        return batch_loss, batch_miou

    replica_loss, replica_miou = strategy.run(test_fn, args=(x, y))
    reduce_mean = tf.distribute.ReduceOp.MEAN
    mean_loss = strategy.reduce(reduce_mean, replica_loss, axis=None)
    mean_miou = strategy.reduce(reduce_mean, replica_miou, axis=None)
    return mean_loss, mean_miou

In [None]:
def train(epochs):
    """Build an HRNet under the distribution strategy and train it with the custom loop.

    Each epoch runs `train_step` over `train_dataset` and `test_step` over
    `test_dataset`, appends one row to the CSV at `logger_file`, prints a
    summary line and saves the model to `MODEL_PATH`.

    Args:
        epochs: number of passes over `train_dataset`.

    Returns:
        (model, history): the trained model and a dict mapping "loss",
        "val_loss" and "val_iou_coef" to per-epoch lists.
    """
    # train_step, test_step and show_predictions look these up as notebook
    # globals, so they have to be published rather than kept local.
    global model, cce_loss, optimizer

    logfile = Path(logger_file)
    if not logfile.exists():
        # First run: create the log with its header row.
        with logfile.open("w+") as log_handle:
            log_handle.write("epoch, loss, val_loss, miou\n")

    with strategy.scope():

        # model = U2NET(input_height=img_height, input_width=img_width, n_classes=n_classes)
        model = HRNet(input_height=img_height, input_width=img_width, n_classes=n_classes, W=40)

        #  model.load_weights("weights/"+model.name+"_coarse.h5")

        # NOTE(review): Keras losses left at their default reduction are
        # documented to raise when called under a tf.distribute strategy in a
        # custom loop - confirm on the target TF version; the usual remedy is
        # reduction=NONE plus tf.nn.compute_average_loss in the step functions.
        cce_loss = tf.keras.losses.CategoricalCrossentropy()

        initial_lr = 0.01
        end_lr = 1e-5
        decay_steps = 120000
        learning_rate_fn = PolynomialDecay(
            initial_lr,
            decay_steps,
            end_lr,
            power=0.9
        )

        # NOTE(review): `decay` looks like it is meant as weight decay; in the
        # Keras optimizers that accept it, it is a learning-rate decay applied
        # in addition to the schedule - confirm the intent.
        optimizer = SGD(learning_rate=learning_rate_fn, momentum=0.9, decay=0.0005)

        train_loss_metric = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
        valid_loss_metric = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        valid_miou_metric = tf.keras.metrics.Mean('valid_iou_coef', dtype=tf.float32)

        history = {"loss": [], "val_loss": [], "val_iou_coef": []}

        start_time = time()
        for epoch in range(epochs):
            epoch_start_time = time()

            for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
                loss = train_step(x_batch_train, y_batch_train)
                # Update training metrics.
                train_loss_metric.update_state(loss)
                print("\r Batch {} -- loss: {:.4f}".format(step, loss.numpy()), end='')

            for x_batch_val, y_batch_val in test_dataset:
                loss, miou = test_step(x_batch_val, y_batch_val)
                valid_loss_metric.update_state(loss)
                valid_miou_metric.update_state(miou)

            train_loss = train_loss_metric.result().numpy()
            valid_loss = valid_loss_metric.result().numpy()
            valid_miou = valid_miou_metric.result().numpy()

            history['loss'].append(train_loss)
            history['val_loss'].append(valid_loss)
            history['val_iou_coef'].append(valid_miou)

            # The CSV row uses the 0-based epoch index; the console line below is 1-based.
            with logfile.open("a+") as log_handle:
                log_handle.write(
                    ",".join([str(epoch), str(train_loss), str(valid_loss), str(valid_miou)]) + "\n")

            # Start every epoch's averages from scratch.
            train_loss_metric.reset_states()
            valid_loss_metric.reset_states()
            valid_miou_metric.reset_states()

            t_epoch = time() - epoch_start_time
            template = '\n Epoch {} -- Time: {:.2f}s, Loss: {:.4f}, Val Loss: {:.4f}, Val mIoU: {:.4f}'
            print(template.format(epoch+1, t_epoch, train_loss, valid_loss, valid_miou))

            model.save(MODEL_PATH)

            # show_predictions()

    t_minutes = (time() - start_time) / 60
    print("Training finished in {:.2f} minutes".format(t_minutes))
    return model, history



In [None]:
def train(epochs=20):
    """Train with gradient accumulation over ACCUM_STEPS micro-batches.

    Gradients returned by `accumilate_train_step` are carried across
    micro-batches and applied once every ACCUM_STEPS steps. After each epoch
    the validation set is run through `test_step`, the metrics are appended
    to `history`, and the model is saved to `MODEL_PATH`.

    This definition replaces any earlier `train` in the notebook namespace.
    NOTE(review): it relies on notebook globals (`model`, `optimizer`,
    `history` with an 'iou_coef' key, the four `*_metric` objects and
    `accumilate_train_step`) that are not defined at module level in the
    cells visible here - confirm they exist before running.

    Args:
        epochs: number of passes over `train_dataset`.

    Returns:
        (model, history)
    """
    start_time = time()
    for epoch in range(epochs):
        epoch_start_time = time()

        total_iou = 0
        total_loss = 0
        # get trainable variables
        train_vars = model.trainable_variables
        # Create empty gradient list (not a tf.Variable list)
        accum_gradient = [tf.zeros_like(this_var) for this_var in train_vars]

        for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):

            loss_value, iou_value, accum_gradient = accumilate_train_step(
                x_batch_train, y_batch_train, accum_gradient, train_vars)

            total_loss += loss_value
            total_iou += iou_value

            # Apply the optimisation step after exactly ACCUM_STEPS
            # micro-batches. The previous `step > 0 and step % ACCUM_STEPS == 0`
            # test fired first at step == ACCUM_STEPS, i.e. after one
            # micro-batch too many, while still dividing by ACCUM_STEPS.
            if (step + 1) % ACCUM_STEPS == 0:

                optimizer.apply_gradients(zip(accum_gradient, train_vars))

                loss = total_loss / ACCUM_STEPS
                iou = total_iou / ACCUM_STEPS

                # Update training metrics. The loss metric receives the summed
                # loss and is divided by ACCUM_STEPS once per epoch below.
                train_loss_metric.update_state(total_loss)
                train_miou_metric.update_state(iou)

                print("\r Batch {} -- loss: {:.4f}, IoU: {:.4f}".format(
                    ((step + 1) // ACCUM_STEPS), loss.numpy(), iou.numpy()), end='')

                total_loss = 0
                total_iou = 0
                # get trainable variables
                train_vars = model.trainable_variables
                # Create empty gradient list (not a tf.Variable list)
                accum_gradient = [tf.zeros_like(this_var) for this_var in train_vars]

        # NOTE(review): gradients from a trailing, incomplete group of
        # micro-batches are discarded when the next epoch resets the buffers.

        # NOTE(review): the return value is ignored and the validation metrics
        # are not updated here; that only works if the `test_step` in scope
        # updates them itself - the one visible in this notebook returns
        # (loss, miou) instead.
        for x_batch_val, y_batch_val in test_dataset:
            test_step(x_batch_val, y_batch_val)

        train_loss = train_loss_metric.result().numpy() / ACCUM_STEPS
        train_miou = train_miou_metric.result().numpy()

        valid_loss = valid_loss_metric.result().numpy()
        valid_miou = valid_miou_metric.result().numpy()

        history['loss'].append(train_loss)
        history['val_loss'].append(valid_loss)
        history['iou_coef'].append(train_miou)
        history['val_iou_coef'].append(valid_miou)

        train_loss_metric.reset_states()
        train_miou_metric.reset_states()

        valid_loss_metric.reset_states()
        valid_miou_metric.reset_states()

        t_epoch = time() - epoch_start_time
        template = '\n Epoch {} -- Time: {:.2f}s, Loss: {:.4f} , mIoU: {:.4f}, Val Loss: {:.4f}, Val mIoU: {:.4f}'
        print(template.format(epoch+1, t_epoch, train_loss, train_miou, valid_loss, valid_miou))

        model.save(MODEL_PATH)

        #show_predictions()

    end_time = time()
    # True division: floor division dropped the fractional minutes that the
    # two-decimal format below is meant to show.
    t_minutes = (end_time - start_time) / 60
    print("Training finished in {:.2f} minutes".format(t_minutes))
    return model, history

In [None]:
# Runs whichever `train` definition was executed last in this kernel.
model, history = train(epochs=20)

# Without gradient accumulation: 411s per epoch
# With gradient accumulation: 581s per epoch

In [None]:
def train(epochs):
    """Plain training loop: one optimizer step per batch, no accumulation.

    After each epoch the validation set is run through `test_step`, the
    metric results are appended to `history`, the metrics are reset and the
    sample prediction is displayed.

    This definition replaces any earlier `train` in the notebook namespace.
    NOTE(review): it calls `train_step(optimizer, x, y)` and unpacks
    (loss, iou), and it expects `train_step`/`test_step` to update the four
    `*_metric` globals themselves. The `train_step` visible in this notebook
    takes (x, y) and returns a single loss, so this cell looks stale - confirm
    which step functions it is meant to run against.

    Args:
        epochs: number of passes over `train_dataset`.

    Returns:
        (model, history)
    """
    start_time = time()
    for epoch in range(epochs):
        epoch_start_time = time()

        for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
            loss, iou = train_step(optimizer, x_batch_train, y_batch_train)
            print("\r Batch {} -- loss: {:.4f}, IoU: {:.4f}".format(step, loss.numpy(), iou.numpy()), end='')

        for x_batch_val, y_batch_val in test_dataset:
            test_step(x_batch_val, y_batch_val)

        train_loss = train_loss_metric.result().numpy()
        train_miou = train_miou_metric.result().numpy()

        valid_loss = valid_loss_metric.result().numpy()
        valid_miou = valid_miou_metric.result().numpy()

        history['loss'].append(train_loss)
        history['val_loss'].append(valid_loss)
        history['iou_coef'].append(train_miou)
        history['val_iou_coef'].append(valid_miou)

        train_loss_metric.reset_states()
        train_miou_metric.reset_states()

        valid_loss_metric.reset_states()
        valid_miou_metric.reset_states()

        t_epoch = time() - epoch_start_time
        template = '\n Epoch {} -- Time: {:.2f}s, Loss: {:.4f} , mIoU: {:.4f}, Val Loss: {:.4f}, Val mIoU: {:.4f}'
        print (template.format(epoch+1, t_epoch, train_loss, train_miou, valid_loss, valid_miou))

        show_predictions()

    end_time = time()
    # True division: floor division dropped the fractional minutes that the
    # two-decimal format below is meant to show.
    t_minutes = (end_time - start_time) / 60
    print("Training finished in {:.2f} minutes".format(t_minutes))
    return model, history

In [None]:
def plot_history(history, model):
    """Plot loss and IoU learning curves side by side and save the figure.

    For models whose name contains "u2net" the 'd0_*' series are plotted;
    otherwise the plain 'loss' / 'iou_coef' series. A series that is missing
    from `history` is skipped instead of raising, so histories that only
    track validation IoU can still be plotted.

    The figure is written to "plots/<model name>_learning_curves.png"
    (with a "_coarse" suffix when `fine` is False); the directory is created
    if needed.

    Args:
        history: mapping from metric name to a per-epoch sequence.
        model: object exposing a `name` attribute.
    """
    prefix = "d0_" if "u2net" in model.name else ""

    def _plot_series(key, line_style, label):
        # Draw one curve if the history actually recorded it.
        if key in history:
            plt.plot(history[key], line_style, label=label)

    plt.figure(figsize=(15,7))

    plt.subplot(1,2,1)
    _plot_series(prefix + 'loss', 'r', 'Training loss')
    _plot_series('val_' + prefix + 'loss', 'b', 'Validation loss')
    plt.title("Loss: "+model.name, fontsize=16)
    plt.xlabel('Epoch', fontsize=16)
    plt.legend(prop={'size': 14})

    plt.subplot(1,2,2)
    _plot_series(prefix + 'iou_coef', 'r', 'IoU coefficient')
    _plot_series('val_' + prefix + 'iou_coef', 'b', 'Validation IoU coefficient')
    plt.title('IoU Coefficient: '+model.name, fontsize=16)
    plt.xlabel('Epoch', fontsize=16)
    plt.legend(prop={'size': 14})

    # savefig does not create missing directories.
    Path("plots").mkdir(parents=True, exist_ok=True)
    if fine:
        plt.savefig("plots/"+model.name+"_learning_curves.png")
    else:
        plt.savefig("plots/"+model.name+"_learning_curves_coarse.png")
    plt.show()

In [None]:
# Learning curves for the run above; the figure is also saved under plots/.
plot_history(history, model)

### Evaluation

In [None]:
def evaluate_iou(model, dataset, n_samples):
    """Predict up to `n_samples` examples one by one and accumulate mean IoU.

    Each example is predicted with a batch of one, the prediction is
    binarised at its per-pixel maximum, channel 0 is dropped from both
    prediction and target, and the remaining class indices are fed into a
    single `MeanIoU` metric with `n_classes - 1` classes.

    The metric is never reset, so entry i of the returned scores is the
    running mean IoU over examples 0..i, not the IoU of example i alone.

    NOTE(review): after channel 0 is dropped, a pixel whose only active
    channel was 0 has no active channel left and argmax maps it to index 0,
    i.e. the first kept class - confirm that this is intended rather than
    masking such pixels out.

    Args:
        model: object with `predict` and a `name` attribute; for names
            containing "u2net" only the first returned output is used.
        dataset: iterable of unbatched (image, one_hot_mask) tensor pairs.
        n_samples: maximum number of examples to evaluate.

    Returns:
        (iou_macro_scores, miou_op): numpy array with one running score per
        evaluated example (shorter than `n_samples` if the dataset runs out
        early) and the metric object holding the accumulated state.
    """
    iou_macro_scores = np.zeros((n_samples,))
    inf_times = np.zeros((n_samples, ))
    miou_op =  tf.keras.metrics.MeanIoU(num_classes=n_classes-1)
    n_done = 0

    for idx, (image, mask) in enumerate(dataset):
        if idx >= n_samples:
            break
        # "\\ " emits the same backslash as before without relying on an
        # invalid escape sequence.
        print("\r Predicting {} \\ {} ".format(idx+1, n_samples), end='')

        X = np.expand_dims(image.numpy(), axis=0)
        y_true = np.expand_dims(mask.numpy(), axis=0)

        t_start = time()
        y_pred = model.predict(X)
        inf_times[idx] = time() - t_start

        if "u2net" in model.name:
            y_pred = y_pred[0]

        # y_pred = tf.image.resize(y_pred, (1024, 2048))
        # Keep only the winning channel per pixel (ignoring ~zero scores).
        threshold = tf.math.reduce_max(y_pred, axis=-1, keepdims=True)
        y_pred = tf.logical_and(y_pred >= threshold, tf.abs(y_pred) > 1e-12)

        y_pred = tf.cast(tf.squeeze(y_pred, axis=0), tf.int32)
        y_true = tf.cast(tf.squeeze(y_true, axis=0), tf.int32)

        # Drop channel 0 and convert one-hot maps to class indices 0..n_classes-2.
        y_true = tf.argmax(y_true[:,:,1:], axis=-1)
        y_pred = tf.argmax(y_pred[:,:,1:], axis=-1)

        # miou_op.reset_states()
        miou_op.update_state(y_true, y_pred)
        iou_macro_scores[idx] = miou_op.result().numpy()
        n_done = idx + 1

    # Trim the buffers so a dataset shorter than n_samples does not leave
    # zero padding in the returned scores or in the timing average.
    iou_macro_scores = iou_macro_scores[:n_done]
    inf_times = inf_times[:n_done]

    if n_done:
        print("Average inference time: {:.2f}s".format(np.mean(inf_times)))

    return iou_macro_scores, miou_op


def mean_over_valid(x):
    """Return the mean of the entries of array `x` that differ from the sentinel -1."""
    keep = x != -1
    valid_entries = x[keep]
    return np.mean(valid_entries)

To do: Make mean IoU a metric, so you can do

```python
loss, accuracy, miou = model.evaluate(dataset)
```

Might need to write a custom training loop to reset metric states

In [None]:
# Evaluate on the resized validation set (masks at network resolution).
iou_macro_scores, miou_op = evaluate_iou(model=model, dataset=test, n_samples=TEST_LENGTH)
# Alternative: evaluate against the unresized masks.
# iou_macro_scores, miou_op = evaluate_iou(model=model, dataset=eval, n_samples=TEST_LENGTH)
# NOTE(review): evaluate_iou stores the metric's running result after each
# image (the metric is never reset), so this is the mean of running values -
# confirm whether miou_op.result() is the number actually wanted.
iou_mean_macro = np.mean(iou_macro_scores)

In [None]:
# miou_op.get_weights()[0] is the metric's first weight - presumably its
# accumulated confusion matrix; TODO confirm.
# Column sums + row sums count the diagonal twice, so union = union_int - inters.
union_int = np.sum(miou_op.get_weights()[0], axis=0)+np.sum(miou_op.get_weights()[0], axis=1)
inters = np.diag(miou_op.get_weights()[0])
# The +1 in the denominator presumably guards against division by zero for
# classes that never occur; it also biases every IoU slightly downwards.
ious = inters / (union_int-inters+1)

print("Mean IoU: {:.4f} \n".format(iou_mean_macro))
# Metric index i corresponds to trainId i+1 because channel 0 was dropped.
for i in range(ious.shape[0]) :
    print("IoU for {}: {:.2f}".format(trainid2label[i+1].name, np.round(ious[i], 2)))

In [None]:
# Per-class IoU chart via the project helper in utils.plot_utils.
plot_iou_trainId(
    trainId_label_map=trainid2label,
    catId_label_map=catId2label, 
    n_classes=n_classes, 
    iou_class=ious,
    model=model, 
    iou_mean=iou_mean_macro,
    current_dir=""
)

In [None]:
def visualize_confusion_matrix(confusion, metric, label_classes, model):
    """Render a confusion matrix as a heat map with class names on both axes.

    Args:
        confusion: 2-D array to display.
        metric: mean IoU value shown in the title.
        label_classes: class names, one per row/column.
        model: object exposing a `name` attribute (used in the title).
    """
    n_labels = len(label_classes)
    tick_positions = np.arange(n_labels)
    cell_edges = np.arange(n_labels + 1) - .5
    heading = "{} Confusion Matrix, with Mean IoU = {:.3f}".format(model.name, metric)

    plt.figure(figsize=(14,14))
    plt.title(heading, fontsize=22)
    plt.imshow(confusion)
    fig = plt.gcf()
    ax = plt.gca()

    plt.xticks(tick_positions, label_classes)
    plt.yticks(tick_positions, label_classes)
    # Column labels are rotated so long class names do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=-90, ha="center", rotation_mode="default")

    # Minor ticks on the cell borders keep the outer rows/columns of the
    # heat map from being cut off.
    ax.set_xticks(cell_edges, minor=True)
    ax.set_yticks(cell_edges, minor=True)
    ax.tick_params(which="minor", bottom=False, left=False)
    ax.grid(False)

    fig.tight_layout()
    plt.show()

In [None]:
# Each column of the matrix is divided by its own sum before plotting
# (broadcast over rows). A column that sums to zero yields NaN cells.
visualize_confusion_matrix(
    confusion = miou_op.get_weights()[0] / np.sum(miou_op.get_weights()[0], axis=0), 
    metric = iou_mean_macro, 
    # Names for trainIds 1..n_classes-1, matching the metric's class indices.
    label_classes = [trainid2label[i].name for i in range(1, n_classes)],
    model = model
)