In [18]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.losses import *
from tensorflow.keras.metrics import *
from tensorflow.keras.preprocessing.image import *
from tensorflow.data import *

import tensorflow_addons as tfa

import segmentation_models
from segmentation_models.losses import *
from segmentation_models.metrics import iou_score

import matplotlib.pyplot as plt

import os
import datetime
import math

In [19]:
from output_processing import output_image_processing

In [20]:
tf.__version__

'2.4.1'

# Image Preprocessing & Prepare Datasets

## Constants

In [21]:
DATA_ROOTDIR = r"./data"
# Training data should be at DATA_ROOTDIR/train
# Testing data should be at DATA_ROOTDIR/test

IMAGE_DIM = 128
# Input images have resolution [IMAGE_DIM, IMAGE_DIM, 3]
# Output masks have resolution [IMAGE_DIM, IMAGE_DIM]

BATCH_SIZE = 16
TRAIN_SPLIT = 0.8
VALIDATION_SPLIT = 1 - TRAIN_SPLIT

## Calculating the size of training and validation folds

In [22]:
def count_data_points(fold):
    # fold: "train" / "test"
    rootdir = DATA_ROOTDIR
    rootdir = os.path.join(rootdir, fold)
    dataset_size = 0
    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            if "labelled" in file:
                dataset_size += 1
    return dataset_size

In [23]:
train_dataset_size = count_data_points("train")

train_size = int(train_dataset_size * TRAIN_SPLIT)
validation_size = train_dataset_size - train_size

print(f"Train size: {train_size}")
print(f"Validation size: {validation_size}")

Train size: 4463
Validation size: 1116


## Parsing the image data from DATA_ROOTDIR

In [24]:
resize_and_rescale = tf.keras.Sequential([
  layers.experimental.preprocessing.Resizing(IMAGE_DIM, IMAGE_DIM),
  layers.experimental.preprocessing.Rescaling(1./255)
])

def parse_image(mask_path):
    mask_png = tf.io.read_file(mask_path)
    mask = tf.io.decode_png(mask_png)
    mask = tf.image.rgb_to_grayscale(mask)
    mask = resize_and_rescale(mask)
    mask = 1 - mask

    image_path = tf.strings.substr(mask_path, 0, tf.strings.length(mask_path) - tf.strings.length("-labelled.png")) + ".png"
    image_png = tf.io.read_file(image_path)
    image = tf.io.decode_png(image_png)
    image = resize_and_rescale(image)

    return image, mask

## Data Augmentation

In [25]:
def augment_data(image, mask):
    # Flipping left and right randomly
    flip_left_right = tf.math.round(tf.random.uniform([]))
    if flip_left_right == 1:
        tf.image.flip_left_right(image)
        tf.image.flip_left_right(mask)
    
    # Flipping up and down randomly
    flip_up_down = tf.math.round(tf.random.uniform([]))
    if flip_up_down == 1:
        tf.image.flip_up_down(image)
        tf.image.flip_up_down(mask)
    
    # Rotation by random angle
    rot_ang = tf.random.uniform([], minval=0, maxval=np.pi * 2)
    image = tfa.image.rotate(image, rot_ang)
    mask = tfa.image.rotate(mask, rot_ang)

    # Rotation by either [0, 90, 180, 270] degrees
    # rot_cnt = tf.cast(tf.math.floor(tf.random.uniform([], minval=0, maxval=4)), dtype=tf.int32)
    # image = tf.image.rot90(image, rot_cnt)
    # mask = tf.image.rot90(mask, rot_cnt)

    return image, mask

## Configuring the training and validation datasets

In [26]:
def configure_dataset(dataset, training=False):
    dataset = dataset.shuffle(1000).repeat().map(parse_image)
    if training:
        # Only augment the training fold
        dataset = dataset.map(lambda image, mask: augment_data(image, mask))
    return dataset.batch(BATCH_SIZE, drop_remainder=True)

# Load the list of mask paths into train_dataset
# Do not shuffle the dataset at this stage - Tiles are overlapping and ordered from top-left to bottom-right, so shuffling might result in significant overlap of areas between training and validation data
train_dataset = Dataset.list_files(os.path.join(DATA_ROOTDIR, "train", "*-labelled.png"), shuffle=False)

# Split the entire dataset (of paths) into training and validation folds
train_ds = train_dataset.take(train_size)
validation_ds = train_dataset.skip(train_size).take(validation_size)

# Set up the image dataset from the path dataset
train_ds = configure_dataset(train_ds, training=True)
validation_ds = configure_dataset(validation_ds)

print(train_ds)
print(validation_ds)

<BatchDataset shapes: ((16, None, None, None), (16, 128, 128, 1)), types: (tf.float32, tf.float32)>
<BatchDataset shapes: ((16, 128, 128, None), (16, 128, 128, 1)), types: (tf.float32, tf.float32)>


# Building the Model

## Constants

In [27]:
# Each model will be stored in a separate folder named by the model itself in the MODELS_ROOTDIR
MODELS_ROOTDIR = r"./"

# if MODEL_LOADED == None: train new model; otherwise, load the specified existing model
MODEL_LOADED = "model @2021-03-09 13-52-58.147135"


## Model Architecture

In [28]:
model = segmentation_models.Unet("resnet34", encoder_weights="imagenet", decoder_use_batchnorm=True)
model.compile('Adam', loss=DiceLoss(0.8), metrics=[iou_score, Precision(0.5), Recall(0.5)])

## Configure how the Model is saved

timestamp = str(datetime.datetime.now())
timestamp = timestamp.replace(":", "-")

rootdir = MODELS_ROOTDIR
folder = MODEL_LOADED
if folder == None:
    folder = f"model @{timestamp}" # Name of model

model_dir = os.path.join(rootdir, folder) # Path of model to be saved
checkpoint_path = os.path.join(model_dir, "weights") # Path of model weights to be saved

## Train the Model

In [29]:
# Train a new model only if model_dir does not exist
if not os.path.isdir(model_dir):
    os.mkdir(model_dir)

    # Only save the best model (smallest loss) obtained throughout the training process
    cp_callback = callbacks.ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, save_weights_only=True, monitor="val_loss", verbose=1)

    # Save model history?
    # Plot history graph with tensorboard?
    history = model.fit(x=train_ds, initial_epoch=0, epochs=35, steps_per_epoch=200, validation_data=validation_ds, validation_steps=50, callbacks=[cp_callback], workers=12)

# Testing the Model

## Configure the test dataset

In [30]:
test_ds = Dataset.list_files(os.path.join(DATA_ROOTDIR, "test", "*-labelled.png"), shuffle=False)
test_size = count_data_points("test")
test_ds = test_ds.map(parse_image).repeat(1).batch(BATCH_SIZE, drop_remainder=True)
print(f"Test size: {test_size}")
print(test_ds)

Test size: 1127
<BatchDataset shapes: ((16, 128, 128, None), (16, 128, 128, 1)), types: (tf.float32, tf.float32)>


## Evaluate the Model on the test dataset

In [31]:
model.load_weights(checkpoint_path)
test_results = model.evaluate(x=test_ds, return_dict=True)
print(test_results)

{'loss': 0.435035765171051, 'iou_score': 0.4369751811027527, 'precision_1': 0.48186537623405457, 'recall_1': 0.8229793310165405}


## Save the predicted masks

In [32]:
predictions_dir = os.path.join(model_dir, "predictions")
if not os.path.isdir(predictions_dir):
    os.mkdir(predictions_dir)

In [33]:
plt.ioff()

def plot_save(filename, show, *images):
    images_count = len(images)
    fig, ax = plt.subplots(1, images_count)
    for i in range(images_count):
        ax[i].imshow(images[i])
    np.vectorize(lambda ax: ax.axis("off"))(ax)

    if filename != None:
        fig.savefig(filename)

    if not show:
        plt.close(fig)

In [34]:
test_batch = test_ds

test_count = 0
for images, masks in test_batch:
    masks_pred = model.predict(images)
    for i, mask_pred in enumerate(masks_pred):
        image, mask = images[i], masks[i]
        mask_smoothed = output_image_processing(mask_pred, 7, 0.5)
        test_count += 1
        filename = os.path.join(predictions_dir, f"test_{test_count}")
        plot_save(filename, test_count <= 16, image, mask, mask_pred, mask_smoothed)