# Simple U-Net Xception-style model

## Load Dataset
The dataset is composed of cropped and augmented images from the original provided dataset. The structure is
- \results
  - \images
      - img_(n).tif
  - \masks
      - img_(n).png

In [None]:
!unzip results.zip

### Importing required libraries
We will be using cv2 for image processing and matplotlib for visualization of the results.
For training the model, keras and tensorflow will be used.
Numpy will be used for numerical operations.

In [None]:
import os
import cv2
import keras
import random
import numpy as np
from tensorflow import data as tf_data
from tensorflow import image as tf_image
from tensorflow import io as tf_io
from matplotlib import pyplot as plt
from google.colab.patches import cv2_imshow

## Load Images
The images are then loaded by using OpenCV imread method.
We also tested Keras's image-loading utility (`load_img`), but it was not compatible with the .tif images.

### Parameters
- Input dir = input folder (images)
- Target dir = target / annotations folder (masks)
- Img_size = image resolution

In [None]:
# Folders holding the input images and their annotation masks.
input_dir, target_dir = "./results/images", "./results/masks"

# Image resolution; used later as the spatial part of the model input shape.
img_size = (128, 128)

### Listing image paths
Paths are mapped according to the image and mask names, sorted by their ids and stored in a list.
The lists are later used to load the images and masks as numpy arrays.

In [None]:
# Collect the full path of every .tif file in the image folder, then order
# the paths by the id found between the first "_" and the following ".".
# The id is compared as a string, so the order is lexicographic.
input_img_paths = [
    os.path.join(input_dir, name)
    for name in os.listdir(input_dir)
    if name.endswith(".tif")
]
input_img_paths.sort(key=lambda path: path.split("_")[1].split(".")[0])

# Same for the .png masks, skipping hidden files (names starting with ".").
# Using the same key on both lists keeps image i paired with mask i.
target_img_paths = [
    os.path.join(target_dir, name)
    for name in os.listdir(target_dir)
    if not name.startswith(".") and name.endswith(".png")
]
target_img_paths.sort(key=lambda path: path.split("_")[1].split(".")[0])

### Integrity Check
Before proceeding, the integrity of the dataset is checked by comparing the number of images and masks along with their names from the lists.
Each displayed line should show the same name for the image and its mask.

In [None]:
# Both counts should be equal if every image has a mask.
print(f"Number of inputs: {len(input_img_paths)}")
print(f"Number of labels: {len(target_img_paths)}")

# Print the first ten image/mask path pairs so their names can be compared.
for img_path, mask_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(f"{img_path} | {mask_path}")

## Loading image arrays from paths
Here we use the cv2 imread method to load the images from the listed paths. Following this operation the images can be easily converted to numpy arrays.

In [None]:
# Images are read with cv2's default flags; masks are read as single-channel
# grayscale.
input_img_paths_cv2 = [cv2.imread(path) for path in input_img_paths]
target_img_paths_cv2 = [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in target_img_paths]

# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with the offending path instead of with an obscure error later,
# when the images are converted to arrays.
unreadable_paths = [
    path
    for path, img in zip(
        input_img_paths + target_img_paths,
        input_img_paths_cv2 + target_img_paths_cv2,
    )
    if img is None
]
if unreadable_paths:
    raise ValueError(
        f"cv2.imread could not read {len(unreadable_paths)} file(s), "
        f"first one: {unreadable_paths[0]}"
    )

# Preview up to ten image/mask pairs. Slicing (rather than range(10)) avoids
# an IndexError when the dataset holds fewer than ten samples.
for preview_img, preview_mask in zip(input_img_paths_cv2[:10], target_img_paths_cv2[:10]):
    cv2_imshow(preview_img)
    cv2_imshow(preview_mask)

## Convert Img object to numpy array
For further processing, the images are converted to numpy arrays. This ensures that the images can be used for training the model.


In [None]:
# Scale every input image by 1/255 and store it as float32.
input_imgs_np = [
    np.divide(np.asarray(img), 255).astype(np.float32)
    for img in input_img_paths_cv2
]

# Masks are scaled the same way and then cast to uint8. The cast truncates,
# so only pixels equal to 255 become 1 and everything else becomes 0.
# A trailing channel axis is appended to each mask.
target_imgs_np = [
    np.divide(np.asarray(mask), 255).astype(np.uint8)[..., np.newaxis]
    for mask in target_img_paths_cv2
]

## Create Tensorflow Batch Dataset
Now that the images and masks are loaded, we can create a tensorflow dataset from the numpy arrays. This will allow us to use the dataset for training the model later on.

In [None]:
def get_dataset(batch_size, img_size, input_img_arr, target_img_arr, max_dataset_len=None):
    """Build a batched TF Dataset of (input, target) pairs.

    Args:
        batch_size: Number of samples per batch.
        img_size: Accepted for interface compatibility; not used here.
        input_img_arr: Sequence of input images.
        target_img_arr: Sequence of target masks, aligned with the inputs.
        max_dataset_len: If truthy, only the first ``max_dataset_len``
            samples of both sequences are kept.

    Returns:
        A ``tf.data.Dataset`` yielding batches of (input, target) slices.
    """
    inputs, targets = input_img_arr, target_img_arr
    if max_dataset_len:
        # Truncate both sequences identically so the pairs stay aligned.
        inputs = inputs[:max_dataset_len]
        targets = targets[:max_dataset_len]

    return tf_data.Dataset.from_tensor_slices((inputs, targets)).batch(batch_size)

### Define training / test / validation Datasets
Instead of using the entire dataset for training, we split it into a training part and a held-out part that is used for validation and evaluation. This is done by dividing the shuffled lists of numpy arrays into two parts. Both lists are shuffled with the same fixed seed, so every image stays paired with its mask and the split is reproducible.

In [None]:
val_percent = 0.25  # Fraction of the data held out for validation

# Shuffling two lists with the same seed applies the same permutation only
# when both lists have the same length; otherwise image/mask pairs would be
# silently misaligned.
if len(input_imgs_np) != len(target_imgs_np):
    raise ValueError(
        f"Image/mask count mismatch: {len(input_imgs_np)} images vs "
        f"{len(target_imgs_np)} masks"
    )

val_samples = int(len(input_imgs_np) * val_percent)

# In-place shuffles with an identical fixed seed: reproducible, and images
# stay aligned with their masks.
random.Random(800).shuffle(input_imgs_np)
random.Random(800).shuffle(target_imgs_np)

# Split at an explicit index. Slicing with -val_samples breaks when
# val_samples is 0: [:-0] is empty and [-0:] is the whole list, which would
# leave the training set empty.
split_idx = len(input_imgs_np) - val_samples

train_input_img_sample = input_imgs_np[:split_idx]
train_target_img_sample = target_imgs_np[:split_idx]
val_input_img_sample = input_imgs_np[split_idx:]
val_target_img_sample = target_imgs_np[split_idx:]

#### Verify Shapes
The shapes of the training, testing and validation samples are verified to ensure that the data is divided correctly.

In [None]:
# Print the shape of the first image and the first mask of each split.
for title, (images, masks) in (
    ("Train Shapes", (train_input_img_sample, train_target_img_sample)),
    ("Evaluation Shapes", (val_input_img_sample, val_target_img_sample)),
):
    print(title)
    print(images[0].shape)
    print(masks[0].shape)

#### Parameters
- Batch size = number of images to be processed in one go
- num_classes = number of classes in the dataset

In [None]:
# num_classes: number of output channels of the model's final layer.
# batch_size: number of samples per batch in the datasets.
num_classes, batch_size = 1, 2

#### Datasets
The actual tensorflow batch datasets.

In [None]:
# Training dataset, capped at the first 2000 training samples.
train_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_arr=train_input_img_sample,
    target_img_arr=train_target_img_sample,
    max_dataset_len=2000,
)

# Validation dataset, built from the held-out samples.
valid_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_arr=val_input_img_sample,
    target_img_arr=val_target_img_sample,
)

# Evaluation dataset, built from the same held-out samples as the validation
# dataset.
# NOTE(review): this name shadows Python's built-in eval(); it is kept because
# other cells may refer to it.
eval = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_arr=val_input_img_sample,
    target_img_arr=val_target_img_sample,
)

# Model
Here we define the CNN model. The base architecture was based on Keras examples for image segmentation, and modified to fit our data requirements and binary segmentation. https://keras.io/examples/vision/oxford_pets_image_segmentation/


## Architecture

In [None]:
from keras import layers

def get_model(img_size, num_classes):
    """Build the U-Net Xception-style segmentation model.

    The network halves the spatial resolution three times (one strided entry
    convolution plus two pooled blocks) and then doubles it three times, with
    a projected residual connection around every block.

    Args:
        img_size: Tuple with the spatial size of the input; a 3-channel axis
            is appended to form the input shape.
        num_classes: Number of output channels of the final 1x1 convolution.

    Returns:
        An uncompiled ``keras.Model`` whose output passes through a sigmoid.
    """

    def relu_conv_bn(tensor, conv_cls, depth):
        # ReLU -> 3x3 convolution of the given class -> batch normalization.
        tensor = layers.Activation("relu")(tensor)
        tensor = conv_cls(depth, 3, padding="same")(tensor)
        return layers.BatchNormalization()(tensor)

    inputs = keras.Input(shape=img_size + (3,))

    # --- Downsampling half ---

    # Entry block: strided convolution, batch norm, ReLU.
    features = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.Activation("relu")(features)

    skip = features  # Source of the next residual connection

    # Two downsampling blocks, identical apart from the feature depth.
    for depth in (64, 128):
        features = relu_conv_bn(features, layers.SeparableConv2D, depth)
        features = relu_conv_bn(features, layers.SeparableConv2D, depth)
        features = layers.MaxPooling2D(3, strides=2, padding="same")(features)

        # Strided 1x1 convolution so the residual matches the pooled branch.
        shortcut = layers.Conv2D(depth, 1, strides=2, padding="same")(skip)
        features = layers.add([features, shortcut])
        skip = features

    # --- Upsampling half ---

    # Three upsampling blocks, identical apart from the feature depth.
    for depth in (128, 64, 32):
        features = relu_conv_bn(features, layers.Conv2DTranspose, depth)
        features = relu_conv_bn(features, layers.Conv2DTranspose, depth)
        features = layers.UpSampling2D(2)(features)

        # Upsample the residual, then project it to the block's depth.
        shortcut = layers.UpSampling2D(2)(skip)
        shortcut = layers.Conv2D(depth, 1, padding="same")(shortcut)
        features = layers.add([features, shortcut])
        skip = features

    # Per-pixel classification layer with a sigmoid activation.
    outputs = layers.Conv2D(num_classes, (1, 1), activation="sigmoid")(features)

    return keras.Model(inputs, outputs)


# Instantiate the network for the configured image size and class count,
# then print its layer-by-layer summary.
model = get_model(img_size=img_size, num_classes=num_classes)
model.summary()

## Loss Function
The loss function is defined as binary crossentropy. This is a common loss function used for binary classification tasks.

In [None]:
import tensorflow as tf
# The model's final layer already applies a sigmoid, so its outputs are
# probabilities rather than logits. from_logits must therefore be False;
# with True the loss would apply a second sigmoid to the predictions.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

## Optimizer

In [None]:
# Adam optimizer with a learning rate of 1e-4.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

## Compiling the Model

In [None]:

# Attach the optimizer, the loss and a binary-accuracy metric to the model.
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=[keras.metrics.BinaryAccuracy()],
)

# Checkpoint callback that writes the model to "plot_segmentation.keras" and,
# with save_best_only=True, only overwrites it when the model improves.
best_model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="plot_segmentation.keras",
    save_best_only=True,
)
callbacks = [best_model_checkpoint]

## Training
The model is trained using the training dataset. The number of epochs can be adjusted to improve the model performance.

In [None]:
epochs = 50  # Number of passes over the training dataset

# Fit on the training dataset; the validation dataset is evaluated at the end
# of every epoch and the checkpoint callback runs alongside.
model.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)

# Results

## Testing Predictions
The model generates predictions for the held-out validation dataset (no separate test split is defined in this notebook). The predictions are then visualized to evaluate the model performance.

In [None]:
# Run inference on every batch of the validation dataset; the predictions
# come back in dataset order.
test_preds = model.predict(x=valid_dataset)

### Displaying the Results
The original image, mask and predicted mask are displayed side by side for comparison.

In [None]:
# Show input image, ground-truth mask and predicted mask side by side.
# The loop variable is named `image` so the built-in input() is not shadowed.
for idx, (image, label) in enumerate(valid_dataset.unbatch()):
    fig, axarr = plt.subplots(1, 3)
    # The images were read with cv2.imread, which stores channels as BGR,
    # while matplotlib interprets 3-channel arrays as RGB. Reverse the channel
    # axis so the colours are displayed correctly.
    axarr[0].imshow(image.numpy()[..., ::-1])
    axarr[1].imshow(label.numpy() * 255)
    axarr[2].imshow(test_preds[idx] * 255)