In [None]:
!unzip results.zip

## Importing libraries

We will be using cv2 for image processing and matplotlib for visualization of the results. For training the model, keras and tensorflow will be used. Numpy will be used for numerical operations.

In [None]:
import os
import cv2
import keras
import random
import numpy as np
from tensorflow import data as tf_data
from tensorflow import image as tf_image
from tensorflow import io as tf_io
from matplotlib import pyplot as plt
from google.colab.patches import cv2_imshow

## Load images

The images are then loaded using OpenCV's `imread` function. Keras' `load_img` utility was also tested, but it was not compatible with the .tif images.

### Parameters

* Input dir = input folder (images)
* Target dir = target / annotations folder (masks)
* Img_size = image resolution

In [None]:
# Folder the input images are listed from.
input_dir = "./results/images"
# Folder the annotation masks are listed from.
target_dir = "./results/masks"
# Image resolution; also used as the spatial part of the model's input shape.
img_size = (64,64)

### Listing image paths

Paths are mapped according to the image and mask names, sorted by their ids and stored in a list. The lists are later used to load the images and masks as numpy arrays.



In [None]:
def _list_image_paths(directory, extension):
    """Return the paths of the `extension` files in `directory`, sorted by id.

    The id is the part of the file name between the first underscore and the
    following dot (e.g. "12" for "image_12.tif"). It is taken from the file
    name only, so underscores in the directory path cannot affect the order.
    Ids are compared as strings, i.e. lexicographically.

    Hidden files (names starting with ".") are skipped for every listing, so
    stray archive metadata such as "._image_12.tif" is never picked up.
    """

    def _file_id(path):
        return os.path.basename(path).split("_")[1].split(".")[0]

    return sorted(
        (
            os.path.join(directory, fname)
            for fname in os.listdir(directory)
            if fname.endswith(extension) and not fname.startswith(".")
        ),
        key=_file_id,
    )


# Both lists use the same listing/sorting rule so images and masks line up by position.
input_img_paths = _list_image_paths(input_dir, ".tif")
target_img_paths = _list_image_paths(target_dir, ".png")

### Integrity Check


Before proceeding, the integrity of the dataset is checked by comparing the number of images and masks, along with their names, from the lists. Each displayed line should have the same image and mask name.

In [None]:
def _path_id(path):
    """Return the id embedded in a file name (text between the first '_' and the next '.')."""
    return os.path.basename(path).split("_")[1].split(".")[0]


print("Number of inputs:", len(input_img_paths))
print("Number of labels:", len(target_img_paths))

# Images and masks are paired by position, so a count mismatch would
# misalign them (zip silently truncates to the shorter list).
if len(input_img_paths) != len(target_img_paths):
    raise ValueError(
        f"Found {len(input_img_paths)} images but {len(target_img_paths)} masks; "
        "every image needs exactly one mask."
    )

# Same count is not enough: each pair must also carry the same id.
mismatched_pairs = [
    (input_path, target_path)
    for input_path, target_path in zip(input_img_paths, target_img_paths)
    if _path_id(input_path) != _path_id(target_path)
]
if mismatched_pairs:
    raise ValueError(
        f"{len(mismatched_pairs)} image/mask pairs have different ids, "
        f"e.g. {mismatched_pairs[:3]}"
    )

# Visual spot check of the first pairs.
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)

### Loading image arrays from paths


Here we use the cv2 imread method to load the images from the listed paths. Following this operation the images can be easily converted to numpy arrays.



In [None]:
def _read_image(path, flags, size, interpolation):
    """Read `path` with OpenCV and make sure it has the spatial size `size`.

    Args:
        path: File to read.
        flags: OpenCV imread flag (colour or grayscale).
        size: Expected (height, width) of the returned array.
        interpolation: OpenCV interpolation flag used if a resize is needed.

    Raises:
        OSError: If OpenCV cannot decode the file.
    """
    img = cv2.imread(path, flags)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"OpenCV could not read image: {path}")
    if img.shape[:2] != tuple(size):
        # cv2.resize takes (width, height) while img_size is (height, width).
        img = cv2.resize(img, (size[1], size[0]), interpolation=interpolation)
    return img


# Colour images (OpenCV channel order is BGR).
input_img_paths_cv2 = [
    _read_image(path, cv2.IMREAD_COLOR, img_size, cv2.INTER_AREA)
    for path in input_img_paths
]
# Single-channel masks; nearest-neighbour keeps mask values unblended if a resize happens.
target_img_paths_cv2 = [
    _read_image(path, cv2.IMREAD_GRAYSCALE, img_size, cv2.INTER_NEAREST)
    for path in target_img_paths
]

# Preview at most the first 10 pairs (slicing is safe for smaller datasets).
for preview_img, preview_mask in zip(input_img_paths_cv2[:10], target_img_paths_cv2[:10]):
    cv2_imshow(preview_img)
    cv2_imshow(preview_mask)

## Convert Img object to numpy array


For further processing, the images are converted to numpy arrays. This ensures that the images can be used for training the model.



In [None]:
def _as_unit_float(img):
    """Divide the pixel values by 255 and store the result as float32."""
    return (np.asarray(img) / 255).astype(np.float32)


def _as_mask_labels(mask):
    """Divide a mask by 255, cast to uint8 and append a trailing channel axis.

    The uint8 cast truncates, so for 8-bit masks only pixels equal to 255
    end up as 1; every other value becomes 0.
    """
    labels = (np.asarray(mask) / 255).astype(np.uint8)
    return np.expand_dims(labels, axis=-1)


input_imgs_np = [_as_unit_float(img) for img in input_img_paths_cv2]
target_imgs_np = [_as_mask_labels(mask) for mask in target_img_paths_cv2]

## Create Tensorflow Batch Dataset


Now that the images and masks are loaded, we can create a tensorflow dataset from the numpy arrays. This will allow us to use the dataset for training the model later on.



In [None]:
def get_dataset(
    batch_size,
    img_size,
    input_img_arr,
    target_img_arr,
    max_dataset_len=None,
):
    """Build a batched TF Dataset of (input, target) pairs.

    Args:
        batch_size: Number of pairs per batch.
        img_size: Accepted for interface compatibility; not used here.
        input_img_arr: Sequence of input image arrays.
        target_img_arr: Sequence of target mask arrays, aligned with the inputs.
        max_dataset_len: Optional cap on the number of pairs. A falsy value
            (None or 0) keeps every pair.

    Returns:
        A batched `tf.data.Dataset` yielding (input, target) pairs.
    """
    # Slicing with None keeps the whole sequence, which covers the "no cap" case.
    limit = max_dataset_len if max_dataset_len else None
    pairs = (input_img_arr[:limit], target_img_arr[:limit])
    return tf_data.Dataset.from_tensor_slices(pairs).batch(batch_size)

## Define training / test / validation Datasets


Instead of using the entire dataset for training, we split it into a training set and a validation set (the validation set is also reused for testing). This is done by dividing the original numpy arrays into 2 parts, one for training and the other for testing and validation. The images and the masks are shuffled with the same fixed seed, so the split is reproducible and every image stays aligned with its mask.



In [None]:
val_percent = 0.25 # Fraction (0-1) of the data to be used for validation
SPLIT_SEED = 800 # Fixed seed so the split is reproducible

num_samples = len(input_imgs_np)
val_samples = int(num_samples * val_percent)
# Explicit split position: a negative slice such as [:-val_samples] would
# return an empty training set when val_samples is 0.
split_at = num_samples - val_samples

# Shuffle a list of indices instead of the data lists themselves: images and
# masks share one permutation, the source lists stay untouched, and re-running
# this cell always yields the same split.
shuffled_order = list(range(num_samples))
random.Random(SPLIT_SEED).shuffle(shuffled_order)

train_indices = shuffled_order[:split_at]
val_indices = shuffled_order[split_at:]

train_input_img_sample = [input_imgs_np[i] for i in train_indices]
train_target_img_sample = [target_imgs_np[i] for i in train_indices]
val_input_img_sample = [input_imgs_np[i] for i in val_indices]
val_target_img_sample = [target_imgs_np[i] for i in val_indices]

## Verify Shapes


The shapes of the training, testing and validation samples are verified to ensure that the data is divided correctly.



In [None]:
# Print the shape of the first input and the first target of each split.
for split_title, split_inputs, split_targets in (
    ("Train Shapes", train_input_img_sample, train_target_img_sample),
    ("Evaluation Shapes", val_input_img_sample, val_target_img_sample),
):
    print(split_title)
    print(split_inputs[0].shape)
    print(split_targets[0].shape)

## Parameters

* Batch size = number of images to be processed in one go
* num_classes = number of classes in the dataset

In [None]:
num_classes = 1 # Number of output channels of the model's final layer (a single sigmoid channel)
batch_size = 2 # Number of samples per batch in the datasets built below

## Datasets
The actual tensorflow batch datasets.

In [None]:
# Training dataset: training split, capped at the first 2000 samples.
train_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_arr=train_input_img_sample,
    target_img_arr=train_target_img_sample,
    max_dataset_len=2000,
)

# Validation dataset: the full validation split.
valid_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_arr=val_input_img_sample,
    target_img_arr=val_target_img_sample,
)

# Evaluation dataset: built from the same validation arrays as valid_dataset.
# NOTE: this name shadows Python's built-in eval() for the rest of the notebook.
eval = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_arr=val_input_img_sample,
    target_img_arr=val_target_img_sample,
)

# Model


## SegNet Architecture

SegNet overview

Encoder:
 * Consists of convolution blocks followed by batch normalization and ReLU activation.
 * Each block is followed by a pooling operation that reduces the spatial dimensions.

Decoder:
 * Upsampling blocks to increase the resolution of the feature maps.
 * Convolution blocks to refine the details after each upsampling operation.
 * Batch normalization and ReLU activation to stabilize training and speed up convergence.

Output Layer:
 * A final convolution layer that maps to the desired number of classes with a sigmoid activation function.

In [None]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, BatchNormalization, Activation, Dropout, Conv2DTranspose

def _conv_bn_relu(x, filters):
    """Apply a 3x3 same-padded convolution, batch normalization and ReLU to `x`."""
    x = Conv2D(filters, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    return Activation('relu')(x)


def _conv_stack(x, filter_counts):
    """Chain one conv/BN/ReLU unit per entry of `filter_counts`."""
    for filters in filter_counts:
        x = _conv_bn_relu(x, filters)
    return x


def segnet_model(img_size, num_classes):
    """Build the SegNet-style encoder/decoder segmentation model.

    The encoder has five stages of conv/BN/ReLU units, each followed by 2x2
    max pooling. The decoder has five stages of 2x2 upsampling followed by
    conv/BN/ReLU units. A final 1x1 convolution with sigmoid activation
    produces `num_classes` output channels.

    Args:
        img_size: (height, width) of the 3-channel input images. Each known
            dimension must be divisible by 32 (five 2x poolings); otherwise
            pooling rounds down and the output would be smaller than the input.
        num_classes: Number of channels of the output layer.

    Returns:
        An uncompiled Keras `Model`.

    Raises:
        ValueError: If a dimension of `img_size` is not divisible by 32.
    """
    # Filters of the conv units in each stage, in layer order.
    encoder_filters = (
        (64, 64),
        (128, 128),
        (256, 256, 256),
        (512, 512, 512),
        (512, 512, 512),
    )
    decoder_filters = (
        (512, 512, 512),
        (512, 512, 256),
        (256, 256, 128),
        (128, 64),
        (64,),
    )

    # Validate before building any layer so a bad size fails with a clear message
    # instead of a shape mismatch against the masks at training time.
    downscale = 2 ** len(encoder_filters)
    for dim in img_size:
        if dim is not None and dim % downscale != 0:
            raise ValueError(
                f"img_size {tuple(img_size)} must be divisible by {downscale} "
                "in each dimension so the output matches the input resolution."
            )

    # tuple() lets callers pass img_size as a list as well.
    inputs = Input(shape=tuple(img_size) + (3,))

    # Encoder: conv stack, then halve the resolution.
    x = inputs
    for filter_counts in encoder_filters:
        x = _conv_stack(x, filter_counts)
        x = MaxPooling2D((2, 2))(x)

    # Decoder: double the resolution, then conv stack.
    for filter_counts in decoder_filters:
        x = UpSampling2D((2, 2))(x)
        x = _conv_stack(x, filter_counts)

    outputs = Conv2D(num_classes, (1, 1), activation='sigmoid')(x)

    model = Model(inputs=[inputs], outputs=[outputs])
    return model

# Instantiate the network for the configured image size and class count,
# then print its layer-by-layer summary.
model = segnet_model(img_size, num_classes)
model.summary()

## Loss Function

The loss function is defined as binary crossentropy. This is a common loss function used for binary classification tasks.


In [None]:
import tensorflow as tf
# The model's output layer already applies a sigmoid, so the loss receives
# probabilities rather than logits; from_logits must therefore be False.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

## Optimizer

In [None]:
# Adam optimizer with a learning rate of 1e-4.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

## Compiling the Model

In [None]:
# Attach the optimizer, the loss and a binary-accuracy metric to the model.
accuracy_metric = keras.metrics.BinaryAccuracy()
model.compile(loss=loss, optimizer=optimizer, metrics=[accuracy_metric])

# Checkpoint callback: with save_best_only=True, only the best model seen
# during training is kept in the checkpoint file.
best_model_checkpoint = keras.callbacks.ModelCheckpoint(
    "plot_segmentation.keras", save_best_only=True
)
callbacks = [best_model_checkpoint]

## Training
The model is trained using the training dataset. The number of epochs can be adjusted to improve the model performance.

In [None]:
# Number of passes over the training dataset.
epochs = 2

# Fit on the training dataset; validation runs at the end of every epoch and
# the callbacks defined above are invoked during training.
model.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)

# Results

## Testing Predictions
The model is tested on the validation dataset to generate predictions. The predictions are then visualized to evaluate the model performance.

In [None]:
# Run the model over the validation dataset; the display cell below indexes
# test_preds by each sample's position in valid_dataset.
test_preds = model.predict(valid_dataset)

## Displaying the Results
The original image, mask and predicted mask are displayed side by side for comparison.

In [None]:
# Show image, ground-truth mask and predicted mask side by side for every
# validation sample.
for idx, (image, label) in enumerate(valid_dataset.unbatch()):
    fig, axes = plt.subplots(1, 3, figsize=(9, 3))

    # Images were loaded with OpenCV (BGR channel order) while matplotlib
    # expects RGB, so the channel axis is reversed for display.
    axes[0].imshow(image.numpy()[..., ::-1])
    axes[0].set_title("Image")

    # Masks and predictions are single-channel maps in [0, 1]; a fixed value
    # range keeps the panels comparable across samples.
    axes[1].imshow(label.numpy()[..., 0], cmap="gray", vmin=0, vmax=1)
    axes[1].set_title("Mask")

    axes[2].imshow(test_preds[idx][..., 0], cmap="gray", vmin=0, vmax=1)
    axes[2].set_title("Predicted mask")

    for ax in axes:
        ax.axis("off")

    plt.show()
    # Release the figure so a large validation set does not accumulate open figures.
    plt.close(fig)