# Building a simple UNet-like model

We will build a simple **UNet-like model** using our generated data of images and masks in png format. You can refer to **[this notebook](https://www.kaggle.com/code/dingyan/hubmap-images-and-masks-original-size-3000x3000)** for generating images and masks.

If you feel this notebook is helpful, please **upvote**! Thank you.

**1. Get the images and masks path from our generated data. Random shuffle images and masks.**

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import numpy as np
from tqdm import tqdm
import random
import tifffile as tiff
import pandas as pd

img_size = 512
image_dir = '../input/hubmap-data/HuBMAP_train/images'
mask_dir = '../input/hubmap-data/HuBMAP_train/masks'

# os.listdir returns entries in arbitrary order, so sort both listings before
# pairing them by index.
# NOTE(review): this assumes every image and its mask sort to the same
# position (e.g. they share a file name) -- confirm against the dataset.
image_path_list = sorted(os.listdir(image_dir))
mask_path_list = sorted(os.listdir(mask_dir))

# Two same-seed shuffles only produce the same permutation when both lists
# have the same length, so fail early instead of silently mis-pairing.
if len(image_path_list) != len(mask_path_list):
    raise ValueError(
        f"Found {len(image_path_list)} images but {len(mask_path_list)} masks; "
        "expected one mask per image."
    )

# Shuffle the file paths.
# We use the same seed (1337) in both statements to ensure that
# the input paths and target paths stay in the same order.
random.Random(1337).shuffle(image_path_list)
random.Random(1337).shuffle(mask_path_list)

img_num = len(image_path_list)

**2. Load images**

We use *keras.utils.load_img* to load images and masks; its *target_size* argument conveniently resizes them on load (here to 512 x 512). We then convert each image to a NumPy array with *keras.utils.img_to_array*. Be careful: the pixel values range from 0 to 255, so we need to rescale them to 0 - 1.

In [None]:
def get_image(path, img_size):
    """Load the image at `path` and return it as an array divided by 255.

    Args:
        path: Path of the image file, passed to keras.utils.load_img.
        img_size: The image is resized to (img_size, img_size) on load.

    Returns:
        The array produced by keras.utils.img_to_array, divided by 255.
    """
    pil_image = keras.utils.load_img(path, target_size=(img_size, img_size))
    pixels = keras.utils.img_to_array(pil_image)
    return pixels / 255.

**3. Load masks**

The masks we generated are already in the range 0 - 1, so we don't need to rescale them. Just be aware that *keras.utils.img_to_array* returns **float32** data, so we cast the mask from float32 to uint8 for model training. In this competition we only need to decide whether each pixel belongs to the mask, so it is a **binary classification** problem, just like foreground versus background.

In [None]:
def get_mask(path, img_size):
    """Load the mask at `path` as a single-channel uint8 tensor.

    Args:
        path: Path of the mask file, passed to keras.utils.load_img.
        img_size: The mask is resized to (img_size, img_size) on load.

    Returns:
        The grayscale mask converted with keras.utils.img_to_array and
        cast to tf.uint8.
    """
    pil_mask = keras.utils.load_img(
        path,
        color_mode='grayscale',
        target_size=(img_size, img_size),
    )
    return tf.cast(keras.utils.img_to_array(pil_mask), tf.uint8)

**4. Since the dataset is not very big, we can just load everything into memory:**

In [None]:
# Pre-allocate one array for all images (3 channels, float32) and one for all
# masks (1 channel, uint8), then fill them sample by sample.
input_images = np.zeros((img_num, img_size, img_size, 3), dtype='float32')
input_masks = np.zeros((img_num, img_size, img_size, 1), dtype='uint8')

for idx, image_name in enumerate(tqdm(image_path_list)):
    # The mask for an image is taken from the same index in mask_path_list.
    mask_name = mask_path_list[idx]
    input_images[idx] = get_image(os.path.join(image_dir, image_name), img_size)
    input_masks[idx] = get_mask(os.path.join(mask_dir, mask_name), img_size)
    

**5. Let's take a look at the masked images**

In [None]:
import matplotlib.pyplot as plt

# Show the first nine samples in a 3x3 grid, each with its mask drawn
# semi-transparently on top of the image.
grid_rows, grid_cols = 3, 3
plt.figure(figsize=(15, 15))
for cell in range(grid_rows * grid_cols):
    plt.subplot(grid_rows, grid_cols, cell + 1)
    plt.imshow(input_images[cell])
    plt.imshow(input_masks[cell], alpha=0.3)
    

The images and masks are ready for feeding into the model.

**6. We will only reserve 30 samples for validation as we don't have a lot of training data**

In [None]:
val_num = 30

# The last `val_num` samples form the validation set; everything before them
# is used for training.
split_at = -val_num
train_images, val_images = input_images[:split_at], input_images[split_at:]
train_masks, val_masks = input_masks[:split_at], input_masks[split_at:]

**7. Build the model**

The first half of the model is a stack of Conv2D layers with a gradually increasing number of filters, ending up with activations of size
(32, 32, 256). It acts as a kind of compression.

The second half of the model is a stack of Conv2DTranspose layers to get our final output to have the same shape as the target masks which is (512, 512, 1)

The last layer performs binary classification of each mask pixel, so activation = 'sigmoid'. Let's first try loss = 'binary_crossentropy'.

In [None]:
# smaller network
# Clear the Keras backend session before building the model.
# NOTE(review): presumably so that re-running this cell starts from a clean
# Keras state -- confirm against the Keras documentation.
tf.keras.backend.clear_session()
def get_model(image_size, num_classes):
    """Build the convolutional encoder/decoder segmentation model.

    The encoder is four pairs of Conv2D layers (32, 64, 128, 256 filters);
    the first layer of each pair uses strides=2. The decoder mirrors it with
    four pairs of Conv2DTranspose layers (256, 128, 64, 32 filters); the
    second layer of each pair uses strides=2. A final Conv2D with a sigmoid
    activation produces `num_classes` output channels.

    Args:
        image_size: Height and width of the square, 3-channel input.
        num_classes: Number of filters in the output layer.

    Returns:
        An uncompiled keras.Model.
    """
    conv_kwargs = dict(kernel_size=3, activation='relu', padding='same')
    encoder_widths = (32, 64, 128, 256)

    inputs = keras.Input(shape=(image_size, image_size, 3))
    x = inputs

    # Encoder: strided convolution followed by a stride-1 convolution.
    for width in encoder_widths:
        x = layers.Conv2D(width, strides=2, **conv_kwargs)(x)
        x = layers.Conv2D(width, **conv_kwargs)(x)

    # Decoder: stride-1 transposed convolution followed by a strided one.
    for width in reversed(encoder_widths):
        x = layers.Conv2DTranspose(width, **conv_kwargs)(x)
        x = layers.Conv2DTranspose(width, strides=2, **conv_kwargs)(x)

    outputs = layers.Conv2D(num_classes, 3, activation='sigmoid', padding='same')(x)
    return keras.Model(inputs, outputs)
# One output channel with binary cross-entropy: each pixel is classified as
# mask or background.
model = get_model(image_size=img_size, num_classes=1)
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Stop when val_loss has not improved for 30 epochs, and keep only the best
# model on disk under 'mymodel'.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
checkpoint = keras.callbacks.ModelCheckpoint('mymodel', save_best_only=True)
callbacks = [early_stopping, checkpoint]

history = model.fit(
    train_images,
    train_masks,
    batch_size=4,
    epochs=100,
    validation_data=(val_images, val_masks),
    callbacks=callbacks,
)

In [None]:
# Plot training and validation loss, leaving out the first 10 epochs.
skip = 10
epochs = range(len(history.history['loss']))
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    plt.plot(epochs[skip:], history.history[history_key][skip:], label=curve_label)
plt.legend()
plt.show()

**8. Visualize the predictions on validation images.**

In [None]:
# Predicted values above this threshold are classified as mask (1); all
# other values are classified as background (0).
threshold = 0.5

# Reload the model saved under './mymodel'.
model = keras.models.load_model('./mymodel')

def visualize_predictions(images, masks, ids):
    """Plot ground-truth and predicted masks side by side.

    Reads the module-level names `rows`, `cols`, `model` and `threshold`.
    Each pair of adjacent subplots shows one sample: the image with its
    ground-truth mask, then the same image with the thresholded prediction.

    Args:
        images: Indexable collection of images.
        masks: Indexable collection of ground-truth masks.
        ids: Indices into `images` and `masks`. Only every second entry
            (positions 0, 2, 4, ...) is plotted.
    """
    plt.figure(figsize=(cols*5, rows*5))
    for offset in range(0, rows*cols, 2):
        sample_id = ids[offset]
        image = images[sample_id]

        # First panel of the pair: image overlaid with the ground-truth mask.
        plt.subplot(rows, cols, offset + 1)
        plt.axis('off')
        plt.title('Groundtruth')
        plt.imshow(image)
        plt.imshow(masks[sample_id], alpha=0.3)

        # Second panel of the pair: image overlaid with the predicted mask.
        plt.subplot(rows, cols, offset + 2)
        plt.title('Prediction')
        plt.axis('off')
        batch = np.expand_dims(image, 0)
        probabilities = model.predict(batch)[0]
        predicted_mask = np.where(probabilities > threshold, 1, 0)
        plt.imshow(image)
        plt.imshow(predicted_mask, alpha=0.3)

# Grid layout read by visualize_predictions.
rows, cols = 5, 2

# Draw rows*cols distinct validation indices and plot them.
sampled_ids = np.random.choice(range(len(val_images)), size=rows*cols, replace=False)
visualize_predictions(val_images, val_masks, sampled_ids)

The results look pretty good on the validation data given such a simple model. Moreover, we haven't added any data augmentation yet. 

**Next steps we can try:**
1. Add data augmentation.
2. Try different model structures or loss functions.
3. Seek some public external data.
4. ...

If you have good ideas, please leave comments below. 

**9. Submission**

In [None]:
# ref.: https://www.kaggle.com/stainsby/fast-tested-rle
def rle_encode(img):
    """Run-length encode a binary mask.

    The mask is transposed and flattened before encoding, so a 2-D mask is
    scanned column by column. Positions in the output are 1-based.

    Args:
        img (np.array):
            - 1 indicating mask
            - 0 indicating background

    Returns:
        Space-separated string of "start length" pairs, one pair per run of
        mask pixels; an empty string when there are no mask pixels.
    """
    flat = np.ravel(img.T)
    # Pad with a background pixel at each end so that runs touching the
    # borders still produce both a start and an end transition.
    padded = np.concatenate(([0], flat, [0]))
    # 1-based positions where the value changes: start, end, start, end, ...
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each end position into a run length (end - start).
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

def preprocess_tiff_image(path, img_size=512):
    """Read a TIFF image and prepare it as model input.

    Args:
        path: Path of the TIFF file, read with tifffile's imread.
        img_size: Side length, in pixels, of the square image returned.
            Defaults to 512, the value that was previously hard-coded.

    Returns:
        A tuple (image_processed, original_shape): the image resized to
        (img_size, img_size) and divided by 255, and the shape of the array
        as read from disk, before resizing.
    """
    image_array = tiff.imread(path)
    original_shape = image_array.shape
    resized = keras.utils.array_to_img(image_array).resize((img_size, img_size))
    image_processed = keras.utils.img_to_array(resized) / 255
    return image_processed, original_shape


**First, let's visualize the sample test image**

In [None]:
sample_test_path = '../input/hubmap-organ-segmentation/test_images/10078.tiff'

# Only the preprocessed image is needed here; the original shape is discarded.
image = preprocess_tiff_image(sample_test_path)[0]

# Add a leading batch axis for the model, then threshold the first
# (and only) prediction in the batch.
pred = model.predict(image[np.newaxis, ...])
pred_mask = np.where(pred[0] > threshold, 1, 0)

plt.figure(figsize=(6, 6))
plt.axis('off')
plt.imshow(image)
plt.imshow(pred_mask, alpha=0.3)

**At last, let's generate the submission file. Don't forget to resize the mask to original image size.**

In [None]:
test_df = pd.read_csv('../input/hubmap-organ-segmentation/test.csv')
test_ids = test_df['id']
test_dir = '../input/hubmap-organ-segmentation/test_images'

ids = []
rles = []
for test_id in test_ids:  # named test_id to avoid shadowing the builtin `id`
    path = os.path.join(test_dir, f"{test_id}.tiff")
    image, original_shape = preprocess_tiff_image(path)
    pred = model.predict(np.expand_dims(image, axis=0))
    # Use the same threshold as the validation visualisation.
    pred_mask = np.where(pred > threshold, 1, 0)[0]
    # NOTE(review): assumes the TIFF array is laid out as (height, width, ...)
    # -- confirm for this dataset.
    height, width = original_shape[0], original_shape[1]
    # PIL's resize expects (width, height); passing (height, width) would
    # transpose the output size for non-square images.
    # resample=0 is nearest-neighbour, which keeps the mask strictly 0/1.
    resized_pred_mask = keras.utils.array_to_img(pred_mask, scale=False).resize((width, height), resample=0)
    resized_pred_mask_array = keras.utils.img_to_array(resized_pred_mask, dtype='uint8')
    rle = rle_encode(resized_pred_mask_array)
    ids.append(test_id)
    rles.append(rle)

# One row per test image: its id and the run-length encoded predicted mask.
submission_df = pd.DataFrame({'id':ids,'rle':rles})
submission_df.to_csv('submission.csv', index=False)

In [None]:
# Display the submission table as the cell's output.
submission_df

This is a baseline for quickly building an image segmentation model. If you find it helpful, please **upvote**! 