__This tutorial is based on__:

1. [Image segmentation with a U-Net-like architecture, Francois Chollet](https://keras.io/examples/vision/oxford_pets_image_segmentation/)

In [None]:
import numpy as np
import glob
import random
import matplotlib.pyplot as plt
from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.layers import *

In [None]:
%matplotlib inline

## 1. Download Data

[Penn-Fudan Database for Pedestrian Detection and Segmentation](https://www.cis.upenn.edu/~jshi/ped_html/)

In [None]:
# download the Penn-Fudan dataset
!wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip .
# extract it in the current folder
!unzip -q PennFudanPed.zip

## 2. Looking at Examples 

Let's look at an example image and its corresponding mask.

In [None]:
# Paths of one example image and its annotation mask
img_path = 'PennFudanPed/PNGImages/FudanPed00021.png'
mask_path = 'PennFudanPed/PedMasks/FudanPed00021_mask.png'


plt.figure(figsize=(16,16))

# Image
ax1 = plt.subplot(1, 2, 1)
image = load_img(img_path)
image = np.array(image)
ax1.imshow(image)
ax1.axis('off')

# Mask
ax2 = plt.subplot(1, 2, 2)
# Use color_mode="grayscale" (same as the data generator) rather than the
# deprecated `grayscale=True` keyword, which newer Keras releases no longer accept.
mask = np.array(load_img(mask_path, color_mode="grayscale"))
ax2.imshow(mask)
ax2.axis('off')

Currently, each pedestrian is assigned a unique id in the mask:

- Background id : 0
- First Pedestrian : 1
- Second Pedestrian : 2, etc

In [None]:
np.unique(mask)

However, we will reduce this to a simpler binary problem of segmenting the pedestrian class from the background class. This is often referred to as **'Semantic Segmentation'**, where one mask is assigned to a whole class, as opposed to **'Instance Segmentation'**, where each member of a class is given a unique mask.

In [None]:
# Collapse the per-pedestrian ids into a boolean mask:
# True for any non-zero id (pedestrian), False for id 0 (background).
mask = (mask != 0)

# Display the binary mask without axes
plt.figure(figsize=(8,8))
plt.imshow(mask)
plt.axis('off')

## 3. Define Data-Generator 

In [None]:
class Pedestrian_Datagenerator(keras.utils.Sequence):
    """Batch loader yielding (image, binary mask) pairs as NumPy arrays.

    Args:
        batch_size: Number of samples per batch.
        img_size: ``(height, width)`` tuple that every image and mask is
            resized to when loaded.
        input_img_paths: Paths of the input images.
        target_img_paths: Paths of the masks, in the same order as
            ``input_img_paths``.

    Note:
        ``__len__`` uses floor division, so trailing samples that do not fill
        a complete batch are never served.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        # Initialise the Keras base class; newer Keras versions warn when a
        # Sequence/PyDataset subclass skips this call.
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        """Return the number of complete batches."""
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Return the tuple (input, target) for batch number ``idx``.

        Returns:
            x: float32 array of shape ``(batch_size, *img_size, 3)`` with
                pixel values divided by 255.
            y: uint8 array of shape ``(batch_size, *img_size, 1)`` holding
                1 where the mask id is non-zero and 0 elsewhere.
        """
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]

        # Images: resized on load and written into a pre-allocated batch array
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            img = load_img(path, target_size=self.img_size)
            x[j] = img

        # Masks: loaded as single-channel images, then binarised so that every
        # pedestrian id maps to the same foreground class
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            mask = load_img(path, target_size=self.img_size, color_mode="grayscale")
            mask = np.array(mask)
            mask = mask != 0
            # Add the trailing channel axis expected by the target array
            y[j] = np.expand_dims(mask, 2)

        # Scale the image values by 1/255; masks are returned as 0/1
        return x / 255, y

## 4. Split Data into Train / Validation

In [None]:
# Gather the paths of every image and every mask.

input_dir = "PennFudanPed/PNGImages/"
target_dir = "PennFudanPed/PedMasks/"


# Sort both listings so the order is deterministic; pairing image i with
# mask i relies on the two folders sorting consistently by file name.
input_img_paths = sorted(glob.glob(f"{input_dir}*.png"))
target_img_paths = sorted(glob.glob(f"{target_dir}*.png"))

print(f"Number of samples: {len(input_img_paths)}")

# Preview the first ten (image, mask) pairs
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(f"{input_path} | {target_path}")

In [None]:
# Split into Train / Validation: the last `val_samples` shuffled entries are held out.
val_samples = 20

# Shuffle both lists in place with identically seeded generators. For lists of
# equal length this applies the same permutation to each, so every image stays
# paired with its mask.
random.Random(42).shuffle(input_img_paths)
random.Random(42).shuffle(target_img_paths)

train_input_img_paths, val_input_img_paths = (
    input_img_paths[:-val_samples],
    input_img_paths[-val_samples:],
)
train_target_img_paths, val_target_img_paths = (
    target_img_paths[:-val_samples],
    target_img_paths[-val_samples:],
)

print(f'Train set size: {len(train_input_img_paths)} \nValidation set size: {len(val_input_img_paths)}')

In [None]:
# Create the training and validation batch generators

img_size = (160, 160)
batch_size = 4

train_gen = Pedestrian_Datagenerator(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=train_input_img_paths,
    target_img_paths=train_target_img_paths,
)
val_gen = Pedestrian_Datagenerator(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=val_input_img_paths,
    target_img_paths=val_target_img_paths,
)

## 5. Define U-Net like Model 

U-Net is a popular architecture for image segmentation. The name comes from its U-like structure, as shown in the image below. The architecture broadly consists of two parts: a downsampling part consisting of a series of convolutions and max-pooling layers (like a regular CNN), followed by an upsampling part consisting of a series of up-convolutions and/or upsampling operations. For more detail on the U-Net architecture, refer to the original [U-Net paper](https://arxiv.org/abs/1505.04597).

<img src="https://upload.wikimedia.org/wikipedia/commons/2/2b/Example_architecture_of_U-Net_for_producing_k_256-by-256_image_masks_for_a_256-by-256_RGB_image.png" alt="U-net Image" width="600"/>

[src: [Wikipedia](https://en.wikipedia.org/wiki/U-Net)]

In [None]:
def unet(img_size):
    """Build a small U-Net-like model for binary segmentation.

    Args:
        img_size: ``(height, width)`` tuple of the input images; a channel
            dimension of 3 is appended. The skip connections concatenate
            tensors taken before and after pooling, so both dimensions
            presumably need to be divisible by 8 for the shapes to line up.

    Returns:
        An uncompiled ``keras.Model`` whose final layer is a 1-filter,
        1x1 sigmoid convolution.
    """
    layers = keras.layers

    def conv(filters, kernel_size):
        # ReLU convolution with 'same' padding, used throughout the network
        return layers.Conv2D(filters, kernel_size, activation="relu", padding="same")

    def double_conv(tensor, filters):
        # Two consecutive 3x3 convolutions with the same number of filters
        tensor = conv(filters, 3)(tensor)
        return conv(filters, 3)(tensor)

    def downsample(tensor):
        # Halve the spatial resolution
        return layers.MaxPooling2D(pool_size=(2, 2))(tensor)

    inputs = layers.Input(shape=img_size + (3,))

    # Contracting path: conv blocks at full, 1/2 and 1/4 resolution
    conv1 = double_conv(inputs, 64)
    pool1 = downsample(conv1)
    conv2 = double_conv(pool1, 128)
    pool2 = downsample(conv2)
    conv3 = double_conv(pool2, 256)
    pool3 = downsample(conv3)

    # Expanding path: pool3 is upsampled x4 so it matches conv2's resolution
    upsampled4 = layers.UpSampling2D(size=(4, 4))(pool3)
    up4 = conv(128, 2)(upsampled4)
    merge4 = layers.concatenate([conv2, up4], axis=3)
    conv4 = double_conv(merge4, 128)

    # Upsample x2 to match conv1's (full) resolution
    upsampled5 = layers.UpSampling2D(size=(2, 2))(conv4)
    up5 = conv(64, 2)(upsampled5)
    merge5 = layers.concatenate([conv1, up5], axis=3)
    conv5 = double_conv(merge5, 64)
    conv5 = conv(2, 3)(conv5)

    # Single-channel sigmoid output
    outputs = layers.Conv2D(1, 1, activation="sigmoid")(conv5)

    return keras.Model(inputs=inputs, outputs=outputs)


# Build the model for the configured input size and print its layer summary
model = unet(img_size)
model.summary()

## 6. Compile and Train

In [None]:
# Binary cross-entropy pairs with the model's single-channel sigmoid output
# and the 0/1 masks produced by the data generator.
model.compile(optimizer="adam", loss="binary_crossentropy")

# Checkpoint the model to disk during training; with save_best_only=True only
# the best model so far (by the callback's monitored quantity) is kept.
callbacks = [
    keras.callbacks.ModelCheckpoint("pedestrian_segmentation.h5", save_best_only=True)
]

In [None]:
# Train the compiled model, validating on val_gen after every epoch
epochs = 40
history = model.fit(train_gen, epochs=epochs, validation_data=val_gen, callbacks=callbacks)

In [None]:
# Per-epoch training and validation loss recorded by model.fit
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a dedicated name for the x-axis values so the `epochs` hyperparameter
# (an int used by the training cell) is not overwritten with a range object.
epoch_range = range(len(loss))

plt.figure()

plt.plot(epoch_range, loss, 'r', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

## 7. Inference

#### a) On Train Set 

In [None]:
# Rebuild the training generator with batch size 1 (this rebinds `train_gen`),
# so that batch index == sample index and no sample is dropped by the
# generator's floor-division length.
train_gen = Pedestrian_Datagenerator(1, img_size, train_input_img_paths, train_target_img_paths)
# Predict a mask for every training sample
train_preds = model.predict(train_gen)

In [None]:
# Number of examples to display
samples = 4
# Examples are drawn from the first `pool_size` samples (fewer if the
# generator is shorter), shuffled with a fixed seed for reproducibility.
pool_size = 20
candidates = list(range(min(pool_size, len(train_gen))))
random.Random(42).shuffle(candidates)
# Slice by `samples` (not a hard-coded 4) so changing `samples` just works
idxs = candidates[:samples]


print('\t\tImage \t\t\t\t Ground Truth \t\t\t\t Prediction')
for idx in idxs:
    # train_gen has batch size 1, so batch `idx` holds exactly sample `idx`
    x, y = train_gen[idx]
    pred = train_preds[idx]

    plt.figure(figsize=(16,16))

    # Image
    ax1 = plt.subplot(1, 3, 1)
    ax1.imshow(x[0])
    ax1.axis('off')

    # Mask
    ax2 = plt.subplot(1, 3, 2)
    ax2.imshow(y.squeeze())
    ax2.axis('off')

    # Pred
    ax3 = plt.subplot(1, 3, 3)
    ax3.imshow(pred.squeeze())
    ax3.axis('off')

#### b) On Validation Set 

In [None]:
# Rebuild the validation generator with batch size 1 (this rebinds `val_gen`),
# so that batch index == sample index.
val_gen = Pedestrian_Datagenerator(1, img_size, val_input_img_paths, val_target_img_paths)
# Predict a mask for every validation sample
val_preds = model.predict(val_gen)

In [None]:
# Number of examples to display
samples = 4
# Examples are drawn from the first `pool_size` samples (fewer if the
# generator is shorter), shuffled with a fixed seed for reproducibility.
pool_size = 20
candidates = list(range(min(pool_size, len(val_gen))))
random.Random(42).shuffle(candidates)
# Slice by `samples` (not a hard-coded 4) so changing `samples` just works
idxs = candidates[:samples]


print('\t\tImage \t\t\t\t Ground Truth \t\t\t\t Prediction')
for idx in idxs:
    # val_gen has batch size 1, so batch `idx` holds exactly sample `idx`
    x, y = val_gen[idx]
    pred = val_preds[idx]

    plt.figure(figsize=(16,16))

    # Image
    ax1 = plt.subplot(1, 3, 1)
    ax1.imshow(x[0])
    ax1.axis('off')

    # Mask
    ax2 = plt.subplot(1, 3, 2)
    ax2.imshow(y.squeeze())
    ax2.axis('off')

    # Pred
    ax3 = plt.subplot(1, 3, 3)
    ax3.imshow(pred.squeeze())
    ax3.axis('off')