# Semantic Segmentation with Neural Networks

In this session we will see how to build a simple ConvNet for a toy semantic segmentation problem. We will train the network from scratch and examine the particularities of the problem. Finally, we will use a state-of-the-art semantic segmentation network on some real-world images.

## A toy example

We will deal with images containing simple objects of different shapes and colors. Our goal is: given such an image, classify each pixel with a label corresponding to the type of object it belongs to.

### Data preparation

We will create the dataset ourselves with these lines of code:

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

In [None]:
# Maps a 3-channel pixel colour to its class id. Colours that are not listed
# fall back to class 0 inside convert_from_color_segmentation.
palette = {(0,0,0):0, (0,0,255):1,(0,255,0):2,(255,0,0):3}
def convert_from_color_segmentation(arr_3d, palette, image_height=32, image_width=32):
    """Convert a colour-coded segmentation image into a 2-D array of class ids.

    Parameters
    ----------
    arr_3d : array of shape (height, width, 3)
        Colour-coded mask. Each pixel is compared against the keys of
        ``palette`` in the channel order in which it is stored.
    palette : dict
        Maps a 3-tuple colour to an integer class id. Pixels whose colour is
        not a key of ``palette`` are assigned class 0.
    image_height, image_width : int, optional
        Retained for backward compatibility only. The output shape is taken
        from ``arr_3d`` itself, so any image size is accepted.

    Returns
    -------
    numpy.ndarray
        Array of shape ``arr_3d.shape[0:2]`` with the same dtype as ``arr_3d``.
    """
    arr_3d = np.asarray(arr_3d)

    # Start with every pixel at the fallback class 0, then stamp each palette
    # colour with one vectorised comparison instead of a per-pixel Python loop.
    arr_2d = np.zeros(arr_3d.shape[0:2], dtype=arr_3d.dtype)
    for color, label in palette.items():
        matches = np.all(arr_3d == np.asarray(color), axis=-1)
        arr_2d[matches] = label

    return arr_2d

In [None]:
import cairo

# ---- Dataset configuration -------------------------------------------------
num_imgs = 5000

img_size = 32
min_object_size = 4
max_object_size = 16
num_objects = 4

shape_labels = ['rectangle', 'circle', 'triangle']
num_shapes = len(shape_labels)

# Pixel buffers that cairo draws into directly (one surface per image).
imgs = np.zeros((num_imgs, img_size, img_size, 4), dtype=np.uint8)  # format: BGRA
masks = np.zeros((num_imgs, img_size, img_size, 4), dtype=np.uint8)
masks_decoded = []
# Shape index (into shape_labels) of every object drawn in every image.
shapes = np.zeros((num_imgs, num_objects), dtype=int)

# Base RGB colours an object can take in the input image.
colors = [[0,0,1],[0,1,0],[1,0,0],[1,0,1],[1,1,0]]
num_colors = len(colors)

# RGB colour used in the mask for each shape index; these are the colours
# that `palette` translates into class ids.
mask_colors = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]

# Maximum magnitude of the random per-channel colour jitter.
max_offset = 0.3


def _sample_shape_params(shape):
    """Draw random geometry for one object of the given shape index.

    Returns (x, y, w, h) for rectangles and triangles, with (x, y) the
    top-left corner, and (x, y, radius) for circles, with (x, y) the centre.
    """
    if shape == 1:  # circle
        radius = 0.5 * np.random.randint(min_object_size, max_object_size)
        # radius may be fractional; randint needs integer bounds, so truncate
        # explicitly instead of handing floats to numpy.
        low, high = int(radius), int(img_size - radius)
        x = np.random.randint(low, high)
        y = np.random.randint(low, high)
        return x, y, radius
    w, h = np.random.randint(min_object_size, max_object_size, size=2)
    x = np.random.randint(0, img_size - w)
    y = np.random.randint(0, img_size - h)
    return x, y, w, h


def _trace_shape(ctx, shape, params):
    """Add the outline of one object to the current path of cairo context ctx."""
    if shape == 0:  # rectangle
        x, y, w, h = params
        ctx.rectangle(x, y, w, h)
    elif shape == 1:  # circle
        x, y, radius = params
        ctx.arc(x, y, radius, 0, 2*np.pi)
    elif shape == 2:  # right triangle; close_path draws the final edge
        x, y, w, h = params
        ctx.move_to(x, y)
        ctx.line_to(x+w, y)
        ctx.line_to(x+w, y+h)
        ctx.close_path()


for i_img in range(num_imgs):

    surface = cairo.ImageSurface.create_for_data(imgs[i_img], cairo.FORMAT_ARGB32, img_size, img_size)
    surface_mask = cairo.ImageSurface.create_for_data(masks[i_img], cairo.FORMAT_ARGB32, img_size, img_size)

    cr = cairo.Context(surface)
    cr_mask = cairo.Context(surface_mask)
    # Anti-aliasing would blend mask colours along curved/diagonal edges and
    # where objects overlap; blended colours are not in `palette` and would be
    # decoded as background. Keep every mask pixel an exact palette colour.
    cr_mask.set_antialias(cairo.ANTIALIAS_NONE)

    # Fill background white.
    cr.set_source_rgb(1, 1, 1)
    cr.paint()

    # Mask background is black (class 0).
    cr_mask.set_source_rgb(0, 0, 0)
    cr_mask.paint()

    # Draw random shapes; later objects are painted over earlier ones.
    for i_object in range(num_objects):
        shape = np.random.randint(num_shapes)
        shapes[i_img, i_object] = shape
        params = _sample_shape_params(shape)

        # Mask: same geometry, flat colour that identifies the shape class.
        _trace_shape(cr_mask, shape, params)
        cr_mask.set_source_rgb(*mask_colors[shape])
        cr_mask.fill()

        # Image: random base colour plus a small random offset per channel.
        color = np.random.randint(num_colors)
        r, g, b = colors[color]
        r_offset, g_offset, b_offset = max_offset * 2. * (np.random.rand(3) - 0.5)
        _trace_shape(cr, shape, params)
        # NOTE(review): only the red channel additionally subtracts max_offset;
        # kept exactly as before - confirm whether this bias is intended.
        cr.set_source_rgb(r-max_offset+r_offset, g+g_offset, b+b_offset)
        cr.fill()

    # Make sure all drawing has reached the numpy buffers before reading them.
    surface.flush()
    surface_mask.flush()
    masks_decoded.append(
        convert_from_color_segmentation(masks[i_img][:,:,0:3], palette, img_size, img_size))

# Drop the alpha channel and reverse the remaining three (BGR -> RGB).
imgs = imgs[..., 2::-1]
masks_decoded = np.array(masks_decoded)

In [None]:
# Sanity check: display the shapes of the decoded label masks and of the images.
masks_decoded.shape, imgs.shape

Let's look at one of the samples we created.

In [None]:
# Show the label mask of sample 2 first ...
plt.imshow(masks_decoded[2])
plt.show()
# ... then the image it belongs to.
plt.imshow(imgs[2])

In [None]:
from keras.utils import to_categorical

# One class per shape plus one for the background.
num_classes = len(shape_labels) + 1

# NOTE: masks_decoded is rebound to a flat (n_pixels, 1) column of class ids here.
masks_decoded = masks_decoded.reshape((-1, 1))

# One-hot encode every pixel, then restore the (image, row, column) layout
# with the class axis last.
one_hot_flat = to_categorical(masks_decoded, num_classes=num_classes)
masks_decoded_cat = one_hot_flat.reshape((num_imgs, img_size, img_size, num_classes))

In [None]:
# Standardise the images with the global mean and standard deviation, so X has
# mean ~0 and standard deviation ~1 by construction.
X = (imgs - np.mean(imgs)) / np.std(imgs)
X.shape

In [None]:
# Fixed-index split without shuffling: first 60 % train, next 10 % validation,
# remaining 30 % test.
i_train = int(0.6 * num_imgs)
i_val = int(0.7 * num_imgs)
split_points = [i_train, i_val]

train_X, val_X, test_X = np.split(X, split_points)
train_y, val_y, test_y = np.split(masks_decoded_cat, split_points)

# Un-normalised test images, kept for visualisation.
test_imgs = imgs[i_val:]

### Model

We will build a simple model composed of 4 convolutional layers.

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D

In [None]:
# Fully convolutional network: 'same' padding keeps the spatial size, so the
# output has one softmax distribution over the classes for every input pixel.
model = Sequential()
model.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu',
                 input_shape=(X.shape[1:])))
for n_filters in (64, 32):
    model.add(Conv2D(filters=n_filters, kernel_size=3, padding='same', activation='relu'))
# One output channel per shape class plus one for the background.
model.add(Conv2D(filters=num_shapes+1, kernel_size=3, padding='same', activation='softmax'))

model.compile(optimizer='adadelta', loss='categorical_crossentropy')
model.summary()

### Training

Let's train!

In [None]:
n_epochs = 50

# Fit on the training split and evaluate on the validation split after each epoch.
history = model.fit(
    x=train_X,
    y=train_y,
    validation_data=(val_X, val_y),
    epochs=n_epochs,
    batch_size=512,
)

In [None]:
# The per-epoch values recorded during fit(); list which series are available.
history_dict = history.history
history_dict.keys()

In [None]:
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

# Draw both curves on the current axes: blue dots for training,
# a solid blue line for validation.
ax = plt.gca()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

### Testing

In [None]:
# Per-pixel class probabilities (softmax output) for the test images.
preds = model.predict(test_X)

In [None]:
# Class ids on the colour scale - Background:0, Triangle:1, Circle:2, Square:3

# Number of test samples to visualise.
n_preview = 5

for i,pred in enumerate(preds[0:n_preview]):
    fig, (ax0, ax1,ax2) = plt.subplots(ncols=3,figsize=(30,10))

    # Most likely class for every pixel.
    argmax_pred = np.argmax(pred,axis=-1)
    # Highest class id, derived from the prediction's class axis so the colour
    # scale stays correct if the number of classes changes.
    max_label = pred.shape[-1] - 1

    cf0 = ax0.imshow(test_imgs[i])
    ax0.set_title('Input image')
    fig.colorbar(cf0,ax=ax0)

    cf1 = ax1.imshow(argmax_pred,vmin=0,vmax=max_label,cmap='magma')
    ax1.set_title('Prediction')
    fig.colorbar(cf1,ax=ax1)

    cf2 = ax2.imshow(np.argmax(test_y[i],axis=-1),vmin=0,vmax=max_label,cmap='magma')
    ax2.set_title('Ground truth')
    fig.colorbar(cf2,ax=ax2)

    plt.show()

## Semantic Segmentation of Real-World Images

In [None]:
# See: https://github.com/mzaradzki/neuralnets/blob/master/vgg_segmentation_keras/fcn16s_segmentation_keras2.ipynb