# U-Net style model: main notebook
This notebook defines the main structure of the segmentation model and then trains it.

Importing needed packages and definition of useful variables

In [None]:
import tensorflow as tf
import time
import os
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
from datetime import datetime


# --- Reproducibility ---------------------------------------------------------
# Seed TensorFlow's global random generator; the same value is also handed to
# the training data generators further down.
seed = 1234
tf.random.set_seed(seed)

print(tf.__version__)

# --- Dataset layout ----------------------------------------------------------
# Everything is resolved relative to the directory the notebook is run from.
cwd = os.getcwd()
datasetDir = os.path.join(cwd, 'Segmentation_Dataset')

trainingImgDir, trainingMaskDir = [
    os.path.join(datasetDir, 'training', sub) for sub in ('images', 'masks')
]
validImgDir, validMaskDir = [
    os.path.join(datasetDir, 'validation', sub) for sub in ('images', 'masks')
]

# --- Hyper-parameters --------------------------------------------------------
# Batch size used by every data generator.
bs = 40

# Spatial size of the network input (height, width).
img_h = img_w = 256

# Timestamp used to give the saved model a unique file name.
now = datetime.now().strftime('%b%d_%H-%M-%S')

Building the training generator and the validation generator

In [None]:
# Augmentation shared by the training images and their masks. Both generators
# MUST use exactly the same geometric settings (and the same seed below),
# otherwise an image and its mask would be transformed differently.
augmentation = dict(
    rotation_range=10,
    width_shift_range=2,
    height_shift_range=2,
    zoom_range=0.05,
    fill_mode='reflect',
    rescale=1. / 255,
)

# Training set ( Images and Masks generators )
trainImgDataGen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation)

# Masks are emitted as integer class labels. The generator allocates NumPy
# batches, so a NumPy dtype is passed rather than a TensorFlow one.
# assumes masks are stored as 0/255 grayscale images, so rescaling yields
# labels in {0, 1} -- TODO confirm against the dataset.
# NOTE(review): rotation/zoom interpolate the mask before the integer cast;
# confirm that interpolated border pixels end up with the intended label.
trainMaskDataGen = tf.keras.preprocessing.image.ImageDataGenerator(
    dtype=np.int32,
    **augmentation
)

# Options common to both training flows: no labels (class_mode=None), images
# resized to the network input size, identical shuffling thanks to the seed.
trainFlowArgs = dict(
    class_mode=None,
    target_size=(img_h, img_w),
    shuffle=True,
    seed=seed,
    batch_size=bs,
)

trainImgGen = trainImgDataGen.flow_from_directory(trainingImgDir,
                                                  color_mode='rgb',
                                                  **trainFlowArgs)

trainMaskGen = trainMaskDataGen.flow_from_directory(trainingMaskDir,
                                                    color_mode='grayscale',
                                                    **trainFlowArgs)

# Pair every image batch with its mask batch. A generator expression is used
# on purpose: some TF 2.x versions only recognise real Python generators
# (not plain `zip` iterators) as generator input.
trainGen = (pair for pair in zip(trainImgGen, trainMaskGen))

# Validation set ( Images and Masks generators ): rescaling only, no
# augmentation and no shuffling, so images and masks stay aligned by order.
validImgDataGen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255
)

validMaskDataGen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    dtype=np.int32
)

validFlowArgs = dict(
    class_mode=None,
    target_size=(img_h, img_w),
    shuffle=False,
    batch_size=bs,
)

validImgGen = validImgDataGen.flow_from_directory(validImgDir,
                                                  color_mode='rgb',
                                                  **validFlowArgs)

validMaskGen = validMaskDataGen.flow_from_directory(validMaskDir,
                                                    color_mode='grayscale',
                                                    **validFlowArgs)

validGen = (pair for pair in zip(validImgGen, validMaskGen))

CNN structure: a U-Net-like architecture. Skip connections link each encoder stage to the matching decoder stage in order to obtain better predictions.

In [None]:

def _conv_relu(x, filters, kernel_size=(3, 3)):
    """Apply one 'same'-padded convolution with ReLU activation to ``x``."""
    return tf.keras.layers.Conv2D(filters=filters,
                                  kernel_size=kernel_size,
                                  padding='same',
                                  activation=tf.keras.activations.relu)(x)


def _double_conv(x, filters):
    """Apply two stacked 3x3 ReLU convolutions with ``filters`` channels each."""
    return _conv_relu(_conv_relu(x, filters), filters)


def _upsample_and_merge(x, skip, filters):
    """Upsample ``x`` with a stride-2 transposed convolution and concatenate ``skip``."""
    upsampled = tf.keras.layers.Conv2DTranspose(filters=filters,
                                                kernel_size=(3, 3),
                                                padding='same',
                                                strides=(2, 2),
                                                activation=tf.keras.activations.relu)(x)
    return tf.keras.layers.Concatenate()([upsampled, skip])


inp1 = tf.keras.Input(shape=(img_h, img_w, 3))

# Encoding part: five (double conv -> 2x2 max-pool) stages. The tensor taken
# just before each pooling is kept as the skip connection of that stage.
# With a 256x256 input the resolution after each pooling is 128, 64, 32, 16, 8.
skips = []
m = inp1
for n_filters in (16, 32, 64, 128, 256):
    m = _double_conv(m, n_filters)
    skips.append(m)
    m = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(m)
c1, c2, c3, c4, c5 = skips

# Bottleneck at the lowest resolution
c6 = _double_conv(m, 512)

# Decoding path with skip connections: each stage doubles the resolution,
# concatenates the encoder output of the same resolution and refines it.
c = c6
for n_filters, skip in zip((256, 128, 64, 32), (c5, c4, c3, c2)):
    c = _double_conv(_upsample_and_merge(c, skip, n_filters), n_filters)

# The last decoder stage differs from the others: a single 3x3 convolution
# followed by a 1x1 convolution down to 8 channels.
c = _conv_relu(_upsample_and_merge(c, c1, 16), 16)
c = _conv_relu(c, 8, kernel_size=(1, 1))

# Prediction layer
# Two feature maps in output, distinguishing between background and building
out = tf.keras.layers.Conv2D(filters=2,
                             kernel_size=(1, 1),
                             padding='same',
                             activation=tf.keras.activations.softmax)(c)

# Creation of the model
model = tf.keras.Model(inp1, out)

# Output shape: img_h x img_w x 2 (per-pixel softmax over the two classes)
model.summary()

# The network already outputs probabilities (softmax), hence from_logits=False.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

Definition of the metric taken into consideration: Intersection over Union

In [None]:
def my_IoU(y_true, y_pred):
    """Intersection over Union of the foreground class, computed over the whole batch.

    Args:
        y_true: ground-truth mask of 0/1 labels.
            assumes shape (batch, height, width, 1) -- TODO confirm against the generators.
        y_pred: per-pixel class probabilities with the classes on axis 3.

    Returns:
        Scalar float32 tensor ``|A and B| / |A or B|``. When both the target and
        the prediction contain no foreground pixel the union is empty and the
        function returns 1.0 (perfect agreement) instead of NaN.
    """
    # From probability to predicted class {0, 1}
    y_pred = tf.argmax(input=y_pred, axis=3)
    y_pred = tf.cast(y_pred, tf.float32)
    y_pred = tf.expand_dims(y_pred, -1)
    # The masks may arrive as integers: make the dtypes match before multiplying.
    y_true = tf.cast(y_true, tf.float32)
    # A and B
    intersection = tf.reduce_sum(y_true * y_pred)
    # A or B
    union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection
    # IoU, guarding against 0/0 when the union is empty
    return tf.where(union > 0,
                    tf.math.divide_no_nan(intersection, union),
                    tf.ones_like(union))

Training phase

In [None]:
# Directory where the trained model is stored (created if missing).
modelDir = os.path.join(cwd, 'models')
os.makedirs(modelDir, exist_ok=True)

model.compile(loss=loss, optimizer=optimizer, metrics=[my_IoU])

# The data is streamed batch by batch from the generators, which avoids keeping
# the whole dataset in memory. `Model.fit` accepts generators directly;
# `fit_generator` is deprecated and no longer available in recent Keras releases.
# The generators are endless, so the number of batches per epoch is given explicitly.
history = model.fit(trainGen,
                    epochs=50,
                    verbose=2,
                    steps_per_epoch=len(trainImgGen),
                    validation_data=validGen,
                    validation_steps=len(validImgGen))

model.save(os.path.join(modelDir, 'model_' + now + '.h5'))

Defining useful functions and performing predictions over the test set

In [None]:
# Directory that receives the CSV files with the encoded predictions.
csvDir = os.path.join(cwd, 'csv_files')

# exist_ok=True makes the creation idempotent (no separate existence check).
os.makedirs(csvDir, exist_ok=True)

def create_csv(results, results_dir, width=256, height=256):
    """Write the encoded predictions to a timestamped CSV file.

    The file is named ``results_<timestamp>.csv`` and contains the header
    ``ImageId,EncodedPixels,Width,Height`` followed by one row per entry of
    ``results``.

    Args:
        results: mapping from image id to its encoded mask string.
        results_dir: directory in which the file is written (created if missing).
        width: value written in the ``Width`` column of every row.
        height: value written in the ``Height`` column of every row.

    Returns:
        The path of the CSV file that was written.
    """
    os.makedirs(results_dir, exist_ok=True)

    csv_fname = 'results_' + datetime.now().strftime('%b%d_%H-%M-%S') + '.csv'
    csv_path = os.path.join(results_dir, csv_fname)

    with open(csv_path, 'w') as f:
        f.write('ImageId,EncodedPixels,Width,Height\n')
        for key, value in results.items():
            f.write('{},{},{},{}\n'.format(key, value, width, height))

    return csv_path

def rle_encode(img):
    """Run-length encode a binary mask, scanning it column by column.

    Args:
        img: the mask, either an object exposing ``.numpy()`` (e.g. an eager
            tensor) or anything ``np.asarray`` can convert. Every non-zero
            value is treated as foreground.

    Returns:
        A string of space-separated ``start length`` pairs, where ``start`` is
        the 1-based position of a foreground run in the column-wise flattened
        mask. An all-background mask yields an empty string.
    """
    mask = img.numpy() if hasattr(img, 'numpy') else np.asarray(img)
    # Flatten column-wise and binarise, so that the number of run boundaries
    # found below is always even (one start and one end per run).
    pixels = (mask.T.flatten() != 0).astype(np.uint8)
    # Pad with background on both sides so runs touching the borders are closed.
    pixels = np.concatenate([[0], pixels, [0]])
    # 1-based positions where the value changes: starts and ends alternate.
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Turn every end position into a run length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

# Directory holding the test images.
testImgDir = os.path.join(datasetDir, 'test', 'images', 'img')


def predict_test_image(file_name):
    """Load one test image and predict its segmentation mask.

    Args:
        file_name: name of an image file inside ``testImgDir``.

    Returns:
        A pair ``(img_array, mask)``: the image as a NumPy array scaled by
        1/255 with a leading batch axis, and a tensor with the predicted class
        index of every pixel (argmax over the class axis of the prediction).
    """
    # The context manager closes the file; convert('RGB') guarantees the three
    # channels the network input expects.
    with Image.open(os.path.join(testImgDir, file_name)) as img:
        img_array = np.array(img.convert('RGB'))
    img_array = np.expand_dims(img_array, 0)
    img_array = tf.cast(img_array, tf.float32) / 255
    prediction = model.predict(x=img_array)
    return np.array(img_array), tf.argmax(input=prediction, axis=3)


results = {}
print('Test set predictions...')
# Sorted for a deterministic order; anything that is not a regular file is skipped.
image_filenames = sorted(
    name for name in os.listdir(testImgDir)
    if os.path.isfile(os.path.join(testImgDir, name))
)
for file_name in image_filenames:
    _, mask = predict_test_image(file_name)
    # The image id is the file name without its extension.
    image_id = os.path.splitext(file_name)[0]
    results[image_id] = rle_encode(mask)

print('Work done.')
print('Writing results...')
create_csv(results, csvDir)
print('Results Written.')


# Visual check: every test image next to its predicted mask. One figure per
# image is created, shown and closed, so each pair is actually rendered and
# figures do not pile up in memory.
for file_name in image_filenames:
    img_array, mask = predict_test_image(file_name)
    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(img_array.squeeze())
    axarr[0].set_title(file_name)
    axarr[1].imshow(np.array(mask).squeeze())
    axarr[1].set_title('Predicted mask')
    plt.show()
    plt.close(f)