**Note**: This notebook was executed on Kaggle due to the lack of local computational resources.

In [1]:
pip install patchify

Collecting patchify
  Downloading patchify-0.2.3-py3-none-any.whl (6.6 kB)
Installing collected packages: patchify
Successfully installed patchify-0.2.3
Note: you may need to restart the kernel to use updated packages.


In [2]:
import keras_tuner as kt
from tensorflow import keras
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, Concatenate, Input, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD

import os
import sys
import shutil

sys.path.append(os.path.join(os.getcwd(), '..', 'input', 'cbamunet-tuning'))
sys.path.append(os.path.join(os.getcwd(), '..', 'input', 'road-segmentation-data-loader'))

from cbam_unet import *
from cbam import *
from load_data import *



In [3]:
import tensorflow as tf
from tensorflow.python.client import device_lib

# Prevent automatic GPU memory pre-allocation
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print(gpu)
    tf.config.experimental.set_memory_growth(gpu, True)

print(tf.__version__)

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
2.13.0


# 0. Get dataset

In [4]:
# Read in images from directory and create tf.data.Dataset
# Get file names
imgs_dir = os.path.join(os.path.dirname(os.getcwd()), 'input', 'road-segmentation-ds', 'training', 'images')
gts_dir = os.path.join(os.path.dirname(os.getcwd()), 'input', 'road-segmentation-ds', 'training', 'groundtruth')

# Since using iou loss, need to one hot encode the groundtruth images
original_dataset = create_dataset(imgs_dir, gts_dir, one_hot = True)

print(f'Loaded data set: {original_dataset}')

Loaded data set: <_MapDataset element_spec=(TensorSpec(shape=(400, 400, 3), dtype=tf.float32, name=None), TensorSpec(shape=(400, 400, 2), dtype=tf.float32, name=None))>


In [5]:
# Generate the patches
patched_dataset = generate_patches(original_dataset, (128,128), 0, True, True)
print(f'Patched data set: {patched_dataset}')

Patched data set: <_TensorSliceDataset element_spec=(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128, 2), dtype=tf.float32, name=None))>


In [6]:
# Split into train and validation split
# Augmentation is only performed on the training split

# Shuffle the dataset
# Seed of 1 for reproducibility
train_prop = 0.9
num_samples = len(patched_dataset)

# seed and reshuffle_each_iteration = False to prevent reshuffling after each iteration of dataset for reproducibility
seed = 1
shuffled_dataset = patched_dataset.shuffle(buffer_size = num_samples, seed = seed, reshuffle_each_iteration = False)  
print(f'Shuffled data set: {shuffled_dataset}, no. of samples: {len(shuffled_dataset)}')

train_size = int(train_prop*num_samples)
train_dataset = shuffled_dataset.take(train_size)
val_dataset = shuffled_dataset.skip(train_size)
print(f'Train data set: {train_dataset}, no.of samples: {len(train_dataset)}')
print(f'Validation data set: {val_dataset}, no.of samples: {len(val_dataset)}')

Shuffled data set: <_ShuffleDataset element_spec=(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128, 2), dtype=tf.float32, name=None))>, no. of samples: 1600
Train data set: <_TakeDataset element_spec=(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128, 2), dtype=tf.float32, name=None))>, no.of samples: 1440
Validation data set: <_SkipDataset element_spec=(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128, 2), dtype=tf.float32, name=None))>, no.of samples: 160


In [7]:
# Augment the train split
print(f'Original train dataset: {train_dataset}')
print(f'No. of samples before augmenting: {len(train_dataset)}')

num_images = [2, 1, 2] # [num_brightness, num_rotation, num_noise]
augmented_train_dataset = create_augmented_dataset(train_dataset, num_images)

print(f'Augmented train dataset: {augmented_train_dataset}')
print(f'No. of samples after augmenting: {len(augmented_train_dataset)}')

Original train dataset: <_TakeDataset element_spec=(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128, 2), dtype=tf.float32, name=None))>
No. of samples before augmenting: 1440
Augmented train dataset: <_ConcatenateDataset element_spec=(TensorSpec(shape=(128, 128, 3), dtype=tf.float32, name=None), TensorSpec(shape=(128, 128, 2), dtype=tf.float32, name=None))>
No. of samples after augmenting: 25920


In [8]:
# Split augmented train dataset into train and validation. These validations will be used to validate the model while the
# validation dataset obtained prior to augmenting the dataset will be used to test the model

# Seperate the datasets into img and gt and store as np.asarray
train_imgs = np.asarray(list(augmented_train_dataset.map(lambda img, gt: img)))
train_gts = np.asarray(list(augmented_train_dataset.map(lambda img, gt: gt)))
val_imgs = np.asarray(list(val_dataset.map(lambda img, gt: img)))
val_gts = np.asarray(list(val_dataset.map(lambda img, gt: gt)))

# Check shapes of np arrays
print(f'train_imgs shape: {train_imgs.shape}')
print(f'train_gts shape: {train_gts.shape}')
print(f'val_imgs shape: {val_imgs.shape}')
print(f'val_gts shape: {val_gts.shape}')

train_imgs shape: (25920, 128, 128, 3)
train_gts shape: (25920, 128, 128, 2)
val_imgs shape: (160, 128, 128, 3)
val_gts shape: (160, 128, 128, 2)


# 1. Define custom loss and prepare model

In [9]:
# Get class weights
# Determine class weights which will be used in model later on
road_pixel_prop, bg_pixel_prop = get_class_weights(imgs_dir, gts_dir)
print(f'Road pixel proportions: {road_pixel_prop}, Background pixel proportions: {bg_pixel_prop}')

ROAD_WEIGHT = 1./road_pixel_prop
BG_WEIGHT = 1./bg_pixel_prop
print(f'Road weight: {ROAD_WEIGHT}, Background weight: {BG_WEIGHT}')

Road pixel proportions: 0.22525987499999997, Background pixel proportions: 0.7747401249999997
Road weight: 4.439317033271017, Background weight: 1.2907554000768973


In [10]:
# Define loss function
def flatten_arrs(y_true, y_pred):

        y_true_background = K.flatten(y_true[...,0])
        y_true_road = K.flatten(y_true[...,1:])

        y_pred_background = K.flatten(y_pred[...,0])
        y_pred_road = K.flatten(y_pred[...,1:])

        return y_true_background, y_true_road, y_pred_background, y_pred_road

    
def compute_intersection(y_true, y_pred):

    return tf.cast(tf.reduce_sum(y_true * y_pred), dtype = tf.float32)


def compute_union(y_true, y_pred):

    intersection = compute_intersection(y_true, y_pred)
    return tf.cast(tf.reduce_sum(y_true), dtype = tf.float32) + tf.cast(tf.reduce_sum(y_pred), dtype = tf.float32) - intersection


def iou(y_true, y_pred):

    epsilon = 1e-7

    intersection = compute_intersection(y_true, y_pred)
    union = compute_union(y_true, y_pred)

    return (intersection + epsilon)/ (union + epsilon)


def compute_iou(y_true, y_pred, background_weight = 0.5, road_weight = 0.5):

    y_true_background, y_true_road, y_pred_background, y_pred_road = flatten_arrs(y_true, y_pred)

    background_iou = iou(y_true_background, y_pred_background)
    road_iou = iou(y_true_road, y_pred_road)

    return (BG_WEIGHT*background_iou) + (ROAD_WEIGHT*road_iou)


def compute_loss(y_true, y_pred):

    '''Implementation code to calculate weighted iou loss. 

    Args:
        y_true: tensor of shape (batch_size, 400, 400, 1), containing pixel values of groundtruth image
        y_pred: tensor of shape (batch_size, 400, 400, 2), containing pixel values of prediction from model. Each channel corresponds to one segmented class.

    Returns:
        loss
    '''    

    # add a constant to ensure that loss is positive when applying class weights
    constant = 10.
    return (1. - compute_iou(y_true, y_pred, BG_WEIGHT, ROAD_WEIGHT)) + 10.

In [11]:
def apply_att_and_crop(x_att_applied, x, new_size):

    '''Implementation code for applying attention and concatenating it to upsampled feature map

    Args:
        x: Input features of shape (height, width, channel) from downsampled portion
        depth: Depth in U-Net model, to decide which CBAM_Module to apply
        new_size: Desired size of cropped image, to match with the upsampled size from deeper layers

    Returns:
        cropped features from downsmpled portion with attention applied
    '''

    # Apply attention
    x_att_shape = tf.shape(x_att_applied)

    # Calculate the crop sizes for height and width
    h_crop_size = (x_att_shape[1] - new_size[1]) // 2
    w_crop_size = (x_att_shape[2] - new_size[2]) // 2

    # Calculate the starting and ending indices for cropping along height
    h_start = h_crop_size
    h_end = h_start + new_size[1]

    # Calculate the starting and ending indices for cropping along width
    w_start = w_crop_size
    w_end = w_start + new_size[2]

    # Return the cropped image tensor
    return x_att_applied[:, h_start:h_end, w_start:w_end, :]

In [12]:
class HyperModel(kt.HyperModel):
    
    def build(self, hp):

        lr = hp.Choice('learning_rate', [0.01, 0.001, 0.0001])
        pooling_dropout = hp.Boolean('pooling_dropout', default = False)
        l2_reg_conv2d = hp.Choice('l2_reg_conv2d', [0.0, 0.01])
        
        cbam_depth1 = CBAM_Module(1, 64)
        cbam_depth2 = CBAM_Module(2, 128)
        cbam_depth3 = CBAM_Module(3, 256)
        cbam_depth4 = CBAM_Module(4, 512)

        inpt = Input(shape=(128,128,3))

        x_0 = inpt
        # encoder conv. block 1
        x = Conv2D(filters = 64, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(inpt)
        x = Conv2D(filters = 64, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x_1 = x
        x = MaxPooling2D(pool_size = 2, strides = 2, padding = 'same')(x)
        
        if pooling_dropout:
            x = Dropout(rate = 0.2)(x)
            
        # encoder conv. block 2
        x = Conv2D(filters = 128, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 128, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x_2 = x
        x = MaxPooling2D(pool_size = 2, strides = 2, padding = 'same')(x)
        
        if pooling_dropout:
            x = Dropout(rate = 0.2)(x)
            
        # encoder conv. block 3
        x = Conv2D(filters = 256, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 256, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x_3 = x
        x = MaxPooling2D(pool_size = 2, strides = 2, padding = 'same')(x)

        if pooling_dropout:
            x = Dropout(rate = 0.2)(x)
            
        # encoder conv. block 4
        x = Conv2D(filters = 512, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 512, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)    
        x_4 = x
        x = MaxPooling2D(pool_size = 2, strides = 2, padding = 'same')(x)

        if pooling_dropout:
            x = Dropout(rate = 0.2)(x)
            
        # encoder conv. block 5
        x = Conv2D(filters = 1024, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 1024, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)

        # conv transpose depth 5
        x = Conv2DTranspose(filters = 512, kernel_size = 2, strides = 2, padding = 'same')(x)
        attention_x_4 = cbam_depth4(x_4)
        attention_x_4 = apply_att_and_crop(attention_x_4, x_4, tf.shape(x_4))
        x = Concatenate(axis = -1)([attention_x_4, x])
        # decoder conv. block 4
        x = Conv2D(filters = 512, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 512, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)

        # conv transpose depth 4
        x = Conv2DTranspose(filters = 256, kernel_size = 2, strides = 2, padding = 'same')(x)
        attention_x_3 = cbam_depth3(x_3)
        attention_x_3 = apply_att_and_crop(attention_x_3, x_3, tf.shape(x))
        x = Concatenate(axis = -1)([attention_x_3, x])
        # decoder conv. block 3
        x = Conv2D(filters = 256, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 256, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)

        # conv transpose depth 3
        x = Conv2DTranspose(filters = 128, kernel_size = 2, strides = 2, padding = 'same')(x)
        attention_x_2 = cbam_depth2(x_2)
        attention_x_2 = apply_att_and_crop(attention_x_2, x_2, tf.shape(x))
        x = Concatenate(axis = -1)([attention_x_2, x])
        # decoder conv. block 2
        x = Conv2D(filters = 128, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 128, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)

        # conv transpose depth 2
        x = Conv2DTranspose(filters = 64, kernel_size = 2, strides = 2, padding = 'same')(x)
        attention_x_1 = cbam_depth1(x_1)
        attention_x_1 = apply_att_and_crop(attention_x_1, x_1, tf.shape(x))
        x = Concatenate(axis = -1)([attention_x_1, x])

        # output conv. block
        x = Conv2D(filters = 64, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        x = Conv2D(filters = 64, kernel_size = 3, strides = 1, padding = 'same', activation = 'relu',
                   kernel_initializer = 'he_normal', kernel_regularizer = keras.regularizers.L2(l2_reg_conv2d))(x)
        output = Conv2D(filters = 2, kernel_size = 1, activation = 'sigmoid')(x)
        
        model = tf.keras.Model(inputs = inpt, outputs = output)

        optimiser_choices = ['Adam', 'SGD']
        optimiser = hp.Choice('optimiser', optimiser_choices)

        if optimiser == "adam":
            optimiser = tf.keras.optimizers.Adam(learning_rate=lr)
        elif optimiser == "SGD":
            optimiser = tf.keras.optimizers.SGD(learning_rate=lr)

        model.compile(optimizer = optimiser, loss = compute_loss, metrics = ['accuracy'])

        return model

    def fit(self, hp, model, x_train, y_train, validation_data = None, **kwargs):
        
        return model.fit(x_train, y_train,
                         validation_data = validation_data,
                         batch_size = hp.Choice('batch_size', [4, 8, 16]),
                         **kwargs,
                        )

# 2. Start tuning

In [13]:
CONTINUE_TUNE = True

In [14]:
if CONTINUE_TUNE:
    source_path = os.path.join(os.path.dirname(os.getcwd()), 'input', 'cbamunet-tuning-trials', 'cbam_tuning')
    destination_path = os.path.join(os.getcwd(), 'cbam_tuning')
    shutil.copytree(source_path, destination_path)
    tuner = kt.BayesianOptimization(
            HyperModel(),
            objective = 'val_loss',
            max_trials = 25,
            directory = 'cbam_tuning',
            overwrite = False,
            project_name = 'cbam_tuning_run1'
            )

else:
    tuner = kt.BayesianOptimization(
        HyperModel(),
        objective = 'val_loss',
        max_trials = 25,
        directory = 'cbam_tuning',
        project_name = 'cbam_tuning_run1'
        )

In [15]:
tuner.search_space_summary()

Search space summary
Default search space size: 5
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
pooling_dropout (Boolean)
{'default': False, 'conditions': []}
l2_reg_conv2d (Choice)
{'default': 0.0, 'conditions': [], 'values': [0.0, 0.01], 'ordered': True}
optimiser (Choice)
{'default': 'Adam', 'conditions': [], 'values': ['Adam', 'SGD'], 'ordered': False}
batch_size (Choice)
{'default': 4, 'conditions': [], 'values': [4, 8, 16], 'ordered': True}


In [16]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
tuner.search(train_imgs, train_gts,
            validation_data = (val_imgs, val_gts),
            epochs = 100,
            callbacks = [stop_early])

Trial 25 Complete [00h 25m 16s]
val_loss: 9.048707962036133

Best val_loss So Far: 6.797310829162598
Total elapsed time: 03h 12m 59s


# 3. Analysis of results

In [17]:
tuner.results_summary()

Results summary
Results in cbam_tuning/cbam_tuning_run1
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 15 summary
Hyperparameters:
learning_rate: 0.01
pooling_dropout: True
l2_reg_conv2d: 0.0
optimiser: SGD
batch_size: 16
Score: 6.797310829162598

Trial 21 summary
Hyperparameters:
learning_rate: 0.01
pooling_dropout: False
l2_reg_conv2d: 0.0
optimiser: SGD
batch_size: 16
Score: 7.055900573730469

Trial 01 summary
Hyperparameters:
learning_rate: 0.001
pooling_dropout: False
l2_reg_conv2d: 0.0
optimiser: SGD
batch_size: 16
Score: 7.1150922775268555

Trial 03 summary
Hyperparameters:
learning_rate: 0.001
pooling_dropout: True
l2_reg_conv2d: 0.0
optimiser: SGD
batch_size: 16
Score: 7.579310417175293

Trial 05 summary
Hyperparameters:
learning_rate: 0.001
pooling_dropout: False
l2_reg_conv2d: 0.0
optimiser: SGD
batch_size: 4
Score: 8.634018898010254

Trial 00 summary
Hyperparameters:
learning_rate: 0.01
pooling_dropout: False
l2_reg_conv2d: 0.0
optimiser: SGD
batc

In [18]:
best_hp = tuner.get_best_hyperparameters()[0]
print(best_hp.values)

{'learning_rate': 0.01, 'pooling_dropout': True, 'l2_reg_conv2d': 0.0, 'optimiser': 'SGD', 'batch_size': 16}
