# Semantic Segmentation of Water using U-Net
# Part 7 - Hyperparameter Tuning

In [2]:
%matplotlib inline
# Standard library
import json, os
import pickle
import re
import time
import warnings
from itertools import product
from random import shuffle

# Third-party
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.layers import concatenate, Conv2DTranspose
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, save_img

# Project-local
from unetlib.metrics import BinaryMeanIoU
from unetlib.model import UNet_BN
from unetlib.preprocessing import get_lakes_with_masks, make_dataframes_for_flow, make_img_msk_flows
import unetlib.visualisation as vs
from unetlib.pipelines import train_unet

To tune the learning rate hyperparameter, and any others for that matter, a selection of values should be tried and whichever yields the best validation loss should be kept. It is not practical or efficient to test every possible value so a common strategy in the literature is to test powers of 10 e.g. 0.001, 0.01, 0.1 etc.

Another common approach is to allow the learning rate to be decreased during the training process. This means that earlier steps can make larger movements but, as the model converges, the learning rate gets smaller so that small steps can be made to avoid overshooting the minimum. One way of implementing this is to use a learning rate scheduler to reduce the rate after a certain number of epochs, though it can be difficult to determine at which epochs the rate should be reduced. An alternative is to reduce the learning rate whenever the loss doesn't improve for a certain number of epochs.

The `RMSProp` optimiser I'm using also has a `momentum` parameter. This essentially allows gradient descent to build up speed and can help it pass local minima or saddle points. RMSProp also includes a decay factor, `rho`, which controls the moving average of squared gradients used to scale each update; this dampens the step size and helps avoid overshooting the minimum.

Speed up convergence - batch norm / learning rate / momentum

Dropout (if overfitting)

Activations - could try sigmoid or tanh (adjust batch norm appropriately)

Ensemble? e.g. train several smaller models and average the predictions

In [101]:
# Imagery directories
# Both paths are relative to the notebook's working directory and are passed
# to `train_unet` further down as the image and mask locations.
nwpu_data_dir = 'nwpu_lake_images/data/'   # input images
nwpu_mask_dir = 'nwpu_lake_images/masks/'  # masks (presumably one per image - TODO confirm)

In [102]:
def train_unet(model, nwpu_data_dir, nwpu_mask_dir, callbacks=None,
               batch_size=16, epochs=100, test_size=0.25, val_split=0.3,
               random_state=42, aug_dict=None):
    """
    Convenience wrapper to train a model on the NWPU data.

    The data is first split into train/test sets, the training set is then
    split again into training and validation flows, and finally
    ``model.fit`` is run on those flows.

    Note: defining this function in the notebook shadows the ``train_unet``
    imported from ``unetlib.pipelines`` at the top of the file.

    Parameters
    ----------
    model
        Object exposing a Keras-style ``fit`` method (presumably an already
        compiled ``tf.keras`` model - this function does not compile it).
    nwpu_data_dir
        Directory holding the images; forwarded to
        ``make_dataframes_for_flow`` and ``make_img_msk_flows``.
    nwpu_mask_dir
        Directory holding the masks; forwarded to the same helpers.
    callbacks
        Passed straight through to ``model.fit``. Default ``None``.
    batch_size
        Batch size for the generators; also used to derive the number of
        steps per epoch. Default 16.
    epochs
        Number of epochs passed to ``model.fit``. Default 100.
    test_size
        ``test_size`` argument for ``make_dataframes_for_flow``.
        Default 0.25 (the previously hard-coded value).
    val_split
        ``val_split`` argument for ``make_img_msk_flows``.
        Default 0.3 (the previously hard-coded value).
    random_state
        ``random_state`` argument for ``make_dataframes_for_flow``.
        Default 42 (the previously hard-coded value).
    aug_dict
        Augmentation settings forwarded to ``make_img_msk_flows``. When
        ``None`` the original notebook settings are used (rotation, flips,
        shifts and zoom). Pass an explicit dict to override them.

    Returns
    -------
    history
        Whatever ``model.fit`` returns (a Keras ``History`` object for
        ``tf.keras`` models).

    """
    # Split the test/train data. The held-out test frames are not needed for
    # training, so they are discarded here.
    (train_img_df, train_msk_df,
     _test_img_df, _test_msk_df) = make_dataframes_for_flow(nwpu_data_dir,
                                                            nwpu_mask_dir,
                                                            test_size=test_size,
                                                            random_state=random_state
                                                            )

    # Default augmentation. Built per call (rather than as a default argument)
    # so that no mutable dict is shared between calls.
    if aug_dict is None:
        aug_dict = {'rotation_range': 90,
                    'horizontal_flip': True,
                    'vertical_flip': True,
                    'width_shift_range': 0.15,
                    'height_shift_range': 0.15,
                    'zoom_range': 0.25
                    }

    # Split the training data into train and validation generators
    # (per the original author's note, augmentation is applied to the
    # training data only - that is handled inside make_img_msk_flows).
    (train_gen, val_gen,
     train_fps, val_fps) = make_img_msk_flows(train_img_df, train_msk_df,
                                              nwpu_data_dir, nwpu_mask_dir,
                                              val_split=val_split,
                                              rescale=1 / 255.,
                                              aug_dict=aug_dict,
                                              batch_size=batch_size
                                              )

    # Compute steps per epoch: round up so a final partial batch is included
    train_steps = int(np.ceil(len(train_fps) / batch_size))
    val_steps = int(np.ceil(len(val_fps) / batch_size))

    # Train the model
    history = model.fit(train_gen, epochs=epochs, steps_per_epoch=train_steps,
                        validation_data=val_gen, validation_steps=val_steps,
                        callbacks=callbacks
                        )

    return history

In [118]:
class TrainingTimer(tf.keras.callbacks.Callback):
    """Times the model's training process.

    Pass an instance in the ``callbacks`` list of ``model.fit``. Wall-clock
    timestamps are stored in ``self.logs`` under the keys ``'start_time'``,
    ``'stop_time'`` and ``'runtime'`` (seconds). The ``runtime_*`` helpers
    return 0 until a training run has finished.
    """
    def __init__(self):
        # Initialise the Keras base class so the attributes it manages
        # exist before Keras attaches the callback to a model.
        super().__init__()
        self.logs = {}

    def on_train_begin(self, logs=None):
        # Drop results of any earlier run so a reused timer never reports a
        # stale runtime while a new fit is still in progress.
        self.logs.pop('stop_time', None)
        self.logs.pop('runtime', None)
        self.logs['start_time'] = time.time()

    def on_train_end(self, logs=None):
        self.logs['stop_time'] = time.time()
        self.logs['runtime'] = self.logs['stop_time'] - self.logs['start_time']

    def runtime_seconds(self):
        """Return the duration of the last completed run in seconds (0. if none)."""
        return self.logs.get('runtime', 0.)

    def runtime_minutes(self):
        """Return the duration of the last completed run in minutes."""
        return self.runtime_seconds() / 60

    def runtime_hours(self):
        """Return the duration of the last completed run in hours."""
        return self.runtime_seconds() / 3600

## Optimiser and Learning Rate

In [104]:
def compile_model(model, optimiser, lr=None):
    """Convenience function to compile model

    Compiles ``model`` in place with binary cross-entropy loss and a
    ``BinaryMeanIoU(threshold=0.5)`` metric.

    Parameters
    ----------
    model
        Object exposing a Keras-style ``compile`` method.
    optimiser
        Either an optimiser name (str), which is resolved with
        ``tf.keras.optimizers.get``, or any other object, which is handed to
        ``model.compile`` unchanged.
    lr
        Learning rate to set on the optimiser. Only applied when
        ``optimiser`` is given as a string; an optimiser passed as an object
        is assumed to be configured already and ``lr`` is ignored.
        ``None`` (default) keeps the optimiser's own default rate.

    Returns
    -------
    None

    """
    # configure optimiser
    if isinstance(optimiser, str):
        optimiser = tf.keras.optimizers.get(optimiser)
        if lr is not None:
            # Use the canonical `learning_rate` attribute; `lr` is a legacy
            # alias, and `learning_rate` is also the name the optimiser
            # reports in `get_config()`.
            optimiser.learning_rate = lr

    # Compile model
    model.compile(optimizer=optimiser,
                  loss='binary_crossentropy',
                  metrics=[BinaryMeanIoU(threshold=0.5)]
                  )

In [115]:
# Search over every optimiser / learning-rate combination, training a fresh
# model for each and saving its best weights and training history.
train_times = {}
optimisers = ['RMSProp', 'Adam']
learning_rates = [0.001, 0.01, 0.1]
save_dir = 'model_outputs/'

# The checkpoint and pickle writes below fail if the directory is missing
os.makedirs(save_dir, exist_ok=True)

for opt, lr in product(optimisers, learning_rates):
    # Several models are built in this loop; reset Keras' global state so it
    # does not keep accumulating from one iteration to the next.
    tf.keras.backend.clear_session()

    model = UNet_BN(n_filters=64, n_blocks=4, bn_pos='before', model_name='deepwide')
    compile_model(model, opt, lr)

    # Generate filename for output files.
    # The trailing '{}' is left as a placeholder so the same stem can be
    # completed with different extensions via base_fn.format(...).
    opt_conf = model.optimizer.get_config()
    o_name = opt_conf['name']
    o_lr = f"{float(opt_conf['learning_rate']):.3f}"
    base_fn = os.path.join(save_dir, f"{model.name}_{o_name}_lr{o_lr}{{}}")

    ## Configure callbacks
    # Checkpointer: keeps only the best weights seen during training
    checkpointer = tf.keras.callbacks.ModelCheckpoint(base_fn.format('.weights.h5'),
                                                      save_best_only=True,
                                                      save_weights_only=True
                                                      )
    # Timer
    timer = TrainingTimer()

    callbacks = [checkpointer, timer]

    # Train model
    history = train_unet(model, nwpu_data_dir, nwpu_mask_dir,
                         callbacks=callbacks)

    # Update training times dictionary (keyed by the extension-less file stem)
    train_times[base_fn.format('')] = timer

    # Save history to pickle
    with open(base_fn.format('.history.pickle'), 'wb') as f:
        pickle.dump(history.history, f)

deepwide_f64_b4_bnbefore_RMSprop_lr0.001{}
deepwide_f64_b4_bnbefore_RMSprop_lr0.010{}
deepwide_f64_b4_bnbefore_RMSprop_lr0.100{}
deepwide_f64_b4_bnbefore_Adam_lr0.001{}
deepwide_f64_b4_bnbefore_Adam_lr0.010{}
deepwide_f64_b4_bnbefore_Adam_lr0.100{}
