# Semantic Segmentation of Water using U-Net
# Part 5 - Training with Data Augmentation


In this part I will retrain the baseline U-Net configuration from Part 3, this time with data augmentation applied to the training data, and evaluate the effect of this strategy.

In [1]:
%matplotlib inline
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.layers import concatenate, Conv2DTranspose
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, save_img
import numpy as np
import json, os
from random import shuffle
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import warnings
import re
import time

In [2]:
from unetlib.preprocessing import get_lakes_with_masks, make_dataframes_for_flow, make_img_msk_flows
import unetlib.visualisation as vs

In [3]:
# Imagery directories (relative paths, resolved against the working directory).
# nwpu_data_dir is the image folder and nwpu_mask_dir the matching mask folder;
# both are handed to the unetlib helpers that build the dataframes and generators.
nwpu_data_dir = 'nwpu_lake_images/data/'
nwpu_mask_dir = 'nwpu_lake_images/masks/'

## Set Up Training & Validation Data

In [15]:
# Build the train and test dataframes for the images and their masks.
# test_size and random_state are pinned so re-running the cell requests
# the same partition (presumably a 75/25 split -- confirm in unetlib).
(train_img_df, train_msk_df,
 test_img_df, test_msk_df) = make_dataframes_for_flow(
    nwpu_data_dir,
    nwpu_mask_dir,
    test_size=0.25,
    random_state=42,
)

In [16]:
# Settings for splitting the training data into train and validation
# generators; the augmentation options are intended for the training data only.
aug_dict = dict(
    rotation_range=90,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.25,
)

# Number of samples per generator batch (also used to derive steps per epoch)
batch_size = 16

# Create the paired image/mask generators for training and validation,
# together with the file lists behind each of them. Pixel values are
# rescaled by 1/255 and aug_dict carries the augmentation settings.
(train_gen, val_gen,
 train_fps, val_fps) = make_img_msk_flows(
    train_img_df, train_msk_df,
    nwpu_data_dir, nwpu_mask_dir,
    val_split=0.3,
    rescale=1/255.,
    aug_dict=aug_dict,
    batch_size=batch_size,
)

Found 210 validated image filenames.
Found 210 validated image filenames.
Found 90 validated image filenames.
Found 90 validated image filenames.


## Set Up Model

In [10]:
from unetlib.model import UNet
from unetlib.metrics import BinaryMeanIoU

In [8]:
# Re-create the Part 3 baseline architecture under its own name; the name
# is later embedded in the output filenames, keeping this run's artefacts
# separate from those of the un-augmented baseline.
model = UNet(
    n_filters=32,
    n_blocks=2,
    model_name='baseline_aug',
)
print(model.name)
model.summary()

baseline_aug
Model: "baseline_aug"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 256, 256, 32) 896         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 256, 32) 9248        conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 128, 128, 32) 0           conv2d_1[0][0]                   
__________________________________________________________________________

In [11]:
# Attach the training configuration to the model: binary cross-entropy loss,
# the RMSProp optimiser and the BinaryMeanIoU metric with a 0.5 threshold.
model.compile(
    loss='binary_crossentropy',
    optimizer='RMSProp',
    metrics=[BinaryMeanIoU(threshold=0.5)],
)

In [17]:
# Steps per epoch = number of batches needed to cover each file list once;
# rounding up means a final, partially filled batch still counts as a step.
train_steps, val_steps = (
    int(np.ceil(len(fps) / batch_size)) for fps in (train_fps, val_fps)
)

print(f'Train Steps: {train_steps}')
print(f'Val Steps: {val_steps}')

Train Steps: 14
Val Steps: 6


## Train and Evaluate Model

In [18]:
# Number of times to cycle the full training set.
# Passed to model.fit and also embedded in the output filenames further down.
epochs = 100

In [None]:
# Train the model and record the time taken (in seconds).
# perf_counter() is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments during a long training run.
t1 = time.perf_counter()
history = model.fit(train_gen, epochs=epochs, steps_per_epoch=train_steps,
                    validation_data=val_gen, validation_steps=val_steps)
runtime = time.perf_counter() - t1

In [None]:
# How many minutes did training take?
# runtime is an elapsed time in seconds, hence the division by 60.
print(runtime / 60)

In [None]:
# Configure the model output directory (created if missing) and filenames.
output_dir = 'model_outputs'
os.makedirs(output_dir, exist_ok=True)

# Both artefacts share one stem -- model name, batch size and epoch count --
# and differ only in their suffix.
hist_filepath, weights_filepath = (
    os.path.join(output_dir, f'{model.name}_bs{batch_size}e{epochs}.{suffix}')
    for suffix in ('history.pickle', 'weights.h5')
)

In [None]:
# Pickle the training history record so it can be reloaded without retraining
with open(hist_filepath, mode='wb') as hist_file:
    pickle.dump(history.history, hist_file)

# Write the trained model weights next to the history file
model.save_weights(weights_filepath)