# Training Model
1. Keras & Tensorflow 2
2. hyper-parameter search
3. vanila-Unet: pull the hyper parameter setting out from the function of model
4. update: 04/21/2020
5. by Mike Chien-Cheng Shih

## Load Dependencies
* System managing modules: os, sys, glob, shutil
* Array calculation: numpy
* Image IO and processing: cv2, skimage, PIL
* Visualization: matplotlib
* Metadata handling: datetime, josn, pprint
* Customized Functions: 
    1. `core.imageprep`
    2. `core.models`
    3. `core.metrics`

In [1]:
import os, sys
import itertools
import numpy as np

# image
from imutils import paths
import cv2
from skimage.io import imread, imsave, imshow
from PIL import Image, ImageTk

# figure
import matplotlib.pyplot as plt

# metadata
import uuid
import json
from pprint import pprint
from datetime import datetime

# tensorflow
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, ReduceLROnPlateau

# tensorboard
from tensorboard.plugins.hparams import api as hp

# customized function
from core.imageprep import dir_checker, random_crop, crop_generator, random_crop_batch
from core.models import UNet, UNet_hp, vanilla_unet, vanilla_unet_nodrop
from core.metrics import iou_coef, dice_coef

# exported from vscode
from IPython import get_ipython
# %load_ext autoreload
get_ipython().run_line_magic('load_ext', 'autoreload')
# %autoreload 2
get_ipython().run_line_magic('autoreload', '2')
# %load_ext tensorboard
get_ipython().run_line_magic('load_ext', 'tensorboard')

In [2]:
from packaging import version
print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."

TensorFlow version:  2.0.0


In [3]:
sys.executable

'C:\\Users\\wucci_admin\\Anaconda3\\envs\\tfdl02\\python.exe'

 ## Load Training Dataset
 * create image list `imgpath_all`

In [4]:
# load image
print("Load Images...")
# on mac
# path = "/Volumes/LaCie_DataStorage/PerlmutterData/"

# on Window PC 
path = os.path.join('D:', 'PerlmutterData')

# input set
# crop_input_set = '2020_01_23_09_51_20x'
crop_input_set = '2020_04_21_13_26_1x' # small training set

imginput = os.path.join('dl_seg_project_raw', 'data_crop', crop_input_set,)
imgpath = os.path.join(path, imginput)

print('input imgpath: {}'.format(imgpath))

img_dir = os.path.join(imgpath, 'images')
label_dir = os.path.join(imgpath, 'labels')

print('image dir: {}'.format(img_dir))
print('label dir: {}'.format(label_dir))

# check if the output folder exist. If not, create a folder
print('Generating output folders: ')
dir_checker('logs', path)
path_logs = os.path.join(path, 'logs')
dir_checker('fit', path_logs)
dir_checker('model', path_logs)
dir_checker('pars', path_logs)

# create input file list
imgpath_all = list(paths.list_images(img_dir))

Load Images...
input imgpath: D:PerlmutterData\dl_seg_project_raw\data_crop\2020_04_21_13_26_1x
image dir: D:PerlmutterData\dl_seg_project_raw\data_crop\2020_04_21_13_26_1x\images
label dir: D:PerlmutterData\dl_seg_project_raw\data_crop\2020_04_21_13_26_1x\labels
Generating output folders: 
logs exists in D:PerlmutterData
fit exists in D:PerlmutterData\logs
model exists in D:PerlmutterData\logs
pars exists in D:PerlmutterData\logs


 ## Create Image Datagenerator
 1. create only one datagen
 2. specify valiation split in datagen argument
 3. add split data when calling `datagen.flow_from_directory`

In [5]:
timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M")
date =  datetime.now().strftime("%Y_%m_%d")
seed = 103
batch_size = 16
epoch = 20
validation_steps = 20
validation_split = 0.1
training_sample_size = len(imgpath_all)
IMG_HEIGHT = None
IMG_WIDTH = None
classes = ['cell_membrane', 'nucleus', 'autophagosome']
inputclass = [classes[1]]
learning_rate = 1e-4
loss = "binary_crossentropy"
metrics = ['accuracy', iou_coef, dice_coef]

metrics_name = []

# create a metrics name from str and function
for f in metrics:
    if callable(f):
        metrics_name.append(f.__name__)
    else:
        metrics_name.append(f)

print("Metrics: {}".format(metrics_name))

Metrics: ['accuracy', 'iou_coef', 'dice_coef']


## Setup Data Generator

In [7]:
# create arguments for data generator
data_gen_img_args = dict(
                # featurewise_center = True,
                # featurewise_std_normalization = True,
                horizontal_flip = True,
                vertical_flip = True,
                rotation_range = 90.,
                width_shift_range = 0.1,
                height_shift_range = 0.1,
                shear_range = 0.07,
                zoom_range = 0.2,
                validation_split = validation_split, # <- specify validation_split ratio
                # fill_mode='constant',
                # cval=0.,
                rescale=1.0/255.0,
                )

data_gen_label_args = dict(
                # featurewise_center=True,
                # featurewise_std_normalization=True,
                horizontal_flip = True,
                vertical_flip = True,
                rotation_range = 90.,
                width_shift_range = 0.1,
                height_shift_range = 0.1,
                shear_range = 0.07,
                zoom_range = 0.2,
                validation_split = validation_split, # <- specify validation_split ratio
                # fill_mode='constant',
                # cval=0.,
                # rescale=1.0/255.0,
                rescale=1.0/255.0,
                )

## Save Metadata
* Save metadata includes timestamp, seed, batch size, and parameters of datagen.  

In [13]:
# create parameter
pars = dict(
                # basic information
                timestamp = timestamp,
                date = date,
                seed = seed,
                batch_size = batch_size,
                
                # Data generator
                crop_input_set = crop_input_set,
                validation_steps = validation_steps,
                validation_split = validation_split,
                training_sample_size = training_sample_size,
                
                # training class
                classes = classes,
                inputclass = inputclass,
    
                # add datagen args
                data_gen_img_args = data_gen_img_args,
                data_gen_label_args = data_gen_label_args,
                
                # Build model
                IMG_HEIGHT = IMG_HEIGHT,
                IMG_WIDTH = IMG_WIDTH,
                epoch = epoch, 
                loss = loss,
                metrics_name = metrics_name,
                learning_rate = learning_rate,
                )

# save parameter
path_pars = os.path.join(path_logs, 'pars')
dir_checker(inputclass[0], path_pars)
dir_checker(date, os.path.join(path_pars, inputclass[0]))
pprint(pars)

# save pars
par_file_dir = os.path.join(path_pars, inputclass[0], date, 'pars_' + timestamp + '.json')
print(par_file_dir)

with open(par_file_dir, 'w') as outfile:
    json.dump(pars, outfile, indent=4)

nucleus exists in D:PerlmutterData\logs\pars
2020_04_21 exists in D:PerlmutterData\logs\pars\nucleus
{'IMG_HEIGHT': None,
 'IMG_WIDTH': None,
 'batch_size': 16,
 'classes': ['cell_membrane', 'nucleus', 'autophagosome'],
 'crop_input_set': '2020_04_21_13_26_1x',
 'data_gen_img_args': {'height_shift_range': 0.1,
                       'horizontal_flip': True,
                       'rescale': 0.00392156862745098,
                       'rotation_range': 90.0,
                       'shear_range': 0.07,
                       'validation_split': 0.1,
                       'vertical_flip': True,
                       'width_shift_range': 0.1,
                       'zoom_range': 0.2},
 'data_gen_label_args': {'height_shift_range': 0.1,
                         'horizontal_flip': True,
                         'rescale': 0.00392156862745098,
                         'rotation_range': 90.0,
                         'shear_range': 0.07,
                         'validation_split': 0.1,
    

## Create Datagen
Datagen does
1. images loading
2. on-the-fly data augmentation
3. create training and validation set

In [14]:
# create generator
image_datagen = ImageDataGenerator(**data_gen_img_args)
label_datagen = ImageDataGenerator(**data_gen_label_args)

In [16]:
# load images into generator
train_image_generator = image_datagen.flow_from_directory(
    img_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='training', # define subset as 'training'
    seed=seed)

train_label_generator = label_datagen.flow_from_directory(
    label_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='training',
    seed=seed)

valid_image_generator = image_datagen.flow_from_directory(
    img_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='validation', # define subset as 'validation'
    seed=seed)

valid_label_generator = label_datagen.flow_from_directory(
    label_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='validation',
    seed=seed)

Found 298 images belonging to 1 classes.
Found 298 images belonging to 1 classes.
Found 33 images belonging to 1 classes.
Found 33 images belonging to 1 classes.


In [17]:
# merge image and label generator
def combine_generator(gen1, gen2):
    while True:
        yield(gen1.next(), gen2.next()) 
train_generator = combine_generator(train_image_generator, train_label_generator)
valid_generator = combine_generator(valid_image_generator, valid_label_generator)

 ## Training

In [18]:
print("Start training...")

Start training...


 ### Setup the model

In [19]:
# calculate steps_per_epoch
steps_per_epoch = training_sample_size * (1 - validation_split) // batch_size + 1
print("Steps per epoch: {}".format(steps_per_epoch))

Steps per epoch: 109.0


## Hyperparameters Tuning 
1. method: grid search
2. Hyperparameters: `learning_rate`, `dropout`, and `layers` of Unet 
    1. learning rate: 0.1, 0.01, 0.001, 0.0001
    2. dropout: 0.5, 0.7
    3. layers: 4, 5

In [None]:
# Create a .v2 file for saving hyperparameter and evaluation
# so we can see the results on tensorboard
hparamtuning_dir = os.path.join(path_logs, 'fit', inputclass[0], date, timestamp)

HP_LEARNINGRATE = hp.HParam('learning_rate', hp.Discrete([0.1, 1e-2, 1e-3, 1e-4]))
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.5, 0.7]))
HP_LAYERS = hp.HParam('layers', hp.Discrete([4, 5]))

# hparams_list = [HP_DROPOUT, HP_LAYERS]

with tf.summary.create_file_writer(hparamtuning_dir).as_default():
    hp.hparams_config(
        hparams=[HP_LEARNINGRATE, HP_DROPOUT, HP_LAYERS],
        metrics=[hp.Metric('accuracy', display_name='Accuracy'), 
                 hp.Metric('iou_coef', display_name='IoU_Coef'), # create container for customized metrics
                 hp.Metric('dice_coef', display_name='Dice_Coef')], # the same
    )

## Parameters in DL
* model: vanilla U-net
    1. kernel_initializer = `'he_normal'`
    2. activation = `'relu'`
    3. padding = `'same'`
    4. `BatchNormalization`
* loss: `binary_crossentropy`
* `num_classes = 1`
* evaluation: 
    1. accuracy
    2. IOU
    3. DICE

In [None]:
def run(run_name, hparamtuning_dir, hparams):
    
    # checkpoint
    modelfilename = 'model_' + timestamp + '.h5'
    dir_checker(run_name, hparamtuning_dir)
    dir_checker('model', os.path.join(hparamtuning_dir, run_name))
    
    modelfile_path = os.path.join(hparamtuning_dir, run_name, 'model', modelfilename)
    checkpointer = ModelCheckpoint(filepath = modelfile_path, 
                                   monitor = 'val_accuracy', 
                                   mode = 'max', 
                                   verbose = 1, 
                                   save_best_only = True, 
                                  )

    # early stopping 
    early_stopping = EarlyStopping(monitor='val_loss',
                               patience=8,
                               verbose=1,
                               min_delta=1e-4)

    # learning rate adjustment
    reduceLR = ReduceLROnPlateau(monitor='val_loss',
                        factor=0.1,
                        patience=4,
                        verbose=1,
                        min_delta=1e-3,
                        cooldown = 2,
                        )

    # tensorboard ----------------------------------------------
    
    # file_writer = create_file_writer(os.path.join(path_logs, 'fit', inputclass[0], date, timestamp, "metrics"))
    # file_writer.set_as_default()

    metrics = ['accuracy', iou_coef, dice_coef]
    
    tensorboard_callback = TensorBoard(log_dir = os.path.join(hparamtuning_dir, run_name), 
                                       profile_batch = 0, 
                                       update_freq= 30,
                                       histogram_freq = 1
                                       )

    # compile callbacks
    # callbacks = [checkpointer, tensorboard_callback, early_stopping, reduceLR]
    callbacks = [checkpointer, reduceLR, tensorboard_callback]
    
    hparamtuning_runname_dir = os.path.join(hparamtuning_dir, run_name)
    
    
    with tf.summary.create_file_writer(hparamtuning_runname_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial

        # prepare the model -----------------------------------
        
        # load hyper-parameter
        learning_rate = float(hparams[HP_LEARNINGRATE])
        print('learning rate: {}'.format(learning_rate))
        
        dropout = float(hparams[HP_DROPOUT])
        print('dropout: {}'.format(dropout))
        
        num_layers = int(hparams[HP_LAYERS])
        print('num layers: {}'.format(num_layers))
        
        unetmodel = vanilla_unet_nodrop(
                            shape = (IMG_HEIGHT, IMG_WIDTH), 
                            dropout = dropout, 
                            num_layers = num_layers, 
                            lr = learning_rate, 
                            loss = loss,
                            metrics = metrics,
                            summary = False,
                           )
        
        # load model ------------------------------------------
        
        '''
        # load weight
        path_model = os.path.join('D:', 'PerlmutterData', 'logs', 'fit', 
                                'nucleus', 
                                '2020_02_05',
                                '2020_02_05_16_29',
                                'run-2',
                                'model',
                                'model_2020_02_05_16_29.h5',)
        
        unetmodel.load_weights(path_model)
        '''
        
        
        # train the model -------------------------------------
        unetmodel.fit_generator(
                            generator = train_generator, 
                            validation_data = valid_generator,
                            validation_steps = validation_steps,
                            steps_per_epoch = steps_per_epoch,
                            epochs = epoch,  
                            callbacks = callbacks,
                            verbose = 1, 
                            )
    
        _, accuracy, iou, dice,  = unetmodel.evaluate_generator(valid_generator, steps = 50, verbose=1)
        tf.summary.scalar('accuracy', accuracy, step = 1)
        tf.summary.scalar('iou_coef', iou, step = 1)
        tf.summary.scalar('dice_coef', dice, step = 1)
        
        # -----------------------------------------------------
        
        # clean memory
        K.clear_session()
        del unetmodel

In [22]:
#train the model
session_num = 0
for learning_rate in HP_LEARNINGRATE.domain.values:
    for dropout in HP_DROPOUT.domain.values:
        for layer in HP_LAYERS.domain.values:
            run_name = "run-{}".format(session_num)
            print('--- Starting trial: {}'.format(run_name))

            # create hyper-parameter
            hparams = {
                HP_LEARNINGRATE: learning_rate,
                HP_DROPOUT: dropout,
                HP_LAYERS: layer,
            }
            print(hparams)
            print({h.name: hparams[h] for h in hparams})

            # build model and traning
            run(run_name, hparamtuning_dir, hparams)

            session_num += 1


--- Starting trial: run-0
{HParam(name='learning_rate', domain=Discrete([0.0001, 0.001, 0.01, 0.1]), display_name=None, description=None): 0.0001, HParam(name='dropout', domain=Discrete([0.5, 0.7]), display_name=None, description=None): 0.5, HParam(name='layers', domain=Discrete([4, 5]), display_name=None, description=None): 4}
{'learning_rate': 0.0001, 'dropout': 0.5, 'layers': 4}
run-0 does not exist in D:PerlmutterData\logs\fit\nucleus\2020_04_21\2020_04_21_16_21
model does not exist in D:PerlmutterData\logs\fit\nucleus\2020_04_21\2020_04_21_16_21\run-0
learning rate: 0.0001
dropout: 0.5
num layers: 4
Epoch 1/20
Epoch 00001: val_accuracy improved from -inf to 0.51957, saving model to D:PerlmutterData\logs\fit\nucleus\2020_04_21\2020_04_21_16_21\run-0\model\model_2020_04_21_16_21.h5
Epoch 2/20
Epoch 00002: val_accuracy improved from 0.51957 to 0.52311, saving model to D:PerlmutterData\logs\fit\nucleus\2020_04_21\2020_04_21_16_21\run-0\model\model_2020_04_21_16_21.h5
Epoch 3/20
Epoch 

Epoch 18/20
Epoch 00018: val_accuracy did not improve from 0.53335
Epoch 19/20
Epoch 00019: val_accuracy did not improve from 0.53335
Epoch 20/20
Epoch 00020: val_accuracy did not improve from 0.53335

Epoch 00020: ReduceLROnPlateau reducing learning rate to 1.0000000116860975e-08.
--- Starting trial: run-1
{HParam(name='learning_rate', domain=Discrete([0.0001, 0.001, 0.01, 0.1]), display_name=None, description=None): 0.0001, HParam(name='dropout', domain=Discrete([0.5, 0.7]), display_name=None, description=None): 0.5, HParam(name='layers', domain=Discrete([4, 5]), display_name=None, description=None): 5}
{'learning_rate': 0.0001, 'dropout': 0.5, 'layers': 5}
run-1 does not exist in D:PerlmutterData\logs\fit\nucleus\2020_04_21\2020_04_21_16_21
model does not exist in D:PerlmutterData\logs\fit\nucleus\2020_04_21\2020_04_21_16_21\run-1
learning rate: 0.0001
dropout: 0.5
num layers: 5
Epoch 1/20
Epoch 00001: val_accuracy improved from -inf to 0.52283, saving model to D:PerlmutterData\logs

Epoch 13/20

KeyboardInterrupt: 

In [None]:
print("Training finished")

## Tensorboard
* Tensorboard for demo
* Command: `tensorboard --logdir .\logs\fit\nucleus\2020_04_21\2020_04_21_16_21 --host 192.168.86.30 --port 6006`
* Link: [tensorboard](http://192.168.86.30:6006/)

* Tensorboard for nucleus
* Command: `tensorboard --logdir .\logs\fit\nucleus\2020_02_06\2020_02_06_16_39 --host 192.168.86.30 --port 6006`