# Training Model
Keras on Tensorflow

trained from  /2020_01_24/model_2020_01_24_16_02.h5

In [1]:
import os, sys
import cv2
import numpy as np
import uuid
from skimage.io import imread, imsave, imshow
from PIL import Image, ImageTk
import matplotlib.pyplot as plt
from imutils import paths
import itertools
import json
from pprint import pprint



import tensorflow as tf
from tensorflow.keras import backend as K

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from core.imageprep import dir_checker, random_crop, crop_generator, random_crop_batch
from core.models import UNet, UNet_hp, vanilla_unet, vanilla_unet_nodrop
from core.metrics import iou_coef, dice_coef

from tensorflow.python.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorboard.plugins.hparams import api as hp
from datetime import datetime

from IPython import get_ipython
# %load_ext autoreload
get_ipython().run_line_magic('load_ext', 'autoreload')
# %autoreload 2
get_ipython().run_line_magic('autoreload', '2')
# %load_ext tensorboard
get_ipython().run_line_magic('load_ext', 'tensorboard')

In [2]:
from packaging import version
print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."

TensorFlow version:  2.0.0


In [3]:
sys.executable

'C:\\Users\\wucci_admin\\Anaconda3\\envs\\tfdl02\\python.exe'

 ## Load Training Dataset

In [4]:
# load image
print("Load Images...")
# on mac
# path = "/Volumes/LaCie_DataStorage/PerlmutterData/"

# on Window PC 
path = os.path.join('D:', 'PerlmutterData')

# input set
crop_input_set = '2020_01_23_09_51_20x'
# crop_input_set = '2020_01_30_11_24_1x' # small training set

imginput = os.path.join('dl_seg_project_raw', 'data_crop', crop_input_set,)
imgpath = os.path.join(path, imginput)

print(imgpath)

img_dir = os.path.join(imgpath, 'images')
label_dir = os.path.join(imgpath, 'labels')
print(img_dir)
print(label_dir)

# check if the output folder exist. If not, create a folder
dir_checker('logs', path)
path_logs = os.path.join(path, 'logs')
dir_checker('fit', path_logs)
dir_checker('model', path_logs)
dir_checker('pars', path_logs)

Load Images...
D:PerlmutterData\dl_seg_project_raw\data_crop\2020_01_23_09_51_20x
D:PerlmutterData\dl_seg_project_raw\data_crop\2020_01_23_09_51_20x\images
D:PerlmutterData\dl_seg_project_raw\data_crop\2020_01_23_09_51_20x\labels
logs exists in D:PerlmutterData
fit exists in D:PerlmutterData\logs
model exists in D:PerlmutterData\logs
pars exists in D:PerlmutterData\logs


 Print the first file name.

In [5]:
imgpath_all = list(paths.list_images(imgpath))
print(imgpath_all[0])

D:PerlmutterData\dl_seg_project_raw\data_crop\2020_01_23_09_51_20x\images\nucleus\0001.tif


 ## Create Image Datagenerator
 1. create only one datagen
 2. specify valiation split in datagen argument
 3. add split data when calling `datagen.flow_from_directory`

In [6]:
timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M")
date =  datetime.now().strftime("%Y_%m_%d")
seed = 102
batch_size = 16
epoch = 500
validation_steps = 20
validation_split = 0.3
training_sample_size = len(imgpath_all)
IMG_HEIGHT = None
IMG_WIDTH = None
classes = ['cell_membrane', 'nucleus', 'autophagosome']
inputclass = [classes[1]]
learning_rate = 1e-5
loss = "binary_crossentropy"
metrics = ['accuracy', iou_coef, dice_coef]

metrics_name = []
for f in metrics:
    if callable(f):
        metrics_name.append(f.__name__)
    else:
        metrics_name.append(f)

In [7]:
# create arguments for data generator
data_gen_img_args = dict(
                # featurewise_center = True,
                # featurewise_std_normalization = True,
                horizontal_flip = True,
                vertical_flip = True,
                rotation_range = 90.,
                width_shift_range = 0.1,
                height_shift_range = 0.1,
                shear_range = 0.07,
                zoom_range = 0.2,
                validation_split = validation_split, # <- specify validation_split ratio
                # fill_mode='constant',
                # cval=0.,
                rescale=1.0/255.0,
                )

data_gen_label_args = dict(
                # featurewise_center=True,
                # featurewise_std_normalization=True,
                horizontal_flip = True,
                vertical_flip = True,
                rotation_range = 90.,
                width_shift_range = 0.1,
                height_shift_range = 0.1,
                shear_range = 0.07,
                zoom_range = 0.2,
                validation_split = validation_split, # <- specify validation_split ratio
                # fill_mode='constant',
                # cval=0.,
                # rescale=1.0/255.0,
                rescale=1.0/255.0,
                )

# create parameter
pars = dict(
                # basic information
                timestamp = timestamp,
                date = date,
                seed = seed,
                batch_size = batch_size,
                
                # Data generator
                crop_input_set = crop_input_set,
                validation_steps = validation_steps,
                validation_split = validation_split,
                training_sample_size = training_sample_size,
                
                # training class
                classes = classes,
                inputclass = inputclass,
    
                # add datagen args
                data_gen_img_args = data_gen_img_args,
                data_gen_label_args = data_gen_label_args,
                
                # Build model
                IMG_HEIGHT = IMG_HEIGHT,
                IMG_WIDTH = IMG_WIDTH,
                epoch = epoch, 
                loss = loss,
                metrics_name = metrics_name,
                learning_rate = learning_rate,
                )

# save parameter
path_pars = os.path.join(path_logs, 'pars')
dir_checker(inputclass[0], path_pars)
dir_checker(date, os.path.join(path_pars, inputclass[0]))

pprint(pars)

nucleus exists in D:PerlmutterData\logs\pars
2020_02_04 exists in D:PerlmutterData\logs\pars\nucleus
{'IMG_HEIGHT': None,
 'IMG_WIDTH': None,
 'batch_size': 16,
 'classes': ['cell_membrane', 'nucleus', 'autophagosome'],
 'crop_input_set': '2020_01_23_09_51_20x',
 'data_gen_img_args': {'height_shift_range': 0.1,
                       'horizontal_flip': True,
                       'rescale': 0.00392156862745098,
                       'rotation_range': 90.0,
                       'shear_range': 0.07,
                       'validation_split': 0.3,
                       'vertical_flip': True,
                       'width_shift_range': 0.1,
                       'zoom_range': 0.2},
 'data_gen_label_args': {'height_shift_range': 0.1,
                         'horizontal_flip': True,
                         'rescale': 0.00392156862745098,
                         'rotation_range': 90.0,
                         'shear_range': 0.07,
                         'validation_split': 0.3,
   

In [8]:
par_file_dir = os.path.join(path_pars, inputclass[0], date, 'pars_' + timestamp + '.json')
print(par_file_dir)

with open(par_file_dir, 'w') as outfile:
    json.dump(pars, outfile, indent=4)


D:PerlmutterData\logs\pars\nucleus\2020_02_04\pars_2020_02_04_16_15.json


In [9]:
# create generator
image_datagen = ImageDataGenerator(**data_gen_img_args)
label_datagen = ImageDataGenerator(**data_gen_label_args)

In [11]:
# load images into generator
train_image_generator = image_datagen.flow_from_directory(
    img_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='training', # <- define subset as 'training'
    seed=seed)

train_label_generator = label_datagen.flow_from_directory(
    label_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='training',
    seed=seed)

valid_image_generator = image_datagen.flow_from_directory(
    img_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='validation', # <- define subset as 'validation'
    seed=seed)

valid_label_generator = label_datagen.flow_from_directory(
    label_dir,
    class_mode=None,
    classes=inputclass,
    color_mode='grayscale',
    batch_size=batch_size,
    subset='validation',
    seed=seed)

Found 4634 images belonging to 1 classes.
Found 4634 images belonging to 1 classes.
Found 1986 images belonging to 1 classes.
Found 1986 images belonging to 1 classes.


In [12]:

# merge image and label generator
def combine_generator(gen1, gen2):
    while True:
        yield(gen1.next(), gen2.next()) 
train_generator = combine_generator(train_image_generator, train_label_generator)
valid_generator = combine_generator(valid_image_generator, valid_label_generator)


In [13]:
'''
train_generator = zip(train_image_generator, train_label_generator)
valid_generator = zip(valid_image_generator, valid_label_generator)
'''

'\ntrain_generator = zip(train_image_generator, train_label_generator)\nvalid_generator = zip(valid_image_generator, valid_label_generator)\n'

 ## Training

In [14]:
print("Start training...")

Start training...


 ### Define Callbacks

In [15]:
# checkpoint
modelfilename = 'model_' + timestamp + '.h5'
dir_checker(inputclass[0], os.path.join(path_logs, 'model'))
dir_checker(date, os.path.join(path_logs, 'model', inputclass[0]))
modelfile_path = os.path.join(path_logs, 'model', inputclass[0], date, modelfilename)

checkpointer = ModelCheckpoint(filepath = modelfile_path, 
                               monitor = 'val_loss', 
                               mode = 'min', 
                               verbose = 1, 
                               save_best_only = True)

# early stopping 
early_stopping = EarlyStopping(monitor='val_loss',
                           patience=8,
                           verbose=1,
                           min_delta=1e-4)

# learning rate adjustment
reduceLR = ReduceLROnPlateau(monitor='val_loss',
                    factor=0.1,
                    patience=4,
                    verbose=1,
                    min_delta=1e-4)

# tensorboard
from tensorflow.summary import create_file_writer

dir_checker(inputclass[0], os.path.join(path_logs, 'fit'))
dir_checker(date, os.path.join(path_logs, 'fit', inputclass[0]))
logdir = os.path.join(path_logs, 'fit', inputclass[0], date, timestamp)

file_writer = create_file_writer(logdir + "/metrics")
file_writer.set_as_default()

metrics = ['accuracy', iou_coef, dice_coef]


tensorboard_callback = TensorBoard(log_dir = logdir, 
                                   profile_batch = 0, 
                                   update_freq= 500,
                                   histogram_freq = 1
                                   )

# compile callbacks
# callbacks = [checkpointer, tensorboard_callback, early_stopping, reduceLR]
callbacks = [checkpointer, tensorboard_callback, reduceLR]

nucleus exists in D:PerlmutterData\logs\model
2020_02_04 exists in D:PerlmutterData\logs\model\nucleus
nucleus exists in D:PerlmutterData\logs\fit
2020_02_04 exists in D:PerlmutterData\logs\fit\nucleus


 ### Setup the model

In [16]:
# calculate steps_per_epoch
steps_per_epoch = training_sample_size * (1 - validation_split) // batch_size
print("Steps per epoch: {}".format(steps_per_epoch))

Steps per epoch: 579.0


In [17]:
print(logdir)

D:PerlmutterData\logs\fit\nucleus\2020_02_04\2020_02_04_16_15


In [18]:
'''
# prepare the model
unetmodel = UNet(shape = (IMG_HEIGHT, IMG_WIDTH), 
                 lr = learning_rate, 
                 loss = loss,
                 metrics = metrics,
                )

# train the model
unetmodel.fit_generator(
                    generator = train_generator, 
                    validation_data = valid_generator,
                    validation_steps = validation_steps,
                    steps_per_epoch = steps_per_epoch,
                    epochs = epoch,  
                    callbacks = callbacks,
                    verbose = 1, 
                    )
'''

'\n# prepare the model\nunetmodel = UNet(shape = (IMG_HEIGHT, IMG_WIDTH), \n                 lr = learning_rate, \n                 loss = loss,\n                 metrics = metrics,\n                )\n\n# train the model\nunetmodel.fit_generator(\n                    generator = train_generator, \n                    validation_data = valid_generator,\n                    validation_steps = validation_steps,\n                    steps_per_epoch = steps_per_epoch,\n                    epochs = epoch,  \n                    callbacks = callbacks,\n                    verbose = 1, \n                    )\n'

In [22]:
from tensorflow.keras.models import load_model

path_model = os.path.join(os.path.join(path_logs, 'fit', inputclass[0]), '2020_02_03', '2020_02_03_09_07', 'run-2', 'model', 'model_2020_02_03_09_07.h5')

dropout = 0.7
num_layers = 4

unetmodel = vanilla_unet_nodrop(
                            shape = (IMG_HEIGHT, IMG_WIDTH), 
                            dropout = dropout, 
                            num_layers = num_layers, 
                            lr = learning_rate, 
                            loss = loss,
                            metrics = metrics,
                            summary = True,
                            use_batch_norm = False
                           )

unetmodel.load_weights(path_model)

unetmodel.fit_generator(
                    generator = train_generator, 
                    validation_data = valid_generator,
                    validation_steps = validation_steps,
                    steps_per_epoch = steps_per_epoch,
                    epochs = epoch,  
                    callbacks = callbacks,
                    verbose = 1, 
                    )

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv2d_23 (Conv2D)              (None, None, None, 6 640         input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_24 (Conv2D)              (None, None, None, 6 36928       conv2d_23[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_5 (MaxPooling2D)  (None, None, None, 6 0           conv2d_24[0][0]                  
____________________________________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.32404, saving model to D:PerlmutterData\logs\model\nucleus\2020_02_04\model_2020_02_04_16_15.h5
Epoch 2/500
Epoch 00002: val_loss improved from 0.32404 to 0.19213, saving model to D:PerlmutterData\logs\model\nucleus\2020_02_04\model_2020_02_04_16_15.h5
Epoch 3/500
Epoch 00003: val_loss did not improve from 0.19213
Epoch 4/500
Epoch 00004: val_loss improved from 0.19213 to 0.16339, saving model to D:PerlmutterData\logs\model\nucleus\2020_02_04\model_2020_02_04_16_15.h5
Epoch 5/500
Epoch 00005: val_loss did not improve from 0.16339
Epoch 6/500

KeyboardInterrupt: 