In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from PIL import Image
import math
import pickle
import time
import json
import tensorflow.keras as keras
from keras.layers import Input, Layer, LeakyReLU, BatchNormalization, Conv2D, MaxPooling2D, UpSampling2D, Concatenate, Add, Lambda
from keras.models import Model, model_from_json, load_model
from keras.optimizers import SGD, Adam
from keras.regularizers import l2
from keras.callbacks import TerminateOnNaN, ModelCheckpoint, Callback, EarlyStopping
import keras.backend as K
import os
# Reset the Keras backend state before anything is built in this kernel session.
K.clear_session()
# Every layer below passes dtype=K.floatx(), so this selects float32 for the whole model.
K.set_floatx('float32')
# Record the library versions/backends next to the results for reproducibility.
print("Running Tensorflow version", tf.__version__)
print("Keras is running on", K.backend(), "backend")

Running Tensorflow version 2.1.0
Keras is running on tensorflow backend


Using TensorFlow backend.


In [2]:
# problem with model with rounding 
'''
def roundingAlgo(x): 
    # first one that works with model_1 & model_2 
    # problem - this rounding function is slow: model_2 = 3 hours / epoch
    # comparison, model_0 = 20 mins / epoch
    # in addition, off by half with integer inputs (lower than actual value, e.g. floor(2) ≈ 1.5, floor(2.01) ≈ 2)
    # source: https://en.wikipedia.org/wiki/Floor_and_ceiling_functions#Continuity_and_series_expansions
    if True:
        result = x - 0.5
        for p in range(1, 7):
            result = result + K.sin(x * p * 2 * math.pi) / (p * math.pi)
    return result
# '''
'''     
def roundingAlgo(x):
    # second one that works with model_2 
    # problem - this rounding function is slower than first working algo: model_2 = 4,2 hours / epoch
    # comparison, model_0 = 20 mins / epoch
    # source: self
    return x - x % 1
# '''
# '''
def roundingAlgo(x):
    """Smooth, differentiable approximation of ``floor(x)`` for backend tensors.

    Closed form of the six-term truncated Fourier series

        floor(x) ~= x - 1/2 + (1/pi) * sum_{p=1..6} sin(2*pi*p*x) / p

    With t = 2*pi*x and c = cos(t), the partial sum collapses to

        sum_{p=1..6} sin(p*t) / p
            = sin(t) * (1 + c) * (13 + 2c - 18c^2 - 32c^3 + 80c^4) / 15

    The earlier version of this closed form dropped the 1/pi factor, which made
    the correction term pi times too large; it is restored here so the result
    matches the loop-based series.

    Known limitations (inherited from the series itself):
      * at exact integers every sine term is zero, so the result is x - 0.5;
      * author's note: this was still slow in training (about 2.5 hours / epoch).

    Parameters
    ----------
    x : backend tensor (or anything supporting arithmetic with K.sin / K.cos)

    Returns
    -------
    Tensor of the same shape as ``x``.
    """
    theta = 2 * math.pi * x
    cosTheta = K.cos(theta)
    # Horner evaluation of 13 + 2c - 18c^2 - 32c^3 + 80c^4; avoids three K.pow calls.
    quartic = 13 + cosTheta * (2 + cosTheta * (-18 + cosTheta * (-32 + 80 * cosTheta)))
    # Divide by 15*pi: 15 is the common denominator of 1/p for p=1..6 after
    # factoring, pi comes from the Fourier coefficient 1/(p*pi).
    sawtoothCorrection = K.sin(theta) * (1 + cosTheta) * quartic / (15 * math.pi)
    return x - 0.5 + sawtoothCorrection
# '''
'''
def roundingAlgo(x): 
    # made to fool the engine to have a gradient
    return 0 * x + K.round(x)
# '''


# check https://github.com/keras-team/keras/issues/2218
# check https://github.com/keras-team/keras/issues/2221
# https://www.tensorflow.org/api_docs/python/tf/custom_gradient
class RoundClampQ7_12(Layer):
    """Non-trainable layer that quantizes to steps of 1/4096 and saturates.

    The input is scaled by 4096, passed through ``roundingAlgo``, clipped to
    the integer range [-524288, 524287] and scaled back, so outputs stay
    within [-128, 128 - 1/4096].
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.trainable = False

    def build(self, input_shape):
        super().build(input_shape)

    def call(self, X):
        # Work in integer-like units of 1/4096, saturate, then convert back.
        scaled = roundingAlgo(X * 4096)
        saturated = K.clip(scaled, -524288, 524287)
        return saturated / 4096.0

    def get_config(self):
        # No extra constructor arguments: the base config is the full config.
        return dict(super().get_config())
class RoundOverflowQ7_12(Layer):
    """Non-trainable layer that quantizes to steps of 1/4096 with wrap-around.

    The input is scaled by 4096, passed through ``roundingAlgo`` and wrapped
    modulo 1048576 into [-524288, 524288) before being scaled back, so values
    outside the range wrap instead of saturating.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.trainable = False

    def build(self, input_shape):
        super().build(input_shape)

    def call(self, X):
        scaled = roundingAlgo(X * 4096)
        # Shift so the range starts at 0, wrap, then shift back.
        wrapped = ((scaled + 524288) % 1048576) - 524288
        return wrapped / 4096.0

    def get_config(self):
        # No extra constructor arguments: the base config is the full config.
        return dict(super().get_config())
class RoundClampQ3_4(Layer):
    """Non-trainable layer that quantizes to steps of 1/16 and saturates.

    The input is scaled by 16, passed through ``roundingAlgo``, clipped to the
    integer range [-128, 127] and scaled back, so outputs stay within
    [-8, 8 - 1/16].
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.trainable = False

    def build(self, input_shape):
        super().build(input_shape)

    def call(self, X):
        # Work in integer-like units of 1/16, saturate, then convert back.
        scaled = roundingAlgo(X * 16)
        saturated = K.clip(scaled, -128, 127)
        return saturated / 16.0

    def get_config(self):
        # No extra constructor arguments: the base config is the full config.
        return dict(super().get_config())
class RoundOverflowQ3_4(Layer):
    """Non-trainable layer that quantizes to steps of 1/16 with wrap-around.

    The input is scaled by 16, passed through ``roundingAlgo`` and wrapped
    modulo 256 into [-128, 128) before being scaled back, so values outside
    the range wrap instead of saturating.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.trainable = False

    def build(self, input_shape):
        super().build(input_shape)

    def call(self, X):
        scaled = roundingAlgo(X * 16)
        # Shift so the range starts at 0, wrap, then shift back.
        wrapped = ((scaled + 128) % 256) - 128
        return wrapped / 16.0

    def get_config(self):
        # No extra constructor arguments: the base config is the full config.
        return dict(super().get_config())
class Identity(Layer):
    """Non-trainable pass-through layer; the default ``roundingFunction`` of DBL."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.trainable = False

    def call(self, X):
        # Returned untouched: no rounding is applied.
        return X

    def get_config(self):
        # No extra constructor arguments: the base config is the full config.
        return dict(super().get_config())
class IdentityFinalLayer(Layer):
    """Non-trainable pass-through layer used to give model outputs a fixed name."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.trainable = False

    def call(self, X):
        # Returned untouched: this layer only exists as a named endpoint.
        return X

    def get_config(self):
        # No extra constructor arguments: the base config is the full config.
        return dict(super().get_config())
    
def DBL(previousLayer, layerFilter, kernelSize=(3, 3), roundingFunction=Identity, name=None):
    """Append a Conv2D -> BatchNormalization -> LeakyReLU block with rounding hooks.

    The applied sequence is:
        Conv2D -> roundingFunction -> BatchNormalization -> roundingFunction
        -> LeakyReLU(alpha=0.1) -> roundingFunction

    Parameters
    ----------
    previousLayer: tensor the block is applied to
    layerFilter: number of convolution filters
    kernelSize: convolution kernel size, default (3, 3)
    roundingFunction: Layer class instantiated three times (after conv,
        after batch norm, after activation); Identity means no rounding
    name: suffix used in the layer names; when None, the current
        ``time.time_ns()`` value is used instead

    Returns
    -------
    Output tensor of the last rounding layer.
    """
    suffix = str(time.time_ns()) if name is None else str(name)
    floatType = K.floatx()

    # Layers are instantiated outermost-first (as a nested call expression
    # would do) and then applied innermost-first.
    thirdRound = roundingFunction(name="ThirdRound_" + suffix, dtype=floatType)
    activation = LeakyReLU(alpha=0.1, dtype=floatType)
    secondRound = roundingFunction(name="SecondRound_" + suffix, dtype=floatType)
    batchNorm = BatchNormalization(name="BatchNorm_" + suffix, dtype=floatType)
    firstRound = roundingFunction(name="FirstRound_" + suffix, dtype=floatType)
    convolution = Conv2D(
        filters=layerFilter,
        kernel_size=kernelSize,
        padding='same',
        use_bias=False,
        kernel_regularizer=l2(5e-4),
        name="Conv2D_" + suffix,
        dtype=floatType,
    )

    tensor = previousLayer
    for layer in (convolution, firstRound, batchNorm, secondRound, activation, thirdRound):
        tensor = layer(tensor)
    return tensor
print("Custom layer classes successfully defined")

Custom layer classes successfully defined


In [3]:
# Number of object classes; the model heads below use 3 * (4 + 1 + classificationClass)
# output channels and the loss/data pipeline receives this as num_classes.
classificationClass = 80

print("Number of class classification is", classificationClass)

Number of class classification is 80


In [4]:
print()
def rand(a=0, b=1):
    """Return one uniform random sample scaled from [0, 1) to the span a..b.

    Draws exactly one value from NumPy's global random state.
    """
    span = b - a
    return a + span * np.random.rand()

def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True):
    '''
    Load one annotated image and fit it, together with its boxes, to input_shape.

    Parameters
    ----------
    annotation_line: str, whitespace-separated; the first token is the image
        path, every following token is one box given as comma-separated
        integers (presumably x_min,y_min,x_max,y_max,class_id - the output
        has 5 columns per box)
    input_shape: (height, width) of the returned image
    random: when False the image is only letterboxed (aspect-preserving
        resize, centered on a gray canvas); when True random scale, aspect
        jitter, placement, horizontal flip and HSV distortion are applied
        (better augmentation at the cost of extra processing)
    max_boxes: number of rows in the returned box array; extra boxes are dropped
    jitter: relative range of the random aspect-ratio change (random branch only)
    hue, sat, val: ranges of the random HSV distortion (random branch only)
    proc_img: only consulted when random is False; if False the image is not
        resized/pasted and image_data is returned as the integer 0

    Returns
    -------
    image_data: float array with values in 0..1 (see proc_img for the exception)
    box_data: array, shape=(max_boxes, 5), zero rows where there is no box
    '''
    line = annotation_line.split()
    image = Image.open(line[0])
    iw, ih = image.size
    h, w = input_shape
    # Integer array with one row per box; later float results are truncated on assignment.
    box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

    if not random:
        # Letterbox: scale to fit inside (w, h) and center the result.
        scale = min(w/iw, h/ih)
        nw = int(iw*scale)
        nh = int(ih*scale)
        dx = (w-nw)//2
        dy = (h-nh)//2
        image_data=0
        if proc_img:
            image = image.resize((nw,nh), Image.BICUBIC)
            # Gray (128) canvas of the target size with the resized image pasted at the offset.
            new_image = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)/255.

        # Apply the same scale/offset to the boxes.
        box_data = np.zeros((max_boxes,5))
        if len(box)>0:
            np.random.shuffle(box)
            if len(box)>max_boxes: box = box[:max_boxes]
            box[:, [0,2]] = box[:, [0,2]]*scale + dx
            box[:, [1,3]] = box[:, [1,3]]*scale + dy
            box_data[:len(box)] = box

        return image_data, box_data

    # Random resize: jittered aspect ratio and a scale factor in [0.25, 2).
    new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
    scale = rand(.25, 2)
    if new_ar < 1:
        nh = int(scale*h)
        nw = int(nh*new_ar)
    else:
        nw = int(scale*w)
        nh = int(nw/new_ar)
    image = image.resize((nw,nh), Image.BICUBIC)

    # Random placement on a gray canvas; the offset range is negative when the
    # resized image is larger than the canvas, which crops it.
    dx = int(rand(0, w-nw))
    dy = int(rand(0, h-nh))
    new_image = Image.new('RGB', (w,h), (128,128,128))
    new_image.paste(image, (dx, dy))
    image = new_image

    # Horizontal flip with probability 0.5.
    flip = rand()<.5
    if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # Color distortion in HSV space; the hue/sat/val parameters are replaced
    # by the sampled shift / scale factors.
    hue = rand(-hue, hue)
    sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
    val = rand(1, val) if rand()<.5 else 1/rand(1, val)
    x = rgb_to_hsv(np.array(image)/255.)
    x[..., 0] += hue
    # Hue is cyclic: wrap back into 0..1.
    x[..., 0][x[..., 0]>1] -= 1
    x[..., 0][x[..., 0]<0] += 1
    x[..., 1] *= sat
    x[..., 2] *= val
    x[x>1] = 1
    x[x<0] = 0
    image_data = hsv_to_rgb(x) # numpy array, 0 to 1

    # Apply resize, offset and flip to the boxes, clip them to the canvas and
    # drop boxes that ended up at most 1 pixel wide or high.
    box_data = np.zeros((max_boxes,5))
    if len(box)>0:
        np.random.shuffle(box)
        box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
        box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
        # Mirroring swaps the roles of x_min and x_max.
        if flip: box[:, [0,2]] = w - box[:, [2,0]]
        box[:, 0:2][box[:, 0:2]<0] = 0
        box[:, 2][box[:, 2]>w] = w
        box[:, 3][box[:, 3]>h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
        if len(box)>max_boxes: box = box[:max_boxes]
        box_data[:len(box)] = box

    return image_data, box_data

def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    '''Preprocess true boxes to training input format

    Each valid box (width > 0) is assigned to the anchor with the highest
    IoU (boxes and anchors compared as if centered at the origin) and written
    into the grid cell containing the box center, on every output layer whose
    anchor mask contains that anchor.

    Parameters
    ----------
    true_boxes: array-like, shape=(m, T, 5)
        Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
        Rows with zero width are treated as padding and skipped, wherever
        they appear among the T rows.
    input_shape: array-like, hw, multiples of 32
    anchors: array, shape=(N, 2), wh
    num_classes: integer

    Returns
    -------
    y_true: list of array, shape like yolo_outputs, xywh are relative value
        (normalized by input_shape); index 4 is objectness, 5+ is one-hot class

    Raises
    ------
    AssertionError: if any class id is not less than num_classes
    '''
    # Convert first so that plain nested lists are accepted as well; this also
    # copies, so the caller's array is never modified.
    true_boxes = np.array(true_boxes, dtype='float32')
    assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
    num_layers = len(anchors)//3 # default setting
    # Note: in the 2-layer setting anchor index 3 appears in both masks and
    # anchor index 0 in neither.
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]

    input_shape = np.array(input_shape, dtype='int32')
    # Corner format -> center (floored to whole pixels) and size.
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    # Normalize by (width, height).
    true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]

    m = true_boxes.shape[0]
    grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
    y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
        dtype='float32') for l in range(num_layers)]

    # Expand dim to apply broadcasting.
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    valid_mask = boxes_wh[..., 0]>0

    for b in range(m):
        # Discard zero rows, but remember each valid box's original row index:
        # best_anchor below is indexed over the filtered boxes, while
        # true_boxes[b] is not filtered.
        valid_idx = np.flatnonzero(valid_mask[b])
        if len(valid_idx)==0: continue
        wh = boxes_wh[b, valid_idx]
        # Expand dim to apply broadcasting.
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes

        # IoU of every valid box against every anchor, both centered at the origin.
        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)

        # Find best anchor for each true box
        best_anchor = np.argmax(iou, axis=-1)

        for t, n in zip(valid_idx, best_anchor):
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    # Grid cell (column i, row j) containing the box center.
                    i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(n)
                    c = true_boxes[b,t, 4].astype('int32')
                    y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5+c] = 1

    return y_true

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''Endless batch generator for fit_generator.

    Walks through annotation_lines cyclically, shuffling the list in place
    each time the cursor is at position 0, and augments every sample with
    get_random_data(random=True).

    Yields
    ------
    ([image_batch, *y_true], zeros(batch_size)) where y_true comes from
    preprocess_true_boxes; the zero vector is the dummy target.
    '''
    total = len(annotation_lines)
    cursor = 0
    while True:
        images, boxes = [], []
        for _ in range(batch_size):
            # Reshuffle at the start of every pass over the data.
            if cursor == 0:
                np.random.shuffle(annotation_lines)
            sample_image, sample_boxes = get_random_data(annotation_lines[cursor], input_shape, random=True)
            images.append(sample_image)
            boxes.append(sample_boxes)
            cursor = (cursor + 1) % total
        image_batch = np.array(images)
        box_batch = np.array(boxes)
        targets = preprocess_true_boxes(box_batch, input_shape, anchors, num_classes)
        yield [image_batch, *targets], np.zeros(batch_size)

def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''Return data_generator(...) or None when there is nothing to generate.

    None is returned for an empty annotation list or a non-positive batch size.
    '''
    if len(annotation_lines) == 0:
        return None
    if batch_size <= 0:
        return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
print("Target data generator successfully defined")


Target data generator successfully defined


In [5]:
# taken from https://github.com/qqwweee/keras-yolo3
'''
MIT License

Copyright (c) 2018 qqwweee

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

# Training image size as (height, width); the y_true inputs further down are
# sized as image_height // 32 and image_height // 16 (same for width).
image_size = (448,448)
image_height, image_width = image_size

def get_anchors(anchors_path):
    '''Read anchor boxes from the first line of a text file.

    The line holds comma-separated numbers; they are returned as a float
    array of shape (N, 2). Any further lines in the file are ignored.
    '''
    with open(anchors_path) as handle:
        first_line = handle.readline()
    values = np.array([float(token) for token in first_line.split(',')])
    return values.reshape(-1, 2)

tinyYolo_anchors = get_anchors("../CNN-VLSI/tiny_yolo_anchors.txt")

def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode raw output-layer features into box parameters.

    Parameters
    ----------
    feats: tensor, raw features of one output layer
    anchors: array, shape=(num_anchors, 2), wh of the anchors used by this layer
    num_classes: integer
    input_shape: tensor, hw of the network input
    calc_loss: when equal to True, return the tuple needed by the loss instead

    Returns
    -------
    (grid, feats, box_xy, box_wh) if calc_loss == True, where feats is
    reshaped to (batch, height, width, num_anchors, num_classes + 5);
    otherwise (box_xy, box_wh, box_confidence, box_class_probs).
    """
    anchorCount = len(anchors)
    featDtype = K.dtype(feats)
    # Anchors broadcast as batch, height, width, num_anchors, box_params.
    anchorsTensor = K.reshape(K.constant(anchors), [1, 1, 1, anchorCount, 2])

    gridShape = K.shape(feats)[1:3] # height, width
    gridHeight, gridWidth = gridShape[0], gridShape[1]
    # Per-cell (x, y) index grid of shape (height, width, 1, 2).
    rowIndex = K.reshape(K.arange(0, stop=gridHeight), [-1, 1, 1, 1])
    colIndex = K.reshape(K.arange(0, stop=gridWidth), [1, -1, 1, 1])
    gridY = K.tile(rowIndex, [1, gridWidth, 1, 1])
    gridX = K.tile(colIndex, [gridHeight, 1, 1, 1])
    grid = K.cast(K.concatenate([gridX, gridY]), featDtype)

    feats = K.reshape(feats, [-1, gridHeight, gridWidth, anchorCount, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # edited by instructions in https://stackoverflow.com/questions/57558476/training-a-keras-model-yields-multiple-optimizer-errors
    gridSizeXY = K.cast(gridShape[...,::-1], featDtype)
    inputSizeXY = K.cast(input_shape[...,::-1], featDtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / gridSizeXY
    box_wh = K.exp(feats[..., 2:4]) * anchorsTensor / inputSizeXY
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

def box_iou(b1, b2):
    '''Pairwise intersection-over-union of two sets of center-format boxes.

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    def split_box(box):
        # xywh -> (wh, min corner, max corner)
        center = box[..., :2]
        size = box[..., 2:4]
        half = size/2.
        return size, center - half, center + half

    # Extra axes so that every b1 box broadcasts against every b2 box.
    first_wh, first_mins, first_maxes = split_box(K.expand_dims(b1, -2))
    second_wh, second_mins, second_maxes = split_box(K.expand_dims(b2, 0))

    # Overlap rectangle; negative extents (no overlap) are clamped to zero.
    overlap_mins = K.maximum(first_mins, second_mins)
    overlap_maxes = K.minimum(first_maxes, second_maxes)
    overlap_wh = K.maximum(overlap_maxes - overlap_mins, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    first_area = first_wh[..., 0] * first_wh[..., 1]
    second_area = second_wh[..., 0] * second_wh[..., 1]
    return overlap_area / (first_area + second_area - overlap_area)

def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensors; the first num_layers entries are the yolo_outputs
        (the output of yolo_body or tiny_yolo_body), the remaining entries
        are y_true (the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: not referenced anywhere in this function

    Returns
    -------
    loss: tensor, the sum over all output layers of the xy, wh, confidence
        and class losses, each summed over all elements and divided by the
        batch size

    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # Same anchor-to-layer assignment as in preprocess_true_boxes.
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
    # The network input size is inferred as 32x the grid of the first output.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0] # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # 1 where a ground-truth box was assigned to this cell/anchor, else 0.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss: invert the decoding done in
        # yolo_head so the targets live in the same space as raw_pred.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1] + 1e-10)
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        # Weight between 1 and 2 that is larger for smaller (normalized) boxes.
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            # Ground-truth boxes of sample b, compared against every prediction;
            # the mask entry is 1 where the best IoU stays below ignore_thresh.
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = tf.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
        # Object cells always contribute; non-object cells only contribute
        # where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        # Sum over all elements and normalize by the batch size.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
    return loss

In [6]:
# Read the annotation lists (one annotated image per line).
with open("../CNN-VLSI/train.txt") as trainText:
    train_annotation_lines = list(trainText)
with open("../CNN-VLSI/val.txt") as valText:
    val_annotation_lines = list(valText)

lenTrain, lenVal = len(train_annotation_lines), len(val_annotation_lines)
print("# of training data is", lenTrain)
print("# of validation data is", lenVal)

# Shuffle both lists in place (training list first, then validation list).
np.random.shuffle(train_annotation_lines)
np.random.shuffle(val_annotation_lines)

# of training data is 117266
# of validation data is 4952


In [7]:
# model_0 does no rounding (float32 operation)

In [8]:
model_0 = None
# '''
# Build the inference graph of model_0. Every DBL block uses Identity as its
# rounding function, i.e. no rounding is applied (plain float32).
# The shape comments below assume a 448 x 448 x 3 input.
model_0_input = Input(shape=(None, None, 3), name="model_0_inputLayer")
# model_0_pointer = model_0_input
print("Input shape:", model_0_input.shape) # 448 x 448 x 3
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_input, layerFilter=16, name="model_0_layer0_branch") 
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 448 x 448 x 16
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 224 x 224 x 16
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=32, name="model_0_layer1_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 224 x 224 x 32
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 112 x 112 x 32
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=64, name="model_0_layer2_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 112 x 112 x 64
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 56 x 56 x 64
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=128, name="model_0_layer3_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 56 x 56 x 128
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 28 x 28 x 128
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=256, name="model_0_layer4_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_0_branch0
model_0_branch0 = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 256
model_0_branch0 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=512, name="model_0_layer5_branch0")
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 512
# Stride-1 pooling with 'same' padding keeps the 14 x 14 resolution.
model_0_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_0_branch0)
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 512
model_0_branch0 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=1024, name="model_0_layer6_branch0")
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 1024
model_0_branch0 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=256, kernelSize=(1, 1), name="model_0_layer7_branch0")
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_0_branch0 (14 x 14 x 256), following model_0_branch00
model_0_branch00 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=128, kernelSize=(1, 1), name="model_0_layer8_branch00")
# model_0_pointer = model_0_branch00
# print(model_0_pointer.shape) # 14 x 14 x 128
model_0_branch00 = UpSampling2D()(model_0_branch00)
# model_0_pointer = model_0_branch00
# print(model_0_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_0_branch1 (unchanged from model_0_startBranch) and model_0_branch00
model_0_mergedBranch = Concatenate()([model_0_startBranch, model_0_branch00])
# model_0_pointer = model_0_mergedBranch
# print(model_0_pointer.shape) # 28 x 28 x 384
model_0_mergedBranch = DBL(roundingFunction=Identity, previousLayer=model_0_mergedBranch, layerFilter=256, name="model_0_layer9_branch1")
# model_0_pointer = model_0_mergedBranch
# print(model_0_pointer.shape) # 28 x 28 x 256
model_0_mergedBranch = DBL(roundingFunction=Identity, previousLayer=model_0_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_0_layerA_branch1")
# model_0_pointer = model_0_mergedBranch
print("Model output 1 shape:", model_0_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_0_mergedBranch = IdentityFinalLayer(name="model_0_outputLayer_1")(model_0_mergedBranch)
print() # OUTPUT = model_0_mergedBranch (note: the finer grid, for detecting small objects - 28 x 28 for a 448 x 448 input; the 26 x 26 of the original note corresponds to a 416 x 416 input)

print("Branch split from branch 0 - following branch 0,1")# following model_0_branch01
model_0_branch01 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=512, name="model_0_layer8_branch01")
# model_0_pointer = model_0_branch01
# print(model_0_pointer.shape) # 14 x 14 x 512
model_0_branch01 = DBL(roundingFunction=Identity, previousLayer=model_0_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_0_layer9_branch01")
# model_0_pointer = model_0_branch01
print("Model output 0 shape:", model_0_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_0_branch01 = IdentityFinalLayer(name="model_0_outputLayer_0")(model_0_branch01)
print() # OUTPUT = model_0_branch01 (note: the coarser grid, for detecting large objects - 14 x 14 for a 448 x 448 input; the 13 x 13 of the original note corresponds to a 416 x 416 input)

# Outputs are ordered coarse grid first, then fine grid.
model_0_actual = Model(inputs=model_0_input, outputs=[model_0_branch01, model_0_mergedBranch]) # follows the model from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

# Make sure the output directory exists: the JSON dump below is not inside a
# try block, so a missing ./saved_models would abort the whole cell on a
# fresh checkout.
os.makedirs("./saved_models", exist_ok=True)

# Persist the architecture of the inference model as JSON.
with open("./saved_models/model_0_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_0_actual.to_json())

# Best effort: initialise from existing weights, matched by layer name;
# layers whose shapes do not match are skipped.
try:
    model_0_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny-model_0.h5", by_name=True, skip_mismatch=True)
#     model_0_actual.load_weights("./saved_models/model_0_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_0")
except Exception as e:
    print("Failed to load existing model for model_0:", e)
# Best effort: write the (possibly freshly loaded) weights back out.
try:
    model_0_actual.save_weights("./saved_models/model_0_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
# Keyword arguments forwarded to yolo_loss by the Lambda layer below.
setArgs_model_0 = dict(
    anchors=tinyYolo_anchors,
    num_classes=classificationClass,
    ignore_thresh=0.2,
)

# One ground-truth input per output layer: stride 32 (coarse grid) first,
# then stride 16 (fine grid), each with 3 anchors.
y_true_model_0 = [
    Input(shape=(image_height // stride, image_width // stride, 3, classificationClass + 5))
    for stride in (32, 16)
]

model_0_loss = Lambda(
    yolo_loss,
    output_shape=(1,),
    name='yolo_loss_model_0',
    arguments=setArgs_model_0,
)(list(model_0_actual.output) + y_true_model_0)

model_0 = Model([model_0_actual.input] + y_true_model_0, model_0_loss)

# model_0_actual is the working model that will be used for inference
# y_true are the input layers that are fed the ground-truth values
# model_0 is trained by driving its output as close to 0 as possible (model_0 ≈ model_0_actual - y_true)
model_0_learnRate = 1e-2
model_0.compile(
    optimizer=Adam(lr=model_0_learnRate),
    loss={'yolo_loss_model_0': lambda y_true, y_pred: y_pred},
)

# the loss is how far the output is from 0

print("Model model_0 compilation complete") # what remains is porting lines 54 to 61 of https://github.com/awe777/keras-yolo3/blob/master/train.py, done in the training cell below
# '''
print()

Input shape: (None, None, None, 3)
Branch split from main branch - following branch 0
Branch split from branch 0 - following branch 0,0
Branch merge from branch 1 and branch 0,0
Model output 1 shape: (None, None, None, 255)

Branch split from branch 0 - following branch 0,1
Model output 0 shape: (None, None, None, 255)

Weight load attempt success for model_0
Loaded model is successfully re-saved
Model model_0 compilation complete



In [9]:
# model_1 approximates Q7.12 signed fixed-point operations with saturating overflow (out-of-range values clamp to the maximum/minimum representable value)
# Done by rounding to a multiple of 1/4096 and clamping to [-128, 128) after the convolution, batch normalization and activation layers

In [10]:
model_1 = None
'''
model_1_input = Input(shape=(None, None, 3), name="model_1_inputLayer")
# model_1_pointer = model_1_input
print("Input shape:", model_1_input.shape) # 448 x 448 x 3
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_input, layerFilter=16, name="model_1_layer0_branch") 
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 448 x 448 x 16
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 224 x 224 x 16
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=32, name="model_1_layer1_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 224 x 224 x 32
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 112 x 112 x 32
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=64, name="model_1_layer2_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 112 x 112 x 64
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 56 x 56 x 64
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=128, name="model_1_layer3_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 56 x 56 x 128
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 28 x 28 x 128
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=256, name="model_1_layer4_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_1_branch0
model_1_branch0 = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 256
model_1_branch0 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=512, name="model_1_layer5_branch0")
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 512
model_1_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_1_branch0)
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 512
model_1_branch0 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=1024, name="model_1_layer6_branch0")
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 1024
model_1_branch0 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=256, kernelSize=(1, 1), name="model_1_layer7_branch0")
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_1_branch0 (14 x 14 x 256), following model_1_branch00
model_1_branch00 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=128, kernelSize=(1, 1), name="model_1_layer8_branch00")
# model_1_pointer = model_1_branch00
# print(model_1_pointer.shape) # 14 x 14 x 128
model_1_branch00 = UpSampling2D()(model_1_branch00)
# model_1_pointer = model_1_branch00
# print(model_1_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_1_branch1 (unchanged from model_1_startBranch) and model_1_branch00
model_1_mergedBranch = Concatenate()([model_1_startBranch, model_1_branch00])
# model_1_pointer = model_1_mergedBranch
# print(model_1_pointer.shape) # 28 x 28 x 384
model_1_mergedBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_mergedBranch, layerFilter=256, name="model_1_layer9_branch1")
# model_1_pointer = model_1_mergedBranch
# print(model_1_pointer.shape) # 28 x 28 x 256
model_1_mergedBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_1_layerA_branch1")
# model_1_pointer = model_1_mergedBranch
print("Model output 1 shape:", model_1_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_1_mergedBranch = IdentityFinalLayer(name="model_1_outputLayer_1")(model_1_mergedBranch)
print() # OUTPUT = model_1_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_1_branch01
model_1_branch01 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=512, name="model_1_layer8_branch01")
# model_1_pointer = model_1_branch01
# print(model_1_pointer.shape) # 14 x 14 x 512
model_1_branch01 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_1_layer9_branch01")
# model_1_pointer = model_1_branch01
print("Model output 0 shape:", model_1_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_1_branch01 = IdentityFinalLayer(name="model_1_outputLayer_0")(model_1_branch01)
print() # OUTPUT = model_1_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_1_actual = Model(inputs=model_1_input, outputs=[model_1_branch01, model_1_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_1_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_1_actual.to_json())

try:
#     model_1_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
    model_1_actual.load_weights("./saved_models/model_1_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_1")
except Exception as e:
    print("Failed to load existing model for model_1:", e)
try:
    model_1_actual.save_weights("./saved_models/model_1_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_1 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_1 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_1_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_1', arguments=setArgs_model_1)([*model_1_actual.output, *y_true_model_1])

model_1 = Model([model_1_actual.input, *y_true_model_1], model_1_loss)

# model_1_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_1 adalah pendekatan sehingga output model_1 sedekat mungkin dengan 0 (model_1 ≈ model_1_actual - y_true)
model_1_learnRate = 1e-2
model_1.compile(optimizer=Adam(lr=model_1_learnRate), loss={'yolo_loss_model_1': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_1 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
print()




In [11]:
# model_2 approximates Q7.12 signed fixed point operations with integer rules (overflow = positive -> negative & vice versa) 
# Done by rounding to the nearest 1/4096 and wrapping out-of-range values around into [-128, 128) after batch normalization and activation layers

In [12]:
# model_2 cell -- currently DISABLED.
# `model_2` is bound to None so that later cells guarded by `if model_2 is not None` skip this model.
# Everything from the opening triple-quote line below down to the closing triple quote is a bare string
# literal, so none of the model-building code inside it is executed; it is kept as a switchable block.
# To re-enable it, comment out the opening triple-quote line (the closing line is already commented).
#
# What the disabled code does once enabled:
#   * builds a two-output network out of DBL blocks that all pass RoundOverflowQ7_12 as roundingFunction,
#     with MaxPooling2D between blocks, one UpSampling2D + Concatenate merge, and an IdentityFinalLayer
#     on each of the two outputs;
#   * writes the inference model's JSON to ./saved_models/model_2_inferenceModel.json;
#   * tries to load ./saved_models/model_2_checkpoint.h5 by layer name (skipping mismatches) and then
#     re-saves the weights to ./saved_models/model_2_trainModel.h5; failures are printed, not raised;
#   * wraps the network with two y_true Input layers and a Lambda(yolo_loss) layer to form `model_2`,
#     compiled with Adam(lr=model_2_learnRate) and a loss function that simply returns y_pred.
# The non-English notes inside the string say: model_2_actual is the working model used for inference,
# y_true are the input layers that receive the ground-truth values, training pushes the output of
# model_2 as close to 0 as possible, and the remaining training code is ported in the training cell below.
model_2 = None
'''
model_2_input = Input(shape=(None, None, 3), name="model_2_inputLayer")
# model_2_pointer = model_2_input
print("Input shape:", model_2_input.shape) # 448 x 448 x 3
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_input, layerFilter=16, name="model_2_layer0_branch") 
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 448 x 448 x 16
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 224 x 224 x 16
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=32, name="model_2_layer1_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 224 x 224 x 32
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 112 x 112 x 32
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=64, name="model_2_layer2_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 112 x 112 x 64
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 56 x 56 x 64
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=128, name="model_2_layer3_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 56 x 56 x 128
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 28 x 28 x 128
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=256, name="model_2_layer4_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_2_branch0
model_2_branch0 = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 256
model_2_branch0 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=512, name="model_2_layer5_branch0")
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 512
model_2_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_2_branch0)
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 512
model_2_branch0 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=1024, name="model_2_layer6_branch0")
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 1024
model_2_branch0 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=256, kernelSize=(1, 1), name="model_2_layer7_branch0")
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_2_branch0 (14 x 14 x 256), following model_2_branch00
model_2_branch00 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=128, kernelSize=(1, 1), name="model_2_layer8_branch00")
# model_2_pointer = model_2_branch00
# print(model_2_pointer.shape) # 14 x 14 x 128
model_2_branch00 = UpSampling2D()(model_2_branch00)
# model_2_pointer = model_2_branch00
# print(model_2_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_2_branch1 (unchanged from model_2_startBranch) and model_2_branch00
model_2_mergedBranch = Concatenate()([model_2_startBranch, model_2_branch00])
# model_2_pointer = model_2_mergedBranch
# print(model_2_pointer.shape) # 28 x 28 x 384
model_2_mergedBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_mergedBranch, layerFilter=256, name="model_2_layer9_branch1")
# model_2_pointer = model_2_mergedBranch
# print(model_2_pointer.shape) # 28 x 28 x 256
model_2_mergedBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_2_layerA_branch1")
# model_2_pointer = model_2_mergedBranch
print("Model output 1 shape:", model_2_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_2_mergedBranch = IdentityFinalLayer(name="model_2_outputLayer_1")(model_2_mergedBranch)
print() # OUTPUT = model_2_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_2_branch01
model_2_branch01 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=512, name="model_2_layer8_branch01")
# model_2_pointer = model_2_branch01
# print(model_2_pointer.shape) # 14 x 14 x 512
model_2_branch01 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_2_layer9_branch01")
# model_2_pointer = model_2_branch01
print("Model output 0 shape:", model_2_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_2_branch01 = IdentityFinalLayer(name="model_2_outputLayer_0")(model_2_branch01)
print() # OUTPUT = model_2_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_2_actual = Model(inputs=model_2_input, outputs=[model_2_branch01, model_2_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_2_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_2_actual.to_json())

try:
#     model_2_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
    model_2_actual.load_weights("./saved_models/model_2_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_2")
except Exception as e:
    print("Failed to load existing model for model_2:", e)
try:
    model_2_actual.save_weights("./saved_models/model_2_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_2 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_2 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_2_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_2', arguments=setArgs_model_2)([*model_2_actual.output, *y_true_model_2])

model_2 = Model([model_2_actual.input, *y_true_model_2], model_2_loss)

# model_2_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_2 adalah pendekatan sehingga output model_2 sedekat mungkin dengan 0 (model_2 ≈ model_2_actual - y_true)
model_2_learnRate = 1e-2
model_2.compile(optimizer=Adam(lr=model_2_learnRate), loss={'yolo_loss_model_2': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_2 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
# Prints an empty line in the cell output; while the block above is disabled this is the cell's only visible effect.
print()




In [13]:
# model_3 approximates Q3.4 signed fixed point operations with floating point rules (overflow = maximum/minimum value) 
# Done by rounding to the nearest 1/16 and capping at [-8, 8) after batch normalization and activation layers

In [14]:
# model_3 cell -- currently DISABLED.
# `model_3` is bound to None so that later cells guarded by `if model_3 is not None` skip this model.
# Everything from the opening triple-quote line below down to the closing triple quote is a bare string
# literal, so none of the model-building code inside it is executed; it is kept as a switchable block.
# To re-enable it, comment out the opening triple-quote line (the closing line is already commented).
#
# What the disabled code does once enabled:
#   * builds a two-output network out of DBL blocks that all pass RoundClampQ3_4 as roundingFunction,
#     with MaxPooling2D between blocks, one UpSampling2D + Concatenate merge, and an IdentityFinalLayer
#     on each of the two outputs;
#   * writes the inference model's JSON to ./saved_models/model_3_inferenceModel.json;
#   * tries to load ./saved_models/model_3_checkpoint.h5 by layer name (skipping mismatches) and then
#     re-saves the weights to ./saved_models/model_3_trainModel.h5; failures are printed, not raised;
#   * wraps the network with two y_true Input layers and a Lambda(yolo_loss) layer to form `model_3`,
#     compiled with Adam(lr=model_3_learnRate) and a loss function that simply returns y_pred.
# The non-English notes inside the string say: model_3_actual is the working model used for inference,
# y_true are the input layers that receive the ground-truth values, training pushes the output of
# model_3 as close to 0 as possible, and the remaining training code is ported in the training cell below.
model_3 = None
'''
model_3_input = Input(shape=(None, None, 3), name="model_3_inputLayer")
# model_3_pointer = model_3_input
print("Input shape:", model_3_input.shape) # 448 x 448 x 3
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_input, layerFilter=16, name="model_3_layer0_branch") 
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 448 x 448 x 16
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 224 x 224 x 16
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=32, name="model_3_layer1_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 224 x 224 x 32
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 112 x 112 x 32
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=64, name="model_3_layer2_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 112 x 112 x 64
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 56 x 56 x 64
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=128, name="model_3_layer3_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 56 x 56 x 128
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 28 x 28 x 128
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=256, name="model_3_layer4_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_3_branch0
model_3_branch0 = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 256
model_3_branch0 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=512, name="model_3_layer5_branch0")
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 512
model_3_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_3_branch0)
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 512
model_3_branch0 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=1024, name="model_3_layer6_branch0")
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 1024
model_3_branch0 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=256, kernelSize=(1, 1), name="model_3_layer7_branch0")
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_3_branch0 (14 x 14 x 256), following model_3_branch00
model_3_branch00 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=128, kernelSize=(1, 1), name="model_3_layer8_branch00")
# model_3_pointer = model_3_branch00
# print(model_3_pointer.shape) # 14 x 14 x 128
model_3_branch00 = UpSampling2D()(model_3_branch00)
# model_3_pointer = model_3_branch00
# print(model_3_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_3_branch1 (unchanged from model_3_startBranch) and model_3_branch00
model_3_mergedBranch = Concatenate()([model_3_startBranch, model_3_branch00])
# model_3_pointer = model_3_mergedBranch
# print(model_3_pointer.shape) # 28 x 28 x 384
model_3_mergedBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_mergedBranch, layerFilter=256, name="model_3_layer9_branch1")
# model_3_pointer = model_3_mergedBranch
# print(model_3_pointer.shape) # 28 x 28 x 256
model_3_mergedBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_3_layerA_branch1")
# model_3_pointer = model_3_mergedBranch
print("Model output 1 shape:", model_3_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_3_mergedBranch = IdentityFinalLayer(name="model_3_outputLayer_1")(model_3_mergedBranch)
print() # OUTPUT = model_3_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_3_branch01
model_3_branch01 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=512, name="model_3_layer8_branch01")
# model_3_pointer = model_3_branch01
# print(model_3_pointer.shape) # 14 x 14 x 512
model_3_branch01 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_3_layer9_branch01")
# model_3_pointer = model_3_branch01
print("Model output 0 shape:", model_3_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_3_branch01 = IdentityFinalLayer(name="model_3_outputLayer_0")(model_3_branch01)
print() # OUTPUT = model_3_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_3_actual = Model(inputs=model_3_input, outputs=[model_3_branch01, model_3_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_3_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_3_actual.to_json())

try:
#     model_3_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
    model_3_actual.load_weights("./saved_models/model_3_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_3")
except Exception as e:
    print("Failed to load existing model for model_3:", e)
try:
    model_3_actual.save_weights("./saved_models/model_3_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_3 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_3 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_3_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_3', arguments=setArgs_model_3)([*model_3_actual.output, *y_true_model_3])

model_3 = Model([model_3_actual.input, *y_true_model_3], model_3_loss)

# model_3_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_3 adalah pendekatan sehingga output model_3 sedekat mungkin dengan 0 (model_3 ≈ model_3_actual - y_true)
model_3_learnRate = 1e-2
model_3.compile(optimizer=Adam(lr=model_3_learnRate), loss={'yolo_loss_model_3': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_3 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
# Prints an empty line in the cell output; while the block above is disabled this is the cell's only visible effect.
print()




In [15]:
# model_4 approximates Q3.4 signed fixed point operations with integer rules (overflow = positive -> negative & vice versa) 
# Done by rounding to the nearest 1/16 and wrapping out-of-range values around into [-8, 8) after batch normalization and activation layers

In [16]:
# model_4 cell -- currently DISABLED.
# `model_4` is bound to None so that later cells guarded by `if model_4 is not None` skip this model.
# Everything from the opening triple-quote line below down to the closing triple quote is a bare string
# literal, so none of the model-building code inside it is executed; it is kept as a switchable block.
# To re-enable it, comment out the opening triple-quote line (the closing line is already commented).
#
# What the disabled code does once enabled:
#   * builds a two-output network out of DBL blocks that all pass RoundOverflowQ3_4 as roundingFunction,
#     with MaxPooling2D between blocks, one UpSampling2D + Concatenate merge, and an IdentityFinalLayer
#     on each of the two outputs;
#   * writes the inference model's JSON to ./saved_models/model_4_inferenceModel.json;
#   * tries to load ./saved_models/model_4_checkpoint.h5 by layer name (skipping mismatches) and then
#     re-saves the weights to ./saved_models/model_4_trainModel.h5; failures are printed, not raised;
#   * wraps the network with two y_true Input layers and a Lambda(yolo_loss) layer to form `model_4`,
#     compiled with Adam(lr=model_4_learnRate) and a loss function that simply returns y_pred.
#     Note that model_4_learnRate is 1e-3 in this cell.
# The non-English notes inside the string say: model_4_actual is the working model used for inference,
# y_true are the input layers that receive the ground-truth values, training pushes the output of
# model_4 as close to 0 as possible, and the remaining training code is ported in the training cell below.
model_4 = None
'''
model_4_input = Input(shape=(None, None, 3), name="model_4_inputLayer")
# model_4_pointer = model_4_input
print("Input shape:", model_4_input.shape) # 448 x 448 x 3
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_input, layerFilter=16, name="model_4_layer0_branch") 
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 448 x 448 x 16
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 224 x 224 x 16
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=32, name="model_4_layer1_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 224 x 224 x 32
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 112 x 112 x 32
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=64, name="model_4_layer2_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 112 x 112 x 64
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 56 x 56 x 64
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=128, name="model_4_layer3_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 56 x 56 x 128
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 28 x 28 x 128
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=256, name="model_4_layer4_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_4_branch0
model_4_branch0 = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 256
model_4_branch0 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=512, name="model_4_layer5_branch0")
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 512
model_4_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_4_branch0)
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 512
model_4_branch0 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=1024, name="model_4_layer6_branch0")
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 1024
model_4_branch0 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=256, kernelSize=(1, 1), name="model_4_layer7_branch0")
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_4_branch0 (14 x 14 x 256), following model_4_branch00
model_4_branch00 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=128, kernelSize=(1, 1), name="model_4_layer8_branch00")
# model_4_pointer = model_4_branch00
# print(model_4_pointer.shape) # 14 x 14 x 128
model_4_branch00 = UpSampling2D()(model_4_branch00)
# model_4_pointer = model_4_branch00
# print(model_4_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_4_branch1 (unchanged from model_4_startBranch) and model_4_branch00
model_4_mergedBranch = Concatenate()([model_4_startBranch, model_4_branch00])
# model_4_pointer = model_4_mergedBranch
# print(model_4_pointer.shape) # 28 x 28 x 384
model_4_mergedBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_mergedBranch, layerFilter=256, name="model_4_layer9_branch1")
# model_4_pointer = model_4_mergedBranch
# print(model_4_pointer.shape) # 28 x 28 x 256
model_4_mergedBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_4_layerA_branch1")
# model_4_pointer = model_4_mergedBranch
print("Model output 1 shape:", model_4_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_4_mergedBranch = IdentityFinalLayer(name="model_4_outputLayer_1")(model_4_mergedBranch)
print() # OUTPUT = model_4_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_4_branch01
model_4_branch01 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=512, name="model_4_layer8_branch01")
# model_4_pointer = model_4_branch01
# print(model_4_pointer.shape) # 14 x 14 x 512
model_4_branch01 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_4_layer9_branch01")
# model_4_pointer = model_4_branch01
print("Model output 0 shape:", model_4_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_4_branch01 = IdentityFinalLayer(name="model_4_outputLayer_0")(model_4_branch01)
print() # OUTPUT = model_4_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_4_actual = Model(inputs=model_4_input, outputs=[model_4_branch01, model_4_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_4_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_4_actual.to_json())

try:
#     model_4_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny-model_4.h5", by_name=True, skip_mismatch=True)
    model_4_actual.load_weights("./saved_models/model_4_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_4")
except Exception as e:
    print("Failed to load existing model for model_4:", e)
try:
    model_4_actual.save_weights("./saved_models/model_4_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_4 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_4 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_4_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_4', arguments=setArgs_model_4)([*model_4_actual.output, *y_true_model_4])

model_4 = Model([model_4_actual.input, *y_true_model_4], model_4_loss)

# model_4_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_4 adalah pendekatan sehingga output model_4 sedekat mungkin dengan 0 (model_4 ≈ model_4_actual - y_true)
model_4_learnRate = 1e-3
model_4.compile(optimizer=Adam(lr=model_4_learnRate), loss={'yolo_loss_model_4': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_4 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
# Prints an empty line in the cell output; while the block above is disabled this is the cell's only visible effect.
print()




In [17]:
# Dump the Keras summary table of model_0 to a text file; skipped while model_0 is None.
if model_0 is not None:
    with open("./saved_models/model_0_summary.txt", "wt") as summaryFile:
        # Each line handed to print_fn is written to the file followed by a newline.
        model_0.summary(
            line_length=200,
            print_fn=lambda summaryLine: print(summaryLine, file=summaryFile),
        )

In [18]:
# Dump the Keras summary table of model_1 to a text file; skipped while model_1 is None.
if model_1 is not None:
    with open("./saved_models/model_1_summary.txt", "wt") as summaryFile:
        # Each line handed to print_fn is written to the file followed by a newline.
        model_1.summary(
            line_length=200,
            print_fn=lambda summaryLine: print(summaryLine, file=summaryFile),
        )

In [19]:
# Dump the Keras summary table of model_2 to a text file; skipped while model_2 is None.
if model_2 is not None:
    with open("./saved_models/model_2_summary.txt", "wt") as summaryFile:
        # Each line handed to print_fn is written to the file followed by a newline.
        model_2.summary(
            line_length=200,
            print_fn=lambda summaryLine: print(summaryLine, file=summaryFile),
        )

In [20]:
# Dump the Keras summary table of model_3 to a text file; skipped while model_3 is None.
if model_3 is not None:
    with open("./saved_models/model_3_summary.txt", "wt") as summaryFile:
        # Each line handed to print_fn is written to the file followed by a newline.
        model_3.summary(
            line_length=200,
            print_fn=lambda summaryLine: print(summaryLine, file=summaryFile),
        )

In [21]:
# Dump the Keras summary table of model_4 to a text file; skipped while model_4 is None.
if model_4 is not None:
    with open("./saved_models/model_4_summary.txt", "wt") as summaryFile:
        # Each line handed to print_fn is written to the file followed by a newline.
        model_4.summary(
            line_length=200,
            print_fn=lambda summaryLine: print(summaryLine, file=summaryFile),
        )

In [22]:
print()
# --- Training configuration -------------------------------------------------
trainingBatchSize = 1
# Author's rationale for the split value: a high split means more work is saved.
#   117266 mod 499  = 1  -> fewer images lost for (416, 416)
#   117266 mod 1047 = 2  -> used due to the high loss of (448, 448)
epochSplit = 1047
# The training and validation generators share every setting except the annotation
# list they read from; the training generator is created first, then the validation one.
train_data_generator, val_data_generator = (
    data_generator_wrapper(
        annotation_lines=annotationSubset,
        batch_size=trainingBatchSize,
        input_shape=image_size,
        anchors=tinyYolo_anchors,
        num_classes=classificationClass,
    )
    for annotationSubset in (train_annotation_lines, val_annotation_lines)
)
minimumLR = 1e-4
decayChance = 0.50
class BestValueRecorder(Callback):
    """Keras callback that persists the best loss / validation loss to a JSON file.

    The JSON file holds a two-element list ``[best_loss, best_val_loss]``. On
    construction the file is read (if it can be) so that a restarted run resumes
    from the previously recorded best values; those values are also copied into
    the ``best`` attribute of the optional ModelCheckpoint instances so they do
    not overwrite a checkpoint with a worse model after a restart.

    Parameters
    ----------
    filepath : str
        Path of the JSON file used to load and store the best values.
    monitorValidation : bool
        When True, ``val_loss`` is tracked (and restored from the file) as well.
    mode : {'min', 'max'}
        Direction of improvement; any other value falls back to 'min'.
    verbose : int
        When greater than 0, a message is printed each time the file is rewritten.
    instanceModelCheckpointLoss, instanceModelCheckpointVal : object or None
        Objects whose ``best`` attribute is set to the restored loss /
        validation-loss value. The validation one is only touched when
        ``monitorValidation`` is True.
    """

    def __init__(self, filepath, monitorValidation=False, mode='min', verbose=1, instanceModelCheckpointLoss=None, instanceModelCheckpointVal=None):
        super(BestValueRecorder, self).__init__()
        self.lookOnVal = monitorValidation
        self.verbose = verbose
        if mode not in ['min', 'max']:
            mode = 'min'
        # np.inf is used instead of the np.Inf alias, which newer NumPy releases no longer provide.
        if mode == 'min':
            self.monitor_op = np.less
            self.MCLossValue = np.inf
            self.MCValLossValue = np.inf
        else:
            self.monitor_op = np.greater
            self.MCLossValue = -np.inf
            self.MCValLossValue = -np.inf
        self.filepath = filepath
        try:
            with open(self.filepath, 'rt') as jsonFile:
                bestValueList = json.loads(jsonFile.read())
            # float() rejects non-numeric entries (e.g. null) here, inside the try,
            # instead of letting them break the comparison in on_epoch_end later.
            self.MCLossValue = float(bestValueList[0])
            if self.lookOnVal:
                self.MCValLossValue = float(bestValueList[1])
        except Exception as e:
            # Best effort: a missing or unreadable file just means starting from the defaults.
            print("Failed to open JSON file:", e)
        if instanceModelCheckpointLoss is not None:
            instanceModelCheckpointLoss.best = self.MCLossValue
        if instanceModelCheckpointVal is not None and monitorValidation:
            instanceModelCheckpointVal.best = self.MCValLossValue

    def on_epoch_end(self, epoch, logs=None):
        """Update the recorded best values from ``logs`` and rewrite the JSON file on improvement."""
        logs = logs or {}
        currentLoss = logs.get('loss')
        newChange = False
        if currentLoss is not None and self.monitor_op(currentLoss, self.MCLossValue):
            # Store a plain Python float so json.dumps never meets a NumPy scalar type.
            self.MCLossValue = float(currentLoss)
            newChange = True
        if self.lookOnVal:
            currentValLoss = logs.get('val_loss')
            if currentValLoss is not None and self.monitor_op(currentValLoss, self.MCValLossValue):
                self.MCValLossValue = float(currentValLoss)
                newChange = True
        if newChange:
            try:
                # Serialise before opening: open(..., 'wt') truncates the file, so a
                # serialisation error after that point would destroy the previous record.
                serialisedBest = json.dumps([self.MCLossValue, self.MCValLossValue])
                with open(self.filepath, 'wt') as jsonFile:
                    if self.verbose > 0:
                        print("Saving best loss value:", [self.MCLossValue, self.MCValLossValue])
                    jsonFile.write(serialisedBest)
            except Exception as e:
                print("Failed to open JSON file:", e)
print()





In [23]:
print()
# ''' // comment on this line to enable/disable this block
# Training cell for model_0: builds the callbacks, then repeatedly calls fit_generator
# ("super-epochs") until the wall-clock budget is used up, adjusting the learn rate and
# early-stopping patience between calls.

# Saves the weights whenever val_loss reaches a new minimum.
model_0_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_0_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
# Saves the weights when the training loss reaches a new minimum, checked every 2nd epoch (period=2).
# NOTE(review): this writes to the same file as model_0_checkpoint_val, so the file holds whichever
# of the two callbacks saved last -- confirm that overwriting the best-val_loss weights is intended.
model_0_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_0_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
# Restores the best loss values from JSON (if present) and keeps the file up to date during training.
model_0_recorder = BestValueRecorder(
    filepath='./saved_models/model_0_bestValue.json',
    monitorValidation=True,
    verbose=1,
    mode='min',
    # these instances are modified by the __init__ (constructor) of this object
    instanceModelCheckpointLoss=model_0_checkpoint_loss,
    instanceModelCheckpointVal=model_0_checkpoint_val
)
# Must be created after model_0_recorder so that the baseline sees the restored best loss.
model_0_ER = EarlyStopping(
    monitor='loss',
    patience=15,
    verbose=1,
    mode='min',
    baseline=model_0_checkpoint_loss.best # this instance is not modified by the __init__ of this object
)
# Cube root of 1/4: three applied decay steps multiply the learn rate by 25%.
# The decay is only applied with probability decayChance per super-epoch (see below).
model_0_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
# The time limit is only checked between fit_generator calls, so a super-epoch that has started always runs to its own end.
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 20: # alternative stop condition: fixed super-epoch count instead of wall-clock time (NOTE(review): the original note said 10 super-epochs, which does not match "<= 20")
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_0_learnRate, '- minimum LR:', minimumLR)
    print()
    # One super-epoch: the step count per Keras epoch is divided by epochSplit and the
    # epoch count multiplied by it, so callbacks run after every short epoch.
    model_0_history = model_0.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_0_checkpoint_val,
            model_0_checkpoint_loss,
            model_0_recorder,
            model_0_ER
        ]
    )
    # Random draw at or above decayChance: raise the early-stopping patience by 15%
    # (patience becomes a float from here on).
    if np.random.rand() >= decayChance:
        model_0_ER.patience = model_0_ER.patience * 1.15
        print("Early stopping patience rate increased -", model_0_ER.patience)
    if model_0_learnRate * model_0_LRDecay >= minimumLR and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_0_learnRate = model_0_LRDecay * model_0_learnRate
    # Continue from the last saved checkpoint rather than the final weights of this super-epoch,
    # then recompile with a newly constructed Adam optimizer carrying the current learn rate.
    model_0.load_weights("./saved_models/model_0_checkpoint.h5")
    model_0.compile(optimizer=Adam(lr=model_0_learnRate), loss={'yolo_loss_model_0': lambda y_true, y_pred: y_pred})
    print()
print("model_0 training done in", str(time.time() - start_time))
model_0.save_weights("./saved_models/model_0_trainModel.h5")
# '''
print()


Failed to open JSON file: [Errno 2] No such file or directory: './saved_models/model_0_bestValue.json'
Time 0.000997781753540039
Super-epoch 1 - learn rate: 0.01 - minimum LR: 0.0001

Epoch 1/1047
 - 32s - loss: 2301.0102 - val_loss: 894.0676

Epoch 00001: val_loss improved from inf to 894.06757, saving model to ./saved_models/model_0_checkpoint.h5
Saving best loss value: [2301.01023210798, 894.0675659179688]
Epoch 2/1047
 - 25s - loss: 2142.7837 - val_loss: 2365.2607

Epoch 00002: val_loss did not improve from 894.06757

Epoch 00002: loss improved from inf to 2142.78371, saving model to ./saved_models/model_0_checkpoint.h5
Saving best loss value: [2142.783708844866, 894.0675659179688]
Epoch 3/1047
 - 27s - loss: 2026.5820 - val_loss: 1862.9827

Epoch 00003: val_loss did not improve from 894.06757
Saving best loss value: [2026.5819778442383, 894.0675659179688]
Epoch 4/1047
 - 27s - loss: 1978.0873 - val_loss: 2855.9595

Epoch 00004: val_loss did not improve from 894.06757

Epoch 00004

 - 21s - loss: 452.6729 - val_loss: 356.2049

Epoch 00033: val_loss improved from 381.85080 to 356.20490, saving model to ./saved_models/model_0_checkpoint.h5
Saving best loss value: [452.6729278564453, 356.20489501953125]
Epoch 34/1047
 - 22s - loss: 457.0593 - val_loss: 399.1700

Epoch 00034: val_loss did not improve from 356.20490

Epoch 00034: loss improved from 459.96665 to 457.05927, saving model to ./saved_models/model_0_checkpoint.h5
Epoch 35/1047
 - 23s - loss: 424.6383 - val_loss: 391.9345

Epoch 00035: val_loss did not improve from 356.20490
Saving best loss value: [424.63830075945174, 356.20489501953125]
Epoch 36/1047
 - 26s - loss: 404.2475 - val_loss: 568.9353

Epoch 00036: val_loss did not improve from 356.20490

Epoch 00036: loss improved from 457.05927 to 404.24745, saving model to ./saved_models/model_0_checkpoint.h5
Saving best loss value: [404.2474504198347, 356.20489501953125]
Epoch 37/1047
 - 27s - loss: 387.1535 - val_loss: 358.3456

Epoch 00037: val_loss did not

Saving best loss value: [151.4783137866429, 88.14046478271484]
Epoch 69/1047
 - 26s - loss: 130.0446 - val_loss: 73.9109

Epoch 00069: val_loss improved from 88.14046 to 73.91090, saving model to ./saved_models/model_0_checkpoint.h5
Saving best loss value: [130.04458039147514, 73.91089630126953]
Epoch 70/1047
 - 21s - loss: 132.2868 - val_loss: 191.0195

Epoch 00070: val_loss did not improve from 73.91090

Epoch 00070: loss improved from 157.04092 to 132.28683, saving model to ./saved_models/model_0_checkpoint.h5
Epoch 71/1047
 - 20s - loss: 139.5778 - val_loss: 112.3438

Epoch 00071: val_loss did not improve from 73.91090
Epoch 72/1047
 - 25s - loss: 145.7520 - val_loss: 203.9341

Epoch 00072: val_loss did not improve from 73.91090

Epoch 00072: loss did not improve from 132.28683
Epoch 73/1047
 - 20s - loss: 143.7640 - val_loss: 154.8047

Epoch 00073: val_loss did not improve from 73.91090
Epoch 74/1047
 - 20s - loss: 121.5890 - val_loss: 85.3556

Epoch 00074: val_loss did not improv

Saving best loss value: [80.8630703006472, 29.255266189575195]
Epoch 115/1047
 - 21s - loss: 78.7517 - val_loss: 116.2438

Epoch 00115: val_loss did not improve from 29.25527
Saving best loss value: [78.75166829994747, 29.255266189575195]
Epoch 116/1047
 - 27s - loss: 77.0743 - val_loss: 299.0839

Epoch 00116: val_loss did not improve from 29.25527

Epoch 00116: loss improved from 80.86307 to 77.07427, saving model to ./saved_models/model_0_checkpoint.h5
Saving best loss value: [77.07427070822034, 29.255266189575195]
Epoch 117/1047
 - 23s - loss: 87.4197 - val_loss: 70.6755

Epoch 00117: val_loss did not improve from 29.25527
Epoch 118/1047
 - 24s - loss: 89.5426 - val_loss: 28.8730

Epoch 00118: val_loss improved from 29.25527 to 28.87304, saving model to ./saved_models/model_0_checkpoint.h5

Epoch 00118: loss did not improve from 77.07427
Saving best loss value: [77.07427070822034, 28.873044967651367]
Epoch 119/1047
 - 19s - loss: 70.6103 - val_loss: 157.5086

Epoch 00119: val_loss d


Epoch 00004: val_loss did not improve from 18.26878

Epoch 00004: loss did not improve from 68.86624
Epoch 5/1047
 - 20s - loss: 83.1040 - val_loss: 37.0690

Epoch 00005: val_loss did not improve from 18.26878
Epoch 6/1047
 - 18s - loss: 74.8739 - val_loss: 28.6870

Epoch 00006: val_loss did not improve from 18.26878

Epoch 00006: loss did not improve from 68.86624
Epoch 7/1047
 - 20s - loss: 77.3035 - val_loss: 49.7293

Epoch 00007: val_loss did not improve from 18.26878
Epoch 8/1047
 - 19s - loss: 75.6107 - val_loss: 44.0311

Epoch 00008: val_loss did not improve from 18.26878

Epoch 00008: loss did not improve from 68.86624
Epoch 9/1047
 - 19s - loss: 68.1599 - val_loss: 195.0895

Epoch 00009: val_loss did not improve from 18.26878
Epoch 10/1047
 - 20s - loss: 81.0310 - val_loss: 193.9503

Epoch 00010: val_loss did not improve from 18.26878

Epoch 00010: loss did not improve from 68.86624
Epoch 11/1047
 - 18s - loss: 63.1547 - val_loss: 171.2231

Epoch 00011: val_loss did not impro


Epoch 00036: val_loss did not improve from 4.49246

Epoch 00036: loss did not improve from 65.76438
Epoch 37/1047
 - 19s - loss: 80.1199 - val_loss: 166.0494

Epoch 00037: val_loss did not improve from 4.49246
Epoch 38/1047
 - 19s - loss: 90.7971 - val_loss: 50.9720

Epoch 00038: val_loss did not improve from 4.49246

Epoch 00038: loss did not improve from 65.76438
Epoch 39/1047
 - 19s - loss: 77.6000 - val_loss: 142.1120

Epoch 00039: val_loss did not improve from 4.49246
Epoch 40/1047
 - 22s - loss: 80.0530 - val_loss: 45.5230

Epoch 00040: val_loss did not improve from 4.49246

Epoch 00040: loss did not improve from 65.76438
Epoch 41/1047
 - 19s - loss: 72.3976 - val_loss: 29.6883

Epoch 00041: val_loss did not improve from 4.49246
Epoch 42/1047
 - 19s - loss: 64.0885 - val_loss: 25.4965

Epoch 00042: val_loss did not improve from 4.49246

Epoch 00042: loss improved from 65.76438 to 64.08845, saving model to ./saved_models/model_0_checkpoint.h5
Epoch 43/1047
 - 18s - loss: 69.9210 

 - 21s - loss: 75.1962 - val_loss: 82.8467

Epoch 00008: val_loss did not improve from 4.49246
Epoch 9/1047
 - 21s - loss: 71.1390 - val_loss: 69.1684

Epoch 00009: val_loss did not improve from 4.49246

Epoch 00009: loss did not improve from 62.52690
Epoch 10/1047
 - 18s - loss: 77.4792 - val_loss: 30.3802

Epoch 00010: val_loss did not improve from 4.49246
Epoch 11/1047
 - 18s - loss: 74.4698 - val_loss: 63.1706

Epoch 00011: val_loss did not improve from 4.49246

Epoch 00011: loss did not improve from 62.52690
Epoch 12/1047
 - 19s - loss: 80.4048 - val_loss: 73.5592

Epoch 00012: val_loss did not improve from 4.49246
Epoch 13/1047
 - 19s - loss: 58.2202 - val_loss: 51.4339

Epoch 00013: val_loss did not improve from 4.49246

Epoch 00013: loss improved from 62.52690 to 58.22021, saving model to ./saved_models/model_0_checkpoint.h5
Saving best loss value: [58.22020722287042, 4.492456436157227]
Epoch 14/1047
 - 20s - loss: 66.5041 - val_loss: 41.7905

Epoch 00014: val_loss did not impr

 - 22s - loss: 70.3145 - val_loss: 18.7508

Epoch 00019: val_loss did not improve from 3.03583
Epoch 20/1047
 - 24s - loss: 76.0134 - val_loss: 17.1905

Epoch 00020: val_loss did not improve from 3.03583

Epoch 00020: loss did not improve from 58.22021
Epoch 21/1047
 - 20s - loss: 60.7410 - val_loss: 175.9101

Epoch 00021: val_loss did not improve from 3.03583
Epoch 22/1047
 - 25s - loss: 69.2367 - val_loss: 17.2922

Epoch 00022: val_loss did not improve from 3.03583

Epoch 00022: loss did not improve from 58.22021
Epoch 23/1047
 - 21s - loss: 79.4660 - val_loss: 58.0289

Epoch 00023: val_loss did not improve from 3.03583
Epoch 24/1047
 - 20s - loss: 67.4727 - val_loss: 46.5110

Epoch 00024: val_loss did not improve from 3.03583

Epoch 00024: loss did not improve from 58.22021
Epoch 25/1047
 - 20s - loss: 83.8695 - val_loss: 79.6844

Epoch 00025: val_loss did not improve from 3.03583
Epoch 26/1047
 - 20s - loss: 78.3824 - val_loss: 5.0949

Epoch 00026: val_loss did not improve from 3.0


Epoch 00001: val_loss did not improve from 3.03583

Epoch 00001: loss did not improve from 58.22021
Epoch 2/1047
 - 22s - loss: 79.1188 - val_loss: 227.4022

Epoch 00002: val_loss did not improve from 3.03583
Epoch 3/1047
 - 19s - loss: 68.9567 - val_loss: 17.2777

Epoch 00003: val_loss did not improve from 3.03583

Epoch 00003: loss did not improve from 58.22021
Epoch 4/1047
 - 21s - loss: 86.7058 - val_loss: 25.8350

Epoch 00004: val_loss did not improve from 3.03583
Epoch 5/1047
 - 20s - loss: 72.0714 - val_loss: 17.8748

Epoch 00005: val_loss did not improve from 3.03583

Epoch 00005: loss did not improve from 58.22021
Epoch 6/1047
 - 26s - loss: 79.0720 - val_loss: 140.0429

Epoch 00006: val_loss did not improve from 3.03583
Epoch 7/1047
 - 25s - loss: 81.2651 - val_loss: 75.0169

Epoch 00007: val_loss did not improve from 3.03583

Epoch 00007: loss did not improve from 58.22021
Epoch 8/1047
 - 23s - loss: 68.3823 - val_loss: 117.9573

Epoch 00008: val_loss did not improve from 3

 - 21s - loss: 77.9968 - val_loss: 71.5765

Epoch 00016: val_loss did not improve from 3.03583
Epoch 17/1047
 - 19s - loss: 67.6013 - val_loss: 74.8716

Epoch 00017: val_loss did not improve from 3.03583

Epoch 00017: loss did not improve from 58.22021
Epoch 18/1047
 - 20s - loss: 72.2307 - val_loss: 86.3490

Epoch 00018: val_loss did not improve from 3.03583
Epoch 19/1047
 - 24s - loss: 68.8679 - val_loss: 81.9857

Epoch 00019: val_loss did not improve from 3.03583

Epoch 00019: loss did not improve from 58.22021
Epoch 20/1047
 - 21s - loss: 69.7087 - val_loss: 57.4352

Epoch 00020: val_loss did not improve from 3.03583
Epoch 21/1047
 - 20s - loss: 66.1406 - val_loss: 46.2175

Epoch 00021: val_loss did not improve from 3.03583

Epoch 00021: loss did not improve from 58.22021
Epoch 22/1047
 - 19s - loss: 75.3014 - val_loss: 84.3633

Epoch 00022: val_loss did not improve from 3.03583
Epoch 23/1047
 - 21s - loss: 65.8948 - val_loss: 46.9509

Epoch 00023: val_loss did not improve from 3.0

 - 18s - loss: 74.6371 - val_loss: 19.9461

Epoch 00030: val_loss did not improve from 2.66865
Epoch 31/1047
 - 19s - loss: 68.7694 - val_loss: 28.4142

Epoch 00031: val_loss did not improve from 2.66865

Epoch 00031: loss did not improve from 58.22021
Epoch 32/1047
 - 18s - loss: 66.4203 - val_loss: 39.6662

Epoch 00032: val_loss did not improve from 2.66865
Epoch 33/1047
 - 18s - loss: 68.2857 - val_loss: 300.4698

Epoch 00033: val_loss did not improve from 2.66865

Epoch 00033: loss did not improve from 58.22021
Epoch 34/1047
 - 18s - loss: 78.9164 - val_loss: 70.8322

Epoch 00034: val_loss did not improve from 2.66865
Epoch 35/1047
 - 18s - loss: 84.6206 - val_loss: 130.7243

Epoch 00035: val_loss did not improve from 2.66865

Epoch 00035: loss did not improve from 58.22021
Epoch 36/1047
 - 19s - loss: 73.9495 - val_loss: 123.7767

Epoch 00036: val_loss did not improve from 2.66865
Epoch 37/1047
 - 18s - loss: 76.4776 - val_loss: 69.5661

Epoch 00037: val_loss did not improve from 

 - 19s - loss: 79.9719 - val_loss: 68.3087

Epoch 00018: val_loss did not improve from 2.66865
Epoch 19/1047
 - 18s - loss: 81.2813 - val_loss: 42.6080

Epoch 00019: val_loss did not improve from 2.66865

Epoch 00019: loss did not improve from 58.22021
Epoch 20/1047
 - 18s - loss: 74.8985 - val_loss: 91.7108

Epoch 00020: val_loss did not improve from 2.66865
Epoch 21/1047
 - 19s - loss: 59.8429 - val_loss: 35.4733

Epoch 00021: val_loss did not improve from 2.66865

Epoch 00021: loss did not improve from 58.22021
Epoch 22/1047
 - 19s - loss: 64.3541 - val_loss: 259.6308

Epoch 00022: val_loss did not improve from 2.66865
Epoch 23/1047
 - 19s - loss: 77.4255 - val_loss: 166.7739

Epoch 00023: val_loss did not improve from 2.66865

Epoch 00023: loss did not improve from 58.22021
Epoch 24/1047
 - 19s - loss: 70.0239 - val_loss: 38.4058

Epoch 00024: val_loss did not improve from 2.66865
Epoch 25/1047
 - 18s - loss: 70.0749 - val_loss: 166.9904

Epoch 00025: val_loss did not improve from 


Epoch 00024: val_loss did not improve from 2.66865

Epoch 00024: loss did not improve from 58.22021
Epoch 25/1047
 - 20s - loss: 73.1820 - val_loss: 28.7759

Epoch 00025: val_loss did not improve from 2.66865
Epoch 26/1047
 - 21s - loss: 77.3028 - val_loss: 43.6110

Epoch 00026: val_loss did not improve from 2.66865

Epoch 00026: loss did not improve from 58.22021
Epoch 27/1047
 - 19s - loss: 83.0928 - val_loss: 91.2415

Epoch 00027: val_loss did not improve from 2.66865
Epoch 28/1047
 - 19s - loss: 70.4363 - val_loss: 54.6586

Epoch 00028: val_loss did not improve from 2.66865

Epoch 00028: loss did not improve from 58.22021
Epoch 29/1047
 - 18s - loss: 67.1768 - val_loss: 32.2850

Epoch 00029: val_loss did not improve from 2.66865
Epoch 30/1047
 - 18s - loss: 82.5152 - val_loss: 104.4368

Epoch 00030: val_loss did not improve from 2.66865

Epoch 00030: loss did not improve from 58.22021
Epoch 31/1047
 - 19s - loss: 78.9971 - val_loss: 99.6036

Epoch 00031: val_loss did not improve f

 - 19s - loss: 71.8497 - val_loss: 328.8048

Epoch 00007: val_loss did not improve from 2.66865
Epoch 8/1047
 - 21s - loss: 72.0606 - val_loss: 71.0257

Epoch 00008: val_loss did not improve from 2.66865

Epoch 00008: loss did not improve from 58.22021
Epoch 9/1047
 - 22s - loss: 76.8750 - val_loss: 46.5264

Epoch 00009: val_loss did not improve from 2.66865
Epoch 10/1047
 - 20s - loss: 81.6983 - val_loss: 60.2888

Epoch 00010: val_loss did not improve from 2.66865

Epoch 00010: loss did not improve from 58.22021
Epoch 11/1047
 - 21s - loss: 76.2432 - val_loss: 50.0986

Epoch 00011: val_loss did not improve from 2.66865
Epoch 12/1047
 - 22s - loss: 68.4226 - val_loss: 138.6323

Epoch 00012: val_loss did not improve from 2.66865

Epoch 00012: loss did not improve from 58.22021
Epoch 13/1047
 - 19s - loss: 69.7791 - val_loss: 58.3181

Epoch 00013: val_loss did not improve from 2.66865
Epoch 14/1047
 - 21s - loss: 74.8298 - val_loss: 33.7795

Epoch 00014: val_loss did not improve from 2.6

 - 23s - loss: 69.3570 - val_loss: 148.0043

Epoch 00069: val_loss did not improve from 2.66865
Epoch 70/1047
 - 19s - loss: 70.9673 - val_loss: 45.5712

Epoch 00070: val_loss did not improve from 2.66865

Epoch 00070: loss did not improve from 58.22021
Epoch 71/1047
 - 19s - loss: 74.3701 - val_loss: 57.9748

Epoch 00071: val_loss did not improve from 2.66865
Epoch 72/1047
 - 24s - loss: 75.8981 - val_loss: 109.3747

Epoch 00072: val_loss did not improve from 2.66865

Epoch 00072: loss did not improve from 58.22021
Epoch 73/1047
 - 18s - loss: 78.2042 - val_loss: 30.4180

Epoch 00073: val_loss did not improve from 2.66865
Epoch 74/1047
 - 20s - loss: 58.6670 - val_loss: 32.2496

Epoch 00074: val_loss did not improve from 2.66865

Epoch 00074: loss did not improve from 58.22021
Epoch 75/1047
 - 21s - loss: 74.1405 - val_loss: 43.6153

Epoch 00075: val_loss did not improve from 2.66865
Epoch 76/1047
 - 22s - loss: 84.9764 - val_loss: 90.0943

Epoch 00076: val_loss did not improve from 2


Epoch 00019: val_loss did not improve from 2.66865

Epoch 00019: loss did not improve from 56.69710
Epoch 20/1047
 - 19s - loss: 81.7071 - val_loss: 118.9585

Epoch 00020: val_loss did not improve from 2.66865
Epoch 21/1047
 - 19s - loss: 73.6488 - val_loss: 44.1335

Epoch 00021: val_loss did not improve from 2.66865

Epoch 00021: loss did not improve from 56.69710
Epoch 22/1047
 - 19s - loss: 80.5823 - val_loss: 51.9728

Epoch 00022: val_loss did not improve from 2.66865
Epoch 23/1047
 - 20s - loss: 75.4716 - val_loss: 53.3570

Epoch 00023: val_loss did not improve from 2.66865

Epoch 00023: loss did not improve from 56.69710
Epoch 24/1047
 - 19s - loss: 67.7569 - val_loss: 151.0511

Epoch 00024: val_loss did not improve from 2.66865
Epoch 25/1047
 - 21s - loss: 79.3756 - val_loss: 21.5205

Epoch 00025: val_loss did not improve from 2.66865

Epoch 00025: loss did not improve from 56.69710
Epoch 26/1047
 - 26s - loss: 78.0634 - val_loss: 33.4691

Epoch 00026: val_loss did not improve 


Epoch 00038: val_loss did not improve from 2.66865

Epoch 00038: loss did not improve from 56.69710
Epoch 39/1047
 - 19s - loss: 71.1250 - val_loss: 3.1643

Epoch 00039: val_loss did not improve from 2.66865
Epoch 40/1047
 - 20s - loss: 69.8202 - val_loss: 89.5265

Epoch 00040: val_loss did not improve from 2.66865

Epoch 00040: loss did not improve from 56.69710
Epoch 41/1047
 - 19s - loss: 78.7386 - val_loss: 36.0739

Epoch 00041: val_loss did not improve from 2.66865
Epoch 42/1047
 - 19s - loss: 73.9216 - val_loss: 66.1537

Epoch 00042: val_loss did not improve from 2.66865

Epoch 00042: loss did not improve from 56.69710
Epoch 43/1047
 - 20s - loss: 64.7406 - val_loss: 101.0954

Epoch 00043: val_loss did not improve from 2.66865
Epoch 44/1047
 - 20s - loss: 71.5634 - val_loss: 45.2373

Epoch 00044: val_loss did not improve from 2.66865

Epoch 00044: loss did not improve from 56.69710
Epoch 45/1047
 - 19s - loss: 65.8691 - val_loss: 112.5615

Epoch 00045: val_loss did not improve f


Epoch 00017: val_loss did not improve from 1.97494
Epoch 18/1047
 - 19s - loss: 78.0712 - val_loss: 43.4324

Epoch 00018: val_loss did not improve from 1.97494

Epoch 00018: loss did not improve from 56.69710
Epoch 19/1047
 - 20s - loss: 76.3382 - val_loss: 69.1209

Epoch 00019: val_loss did not improve from 1.97494
Epoch 20/1047
 - 18s - loss: 68.3519 - val_loss: 53.2678

Epoch 00020: val_loss did not improve from 1.97494

Epoch 00020: loss did not improve from 56.69710
Epoch 21/1047
 - 18s - loss: 74.9316 - val_loss: 117.6505

Epoch 00021: val_loss did not improve from 1.97494
Epoch 22/1047
 - 19s - loss: 81.6433 - val_loss: 124.6925

Epoch 00022: val_loss did not improve from 1.97494

Epoch 00022: loss did not improve from 56.69710
Epoch 23/1047
 - 19s - loss: 65.6355 - val_loss: 10.8914

Epoch 00023: val_loss did not improve from 1.97494
Epoch 24/1047
 - 19s - loss: 71.1010 - val_loss: 46.5202

Epoch 00024: val_loss did not improve from 1.97494

Epoch 00024: loss did not improve f

 - 19s - loss: 69.3761 - val_loss: 145.8154

Epoch 00079: val_loss did not improve from 1.97494
Epoch 80/1047
 - 20s - loss: 64.5283 - val_loss: 158.1265

Epoch 00080: val_loss did not improve from 1.97494

Epoch 00080: loss did not improve from 56.69710
Epoch 81/1047
 - 19s - loss: 69.5368 - val_loss: 59.4281

Epoch 00081: val_loss did not improve from 1.97494
Epoch 82/1047
 - 19s - loss: 72.0387 - val_loss: 78.1184

Epoch 00082: val_loss did not improve from 1.97494

Epoch 00082: loss did not improve from 56.69710
Epoch 83/1047
 - 19s - loss: 73.6260 - val_loss: 26.6813

Epoch 00083: val_loss did not improve from 1.97494
Epoch 84/1047
 - 19s - loss: 68.3374 - val_loss: 162.1488

Epoch 00084: val_loss did not improve from 1.97494

Epoch 00084: loss did not improve from 56.69710
Epoch 85/1047
 - 19s - loss: 65.4667 - val_loss: 48.0846

Epoch 00085: val_loss did not improve from 1.97494
Epoch 86/1047
 - 18s - loss: 70.1718 - val_loss: 21.9898

Epoch 00086: val_loss did not improve from 

In [24]:
print()
# Training cell for model_1, currently DISABLED: the next line opens a triple-quoted string
# that runs up to the closing marker near the end of the cell, so nothing inside it executes.
# Prefix the next line with "# " to enable it. The body follows the same structure as the
# model_0 training cell (checkpoints, BestValueRecorder, EarlyStopping, timed super-epoch loop).
''' // comment on this line to enable/disable this block
model_1_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_1_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_1_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_1_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
model_1_recorder = BestValueRecorder(
    filepath='./saved_models/model_1_bestValue.json',
    monitorValidation=True,
    verbose=1,
    mode='min',
    # these instances are modified by the __init__ (constructor) of this object
    instanceModelCheckpointLoss=model_1_checkpoint_loss,
    instanceModelCheckpointVal=model_1_checkpoint_val
)
model_1_ER = EarlyStopping(
    monitor='loss',
    patience=15,
    verbose=1,
    mode='min',
    baseline=model_1_checkpoint_loss.best # this instance is not modified by the __init__ of this object
)
model_1_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 20: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_1_learnRate, '- minimum LR:', minimumLR)
    print()
    model_1_history = model_1.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_1_checkpoint_val,
            model_1_checkpoint_loss,
            model_1_recorder,
            model_1_ER
        ]
    )
    if np.random.rand() >= decayChance:
        model_1_ER.patience = model_1_ER.patience * 1.15
        print("Early stopping patience rate increased -", model_1_ER.patience)
    if model_1_learnRate * model_1_LRDecay >= minimumLR and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_1_learnRate = model_1_LRDecay * model_1_learnRate
    model_1.load_weights("./saved_models/model_1_checkpoint.h5")
    model_1.compile(optimizer=Adam(lr=model_1_learnRate), loss={'yolo_loss_model_1': lambda y_true, y_pred: y_pred})
    print()
print("model_1 training done in", str(time.time() - start_time))
model_1.save_weights("./saved_models/model_1_trainModel.h5")
# '''
print()





In [25]:
print()
# Training cell for model_2, currently DISABLED: the next line opens a triple-quoted string
# that runs up to the closing marker near the end of the cell, so nothing inside it executes.
# Prefix the next line with "# " to enable it. The body follows the same structure as the
# model_0 training cell (checkpoints, BestValueRecorder, EarlyStopping, timed super-epoch loop).
''' // comment on this line to enable/disable this block
model_2_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_2_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_2_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_2_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
model_2_recorder = BestValueRecorder(
    filepath='./saved_models/model_2_bestValue.json',
    monitorValidation=True,
    verbose=1,
    mode='min',
    # these instances are modified by the __init__ (constructor) of this object
    instanceModelCheckpointLoss=model_2_checkpoint_loss,
    instanceModelCheckpointVal=model_2_checkpoint_val
)
model_2_ER = EarlyStopping(
    monitor='loss',
    patience=15,
    verbose=1,
    mode='min',
    baseline=model_2_checkpoint_loss.best # this instance is not modified by the __init__ of this object
)
model_2_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 20: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_2_learnRate, '- minimum LR:', minimumLR)
    print()
    model_2_history = model_2.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_2_checkpoint_val,
            model_2_checkpoint_loss,
            model_2_recorder,
            model_2_ER
        ]
    )
    if np.random.rand() >= decayChance:
        model_2_ER.patience = model_2_ER.patience * 1.15
        print("Early stopping patience rate increased -", model_2_ER.patience)
    if model_2_learnRate * model_2_LRDecay >= minimumLR and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_2_learnRate = model_2_LRDecay * model_2_learnRate
    model_2.load_weights("./saved_models/model_2_checkpoint.h5")
    model_2.compile(optimizer=Adam(lr=model_2_learnRate), loss={'yolo_loss_model_2': lambda y_true, y_pred: y_pred})
    print()
print("model_2 training done in", str(time.time() - start_time))
model_2.save_weights("./saved_models/model_2_trainModel.h5")
# '''
print()





In [26]:
print()
# Training cell for model_3, currently DISABLED: the next line opens a triple-quoted string
# that runs up to the closing marker near the end of the cell, so nothing inside it executes.
# Prefix the next line with "# " to enable it. The body follows the same structure as the
# model_0 training cell (checkpoints, BestValueRecorder, EarlyStopping, timed super-epoch loop).
''' // comment on this line to enable/disable this block
model_3_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_3_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_3_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_3_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
model_3_recorder = BestValueRecorder(
    filepath='./saved_models/model_3_bestValue.json',
    monitorValidation=True,
    verbose=1,
    mode='min',
    # these instances are modified by the __init__ (constructor) of this object
    instanceModelCheckpointLoss=model_3_checkpoint_loss,
    instanceModelCheckpointVal=model_3_checkpoint_val
)
model_3_ER = EarlyStopping(
    monitor='loss',
    patience=15,
    verbose=1,
    mode='min',
    baseline=model_3_checkpoint_loss.best # this instance is not modified by the __init__ of this object
)
model_3_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 20: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_3_learnRate, '- minimum LR:', minimumLR)
    print()
    model_3_history = model_3.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_3_checkpoint_val,
            model_3_checkpoint_loss,
            model_3_recorder,
            model_3_ER
        ]
    )
    if np.random.rand() >= decayChance:
        model_3_ER.patience = model_3_ER.patience * 1.15
        print("Early stopping patience rate increased -", model_3_ER.patience)
    if model_3_learnRate * model_3_LRDecay >= minimumLR and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_3_learnRate = model_3_LRDecay * model_3_learnRate
    model_3.load_weights("./saved_models/model_3_checkpoint.h5")
    model_3.compile(optimizer=Adam(lr=model_3_learnRate), loss={'yolo_loss_model_3': lambda y_true, y_pred: y_pred})
    print()
print("model_3 training done in", str(time.time() - start_time))
model_3.save_weights("./saved_models/model_3_trainModel.h5")
# '''
print()





In [27]:
print()
''' // comment on this line to enable/disable this block
# Timed training loop for model_4. While the line above opens a string literal the
# whole block is inert; prefix that line with '#' to enable it.
# One "super-epoch" is a single fit_generator call of `epochSplit` epochs. After each
# super-epoch the checkpoint is reloaded and the model is recompiled with the
# (possibly decayed) learn rate.
model_4_checkpoint_path = './saved_models/model_4_checkpoint.h5'
# NOTE(review): both checkpoint callbacks write to the same file, so it holds the
# weights of whichever callback saved last - confirm this is intended.
model_4_checkpoint_val = ModelCheckpoint(
    filepath=model_4_checkpoint_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_4_checkpoint_loss = ModelCheckpoint(
    filepath=model_4_checkpoint_path,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
model_4_recorder = BestValueRecorder(
    filepath='./saved_models/model_4_bestValue.json',
    monitorValidation=True,
    verbose=1,
    mode='min',
    # these instances are modified by the __init__ (constructor) of this object
    instanceModelCheckpointLoss=model_4_checkpoint_loss,
    instanceModelCheckpointVal=model_4_checkpoint_val
)
# NOTE(review): the baseline is read once, here; it is not refreshed when the
# checkpoint's best loss improves in later super-epochs - confirm this is intended.
model_4_ER = EarlyStopping(
    monitor='loss',
    patience=15,
    verbose=1,
    mode='min',
    baseline=model_4_checkpoint_loss.best # this instance is not modified by the __init__ of this object
)
model_4_LRDecay = math.pow(1 / 4, 1 / 8) # learn rate reaches 25% after 8 decay steps (decay is applied randomly, see below)
superEpochs = 0
start_time = time.time()
# keeps starting super-epochs until 6 hours have elapsed (a running super-epoch is never interrupted), unless the kernel crashes
while time.time() - start_time < 6 * 3600:
# while superEpochs <= 20: # alternative stop rule: runs 21 super-epochs (counter starts at 0), unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_4_learnRate, '- minimum LR:', minimumLR)
    print()
    model_4_history = model_4.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_4_checkpoint_val,
            model_4_checkpoint_loss,
            model_4_recorder,
            model_4_ER
        ]
    )
    if np.random.rand() >= decayChance:
        model_4_ER.patience = model_4_ER.patience * 1.15
        print("Early stopping patience rate increased -", model_4_ER.patience)
    if model_4_learnRate * model_4_LRDecay >= minimumLR and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_4_learnRate = model_4_LRDecay * model_4_learnRate
    # Reload the checkpoint only if one has been written; otherwise load_weights would
    # raise and abort the whole multi-hour loop, so keep training from the current weights.
    if os.path.isfile(model_4_checkpoint_path):
        model_4.load_weights(model_4_checkpoint_path)
    else:
        print("No checkpoint found at", model_4_checkpoint_path, "- keeping current weights")
    # recompiling is what applies the updated learn rate (a fresh Adam optimizer is created)
    model_4.compile(optimizer=Adam(lr=model_4_learnRate), loss={'yolo_loss_model_4': lambda y_true, y_pred: y_pred})
    print()
print("model_4 training done in", str(time.time() - start_time))
model_4.save_weights("./saved_models/model_4_trainModel.h5")
# '''
print()





In [28]:
# Final cell: prints a completion message.
# NOTE(review): no release/cleanup call is visible in this cell - it only prints.
# Confirm that the resources were actually freed elsewhere before relying on this message.
print("Resource successfully released")

Resource successfully released
