In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from PIL import Image
import math
import pickle
import time
import tensorflow.keras as keras
from keras.layers import Input, Layer, LeakyReLU, BatchNormalization, Conv2D, MaxPooling2D, UpSampling2D, Concatenate, Add, Lambda
from keras.models import Model, model_from_json, load_model
from keras.optimizers import SGD, Adam
from keras.regularizers import l2
from keras.callbacks import TerminateOnNaN, ModelCheckpoint, Callback, EarlyStopping
import keras.backend as K
import os
K.clear_session()
K.set_floatx('float32')
print("Running Tensorflow version", tf.__version__)
print("Keras is running on", K.backend(), "backend")

Using TensorFlow backend.


Running Tensorflow version 2.1.0
Keras is running on tensorflow backend


In [2]:
# problem with model with rounding 
'''
def roundingAlgo(x): 
    # first one that works with model_1 & model_2 
    # problem - this rounding function is slow: model_2 = 3 hours / epoch
    # comparison, model_0 = 20 mins / epoch
    # in addition, off by half with integer inputs (lower than actual value, e.g. floor(2) ≈ 1.5, floor(2.01) ≈ 2)
    # source: https://en.wikipedia.org/wiki/Floor_and_ceiling_functions#Continuity_and_series_expansions
    if True:
        result = x - 0.5
        for p in range(1, 7):
            result = result + K.sin(x * p * 2 * math.pi) / (p * math.pi)
    return result
# '''
'''     
def roundingAlgo(x):
    # second one that works with model_2 
    # problem - this rounding function is slower than first working algo: model_2 = 4,2 hours / epoch
    # comparison, model_0 = 20 mins / epoch
    # source: self
    return x - x % 1
# '''
# '''
def roundingAlgo(x): 
    # simplification of the first algo loop by simplifying the expression for range(1,7)
    # problem - rounding function is still slow = 2,5 hours / epoch
    # all non-speed problem of first algo still applies
    result = x - 0.5
    resultCos = K.cos(2 * math.pi * x)
    return result + K.sin(2 * math.pi * x) * (1 + resultCos) * (13 + 2 * resultCos - 18 * K.pow(resultCos, 2) - 32 * K.pow(resultCos, 3) + 80 * K.pow(resultCos, 4)) / 15
# '''
'''
def roundingAlgo(x): 
    # made to fool the engine to have a gradient
    return 0 * x + K.round(x)
# '''


# check https://github.com/keras-team/keras/issues/2218
# check https://github.com/keras-team/keras/issues/2221
# https://www.tensorflow.org/api_docs/python/tf/custom_gradient
class RoundClampQ7_12(Layer):
    def __init__(self, **kwargs):
        super(RoundClampQ7_12, self).__init__(**kwargs)
        self.trainable = False
    def build(self, input_shape):
        super(RoundClampQ7_12, self).build(input_shape)
    def call(self, X):
        return K.clip(roundingAlgo(X * 4096), -524288, 524287) / 4096.0
    def get_config(self):
        base_config = super(RoundClampQ7_12, self).get_config()
        return dict(list(base_config.items()))
class RoundOverflowQ7_12(Layer):
    def __init__(self, **kwargs):
        super(RoundOverflowQ7_12, self).__init__(**kwargs)
        self.trainable = False
    def build(self, input_shape):
        super(RoundOverflowQ7_12, self).build(input_shape)
    def call(self, X):
        return (((roundingAlgo(X * 4096) + 524288) % 1048576) - 524288) / 4096.0
    def get_config(self):
        base_config = super(RoundOverflowQ7_12, self).get_config()
        return dict(list(base_config.items()))
class RoundClampQ3_4(Layer):
    def __init__(self, **kwargs):
        super(RoundClampQ3_4, self).__init__(**kwargs)
        self.trainable = False
    def build(self, input_shape):
        super(RoundClampQ3_4, self).build(input_shape)
    def call(self, X):
        return K.clip(roundingAlgo(X * 16), -128, 127) / 16.0
    def get_config(self):
        base_config = super(RoundClampQ3_4, self).get_config()
        return dict(list(base_config.items()))
class RoundOverflowQ3_4(Layer):
    def __init__(self, **kwargs):
        super(RoundOverflowQ3_4, self).__init__(**kwargs)
        self.trainable = False
    def build(self, input_shape):
        super(RoundOverflowQ3_4, self).build(input_shape)
    def call(self, X):
        return (((roundingAlgo(X * 16) + 128) % 256) - 128) / 16.0
    def get_config(self):
        base_config = super(RoundOverflowQ3_4, self).get_config()
        return dict(list(base_config.items()))
class Identity(Layer):
    def __init__(self, **kwargs):
        super(Identity, self).__init__(**kwargs)
        self.trainable = False
    def call(self, X):
        return X
    def get_config(self):
        base_config = super(Identity, self).get_config()
        return dict(list(base_config.items()))
class IdentityFinalLayer(Layer):
    def __init__(self, **kwargs):
        super(IdentityFinalLayer, self).__init__(**kwargs)
        self.trainable = False
    def call(self, X):
        return X
    def get_config(self):
        base_config = super(IdentityFinalLayer, self).get_config()
        return dict(list(base_config.items()))
    
def DBL(previousLayer, layerFilter, kernelSize=(3, 3), roundingFunction=Identity, name=None):
    placeholder = ""
    if name is not None:
        placeholder = str(name)
    else:
        placeholder = str(time.time_ns())
    return roundingFunction(name="ThirdRound_"+placeholder, dtype=K.floatx())(
        LeakyReLU(alpha=0.1, dtype=K.floatx())(
            roundingFunction(name="SecondRound_"+placeholder, dtype=K.floatx())(
                BatchNormalization(name="BatchNorm_"+placeholder, dtype=K.floatx())(
                    roundingFunction(name="FirstRound_"+placeholder, dtype=K.floatx())(
                        Conv2D(filters=layerFilter, kernel_size=kernelSize, padding='same', use_bias=False, kernel_regularizer=l2(5e-4), name="Conv2D_"+placeholder, dtype=K.floatx())(
                            previousLayer
                        )
                    )
                )
            )
        )
    )
print("Custom layer classes successfully defined")

Custom layer classes successfully defined


In [3]:
classificationClass = 80

print("Number of class classification is", classificationClass)

Number of class classification is 80


In [4]:
print()
def rand(a=0, b=1):
    return np.random.rand()*(b-a) + a

def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True):
    '''
    random preprocessing for real-time data augmentation 
    
    random=True induces image processing (better data accuracy with cost of cycles)
    '''
    line = annotation_line.split()
    image = Image.open(line[0])
    iw, ih = image.size
    h, w = input_shape
    box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

    if not random:
        # resize image
        scale = min(w/iw, h/ih)
        nw = int(iw*scale)
        nh = int(ih*scale)
        dx = (w-nw)//2
        dy = (h-nh)//2
        image_data=0
        if proc_img:
            image = image.resize((nw,nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)/255.

        # correct boxes
        box_data = np.zeros((max_boxes,5))
        if len(box)>0:
            np.random.shuffle(box)
            if len(box)>max_boxes: box = box[:max_boxes]
            box[:, [0,2]] = box[:, [0,2]]*scale + dx
            box[:, [1,3]] = box[:, [1,3]]*scale + dy
            box_data[:len(box)] = box

        return image_data, box_data

    # resize image
    new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
    scale = rand(.25, 2)
    if new_ar < 1:
        nh = int(scale*h)
        nw = int(nh*new_ar)
    else:
        nw = int(scale*w)
        nh = int(nw/new_ar)
    image = image.resize((nw,nh), Image.BICUBIC)

    # place image
    dx = int(rand(0, w-nw))
    dy = int(rand(0, h-nh))
    new_image = Image.new('RGB', (w,h), (128,128,128))
    new_image.paste(image, (dx, dy))
    image = new_image

    # flip image or not
    flip = rand()<.5
    if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # distort image
    hue = rand(-hue, hue)
    sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
    val = rand(1, val) if rand()<.5 else 1/rand(1, val)
    x = rgb_to_hsv(np.array(image)/255.)
    x[..., 0] += hue
    x[..., 0][x[..., 0]>1] -= 1
    x[..., 0][x[..., 0]<0] += 1
    x[..., 1] *= sat
    x[..., 2] *= val
    x[x>1] = 1
    x[x<0] = 0
    image_data = hsv_to_rgb(x) # numpy array, 0 to 1

    # correct boxes
    box_data = np.zeros((max_boxes,5))
    if len(box)>0:
        np.random.shuffle(box)
        box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
        box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
        if flip: box[:, [0,2]] = w - box[:, [2,0]]
        box[:, 0:2][box[:, 0:2]<0] = 0
        box[:, 2][box[:, 2]>w] = w
        box[:, 3][box[:, 3]>h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
        if len(box)>max_boxes: box = box[:max_boxes]
        box_data[:len(box)] = box

    return image_data, box_data

def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    '''Preprocess true boxes to training input format

    Parameters
    ----------
    true_boxes: array, shape=(m, T, 5)
        Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
    input_shape: array-like, hw, multiples of 32
    anchors: array, shape=(N, 2), wh
    num_classes: integer

    Returns
    -------
    y_true: list of array, shape like yolo_outputs, xywh are reletive value

    '''
    assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
    num_layers = len(anchors)//3 # default setting
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]

    true_boxes = np.array(true_boxes, dtype='float32')
    input_shape = np.array(input_shape, dtype='int32')
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]

    m = true_boxes.shape[0]
    grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
    y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
        dtype='float32') for l in range(num_layers)]

    # Expand dim to apply broadcasting.
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    valid_mask = boxes_wh[..., 0]>0

    for b in range(m):
        # Discard zero rows.
        wh = boxes_wh[b, valid_mask[b]]
        if len(wh)==0: continue
        # Expand dim to apply broadcasting.
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes

        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)

        # Find best anchor for each true box
        best_anchor = np.argmax(iou, axis=-1)

        for t, n in enumerate(best_anchor):
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(n)
                    c = true_boxes[b,t, 4].astype('int32')
                    y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5+c] = 1

    return y_true

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''data generator for fit_generator'''
    n = len(annotation_lines)
    i = 0
    while True:
        image_data = []
        box_data = []
        for b in range(batch_size):
            if i==0:
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            box_data.append(box)
            i = (i + 1) % n
        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], np.zeros(batch_size)

def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    n = len(annotation_lines)
    if n==0 or batch_size<=0: return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
print("Target data generator successfully defined")


Target data generator successfully defined


In [5]:
# diambil dari https://github.com/qqwweee/keras-yolo3
'''
MIT License

Copyright (c) 2018 qqwweee

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

image_size = (448,448)
image_height, image_width = image_size

def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    return np.array(anchors).reshape(-1, 2)

tinyYolo_anchors = get_anchors("../CNN-VLSI/tiny_yolo_anchors.txt")

def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust preditions to each spatial grid point and anchor size.
    # edited by instructions in https://stackoverflow.com/questions/57558476/training-a-keras-model-yields-multiple-optimizer-errors
    box_xy = (K.hard_sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[...,::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[...,::-1], K.dtype(feats))
    box_confidence = K.hard_sigmoid(feats[..., 4:5])
    box_class_probs = K.hard_sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

def box_iou(b1, b2):
    '''Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    # Expand dim to apply broadcasting.
    b1 = K.expand_dims(b1, -2)
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Expand dim to apply broadcasting.
    b2 = K.expand_dims(b2, 0)
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou

def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0] # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = tf.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
    return loss

In [6]:
with open("../CNN-VLSI/train.txt") as trainText:
    train_annotation_lines = trainText.readlines()
with open("../CNN-VLSI/val.txt") as valText:
    val_annotation_lines = valText.readlines()
lenTrain = len(train_annotation_lines)
print("# of training data is", lenTrain)
lenVal = len(val_annotation_lines)
print("# of validation data is", lenVal)
np.random.shuffle(train_annotation_lines)
np.random.shuffle(val_annotation_lines)

# of training data is 117266
# of validation data is 4952


In [7]:
# model_0 does no rounding (float32 operation)

In [8]:
model_0 = None
'''
model_0_input = Input(shape=(None, None, 3), name="model_0_inputLayer")
# model_0_pointer = model_0_input
print("Input shape:", model_0_input.shape) # 448 x 448 x 3
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_input, layerFilter=16, name="model_0_layer0_branch") 
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 448 x 448 x 16
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 224 x 224 x 16
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=32, name="model_0_layer1_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 224 x 224 x 32
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 112 x 112 x 32
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=64, name="model_0_layer2_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 112 x 112 x 64
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 56 x 56 x 64
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=128, name="model_0_layer3_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 56 x 56 x 128
model_0_startBranch = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 28 x 28 x 128
model_0_startBranch = DBL(roundingFunction=Identity, previousLayer=model_0_startBranch, layerFilter=256, name="model_0_layer4_branch")
# model_0_pointer = model_0_startBranch
# print(model_0_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_0_branch0
model_0_branch0 = MaxPooling2D(pool_size=(2, 2))(model_0_startBranch)
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 256
model_0_branch0 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=512, name="model_0_layer5_branch0")
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 512
model_0_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_0_branch0)
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 512
model_0_branch0 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=1024, name="model_0_layer6_branch0")
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 1024
model_0_branch0 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=256, kernelSize=(1, 1), name="model_0_layer7_branch0")
# model_0_pointer = model_0_branch0
# print(model_0_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_0_branch0 (14 x 14 x 256), following model_0_branch00
model_0_branch00 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=128, kernelSize=(1, 1), name="model_0_layer8_branch00")
# model_0_pointer = model_0_branch00
# print(model_0_pointer.shape) # 14 x 14 x 128
model_0_branch00 = UpSampling2D()(model_0_branch00)
# model_0_pointer = model_0_branch00
# print(model_0_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_0_branch1 (unchanged from model_0_startBranch) and model_0_branch00
model_0_mergedBranch = Concatenate()([model_0_startBranch, model_0_branch00])
# model_0_pointer = model_0_mergedBranch
# print(model_0_pointer.shape) # 28 x 28 x 384
model_0_mergedBranch = DBL(roundingFunction=Identity, previousLayer=model_0_mergedBranch, layerFilter=256, name="model_0_layer9_branch1")
# model_0_pointer = model_0_mergedBranch
# print(model_0_pointer.shape) # 28 x 28 x 256
model_0_mergedBranch = DBL(roundingFunction=Identity, previousLayer=model_0_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_0_layerA_branch1")
# model_0_pointer = model_0_mergedBranch
print("Model output 1 shape:", model_0_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_0_mergedBranch = IdentityFinalLayer(name="model_0_outputLayer_1")(model_0_mergedBranch)
print() # OUTPUT = model_0_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_0_branch01
model_0_branch01 = DBL(roundingFunction=Identity, previousLayer=model_0_branch0, layerFilter=512, name="model_0_layer8_branch01")
# model_0_pointer = model_0_branch01
# print(model_0_pointer.shape) # 14 x 14 x 512
model_0_branch01 = DBL(roundingFunction=Identity, previousLayer=model_0_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_0_layer9_branch01")
# model_0_pointer = model_0_branch01
print("Model output 0 shape:", model_0_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_0_branch01 = IdentityFinalLayer(name="model_0_outputLayer_0")(model_0_branch01)
print() # OUTPUT = model_0_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_0_actual = Model(inputs=model_0_input, outputs=[model_0_branch01, model_0_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_0_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_0_actual.to_json())

try:
#     model_0_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
    model_0_actual.load_weights("./saved_models/model_0_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_0")
except Exception as e:
    print("Failed to load existing model for model_0:", e)
try:
    model_0_actual.save_weights("./saved_models/model_0_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_0 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_0 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_0_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_0', arguments=setArgs_model_0)([*model_0_actual.output, *y_true_model_0])

model_0 = Model([model_0_actual.input, *y_true_model_0], model_0_loss)

# model_0_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_0 adalah pendekatan sehingga output model_0 sedekat mungkin dengan 0 (model_0 ≈ model_0_actual - y_true)
model_0_learnRate = 1e-2
model_0.compile(optimizer=Adam(lr=model_0_learnRate), loss={'yolo_loss_model_0': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_0 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
print()




In [9]:
# model_1 approximates Q7.12 signed fixed point operations with floating point rules (overflow = maximum/minimum value) 
# Done by rounding to the nearest 1/4096 and capping at [-128, 128) after batch normalization and activation layers

In [10]:
model_1 = None
'''
model_1_input = Input(shape=(None, None, 3), name="model_1_inputLayer")
# model_1_pointer = model_1_input
print("Input shape:", model_1_input.shape) # 448 x 448 x 3
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_input, layerFilter=16, name="model_1_layer0_branch") 
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 448 x 448 x 16
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 224 x 224 x 16
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=32, name="model_1_layer1_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 224 x 224 x 32
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 112 x 112 x 32
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=64, name="model_1_layer2_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 112 x 112 x 64
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 56 x 56 x 64
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=128, name="model_1_layer3_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 56 x 56 x 128
model_1_startBranch = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 28 x 28 x 128
model_1_startBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_startBranch, layerFilter=256, name="model_1_layer4_branch")
# model_1_pointer = model_1_startBranch
# print(model_1_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_1_branch0
model_1_branch0 = MaxPooling2D(pool_size=(2, 2))(model_1_startBranch)
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 256
model_1_branch0 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=512, name="model_1_layer5_branch0")
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 512
model_1_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_1_branch0)
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 512
model_1_branch0 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=1024, name="model_1_layer6_branch0")
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 1024
model_1_branch0 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=256, kernelSize=(1, 1), name="model_1_layer7_branch0")
# model_1_pointer = model_1_branch0
# print(model_1_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_1_branch0 (14 x 14 x 256), following model_1_branch00
model_1_branch00 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=128, kernelSize=(1, 1), name="model_1_layer8_branch00")
# model_1_pointer = model_1_branch00
# print(model_1_pointer.shape) # 14 x 14 x 128
model_1_branch00 = UpSampling2D()(model_1_branch00)
# model_1_pointer = model_1_branch00
# print(model_1_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_1_branch1 (unchanged from model_1_startBranch) and model_1_branch00
model_1_mergedBranch = Concatenate()([model_1_startBranch, model_1_branch00])
# model_1_pointer = model_1_mergedBranch
# print(model_1_pointer.shape) # 28 x 28 x 384
model_1_mergedBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_mergedBranch, layerFilter=256, name="model_1_layer9_branch1")
# model_1_pointer = model_1_mergedBranch
# print(model_1_pointer.shape) # 28 x 28 x 256
model_1_mergedBranch = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_1_layerA_branch1")
# model_1_pointer = model_1_mergedBranch
print("Model output 1 shape:", model_1_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_1_mergedBranch = IdentityFinalLayer(name="model_1_outputLayer_1")(model_1_mergedBranch)
print() # OUTPUT = model_1_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_1_branch01
model_1_branch01 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch0, layerFilter=512, name="model_1_layer8_branch01")
# model_1_pointer = model_1_branch01
# print(model_1_pointer.shape) # 14 x 14 x 512
model_1_branch01 = DBL(roundingFunction=RoundClampQ7_12, previousLayer=model_1_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_1_layer9_branch01")
# model_1_pointer = model_1_branch01
print("Model output 0 shape:", model_1_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_1_branch01 = IdentityFinalLayer(name="model_1_outputLayer_0")(model_1_branch01)
print() # OUTPUT = model_1_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_1_actual = Model(inputs=model_1_input, outputs=[model_1_branch01, model_1_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_1_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_1_actual.to_json())

try:
#     model_1_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
    model_1_actual.load_weights("./saved_models/model_1_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_1")
except Exception as e:
    print("Failed to load existing model for model_1:", e)
try:
    model_1_actual.save_weights("./saved_models/model_1_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_1 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_1 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_1_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_1', arguments=setArgs_model_1)([*model_1_actual.output, *y_true_model_1])

model_1 = Model([model_1_actual.input, *y_true_model_1], model_1_loss)

# model_1_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_1 adalah pendekatan sehingga output model_1 sedekat mungkin dengan 0 (model_1 ≈ model_1_actual - y_true)
model_1_learnRate = 1e-2
model_1.compile(optimizer=Adam(lr=model_1_learnRate), loss={'yolo_loss_model_1': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_1 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
print()




In [11]:
# model_2 approximates Q7.12 signed fixed point operations with integer rules (overflow = positive -> negative & vice versa) 
# Done by rounding to the nearest 1/4096 and capping at [-128, 128) after batch normalization and activation layers

In [12]:
model_2 = None
'''
model_2_input = Input(shape=(None, None, 3), name="model_2_inputLayer")
# model_2_pointer = model_2_input
print("Input shape:", model_2_input.shape) # 448 x 448 x 3
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_input, layerFilter=16, name="model_2_layer0_branch") 
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 448 x 448 x 16
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 224 x 224 x 16
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=32, name="model_2_layer1_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 224 x 224 x 32
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 112 x 112 x 32
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=64, name="model_2_layer2_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 112 x 112 x 64
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 56 x 56 x 64
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=128, name="model_2_layer3_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 56 x 56 x 128
model_2_startBranch = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 28 x 28 x 128
model_2_startBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_startBranch, layerFilter=256, name="model_2_layer4_branch")
# model_2_pointer = model_2_startBranch
# print(model_2_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_2_branch0
model_2_branch0 = MaxPooling2D(pool_size=(2, 2))(model_2_startBranch)
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 256
model_2_branch0 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=512, name="model_2_layer5_branch0")
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 512
model_2_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_2_branch0)
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 512
model_2_branch0 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=1024, name="model_2_layer6_branch0")
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 1024
model_2_branch0 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=256, kernelSize=(1, 1), name="model_2_layer7_branch0")
# model_2_pointer = model_2_branch0
# print(model_2_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_2_branch0 (14 x 14 x 256), following model_2_branch00
model_2_branch00 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=128, kernelSize=(1, 1), name="model_2_layer8_branch00")
# model_2_pointer = model_2_branch00
# print(model_2_pointer.shape) # 14 x 14 x 128
model_2_branch00 = UpSampling2D()(model_2_branch00)
# model_2_pointer = model_2_branch00
# print(model_2_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_2_branch1 (unchanged from model_2_startBranch) and model_2_branch00
model_2_mergedBranch = Concatenate()([model_2_startBranch, model_2_branch00])
# model_2_pointer = model_2_mergedBranch
# print(model_2_pointer.shape) # 28 x 28 x 384
model_2_mergedBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_mergedBranch, layerFilter=256, name="model_2_layer9_branch1")
# model_2_pointer = model_2_mergedBranch
# print(model_2_pointer.shape) # 28 x 28 x 256
model_2_mergedBranch = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_2_layerA_branch1")
# model_2_pointer = model_2_mergedBranch
print("Model output 1 shape:", model_2_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_2_mergedBranch = IdentityFinalLayer(name="model_2_outputLayer_1")(model_2_mergedBranch)
print() # OUTPUT = model_2_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_2_branch01
model_2_branch01 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch0, layerFilter=512, name="model_2_layer8_branch01")
# model_2_pointer = model_2_branch01
# print(model_2_pointer.shape) # 14 x 14 x 512
model_2_branch01 = DBL(roundingFunction=RoundOverflowQ7_12, previousLayer=model_2_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_2_layer9_branch01")
# model_2_pointer = model_2_branch01
print("Model output 0 shape:", model_2_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_2_branch01 = IdentityFinalLayer(name="model_2_outputLayer_0")(model_2_branch01)
print() # OUTPUT = model_2_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_2_actual = Model(inputs=model_2_input, outputs=[model_2_branch01, model_2_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_2_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_2_actual.to_json())

try:
#     model_2_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
    model_2_actual.load_weights("./saved_models/model_2_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_2")
except Exception as e:
    print("Failed to load existing model for model_2:", e)
try:
    model_2_actual.save_weights("./saved_models/model_2_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_2 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_2 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_2_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_2', arguments=setArgs_model_2)([*model_2_actual.output, *y_true_model_2])

model_2 = Model([model_2_actual.input, *y_true_model_2], model_2_loss)

# model_2_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_2 adalah pendekatan sehingga output model_2 sedekat mungkin dengan 0 (model_2 ≈ model_2_actual - y_true)
model_2_learnRate = 1e-2
model_2.compile(optimizer=Adam(lr=model_2_learnRate), loss={'yolo_loss_model_2': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_2 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
print()




In [13]:
# model_3 approximates Q3.4 signed fixed point operations with floating point rules (overflow = maximum/minimum value) 
# Done by rounding to the nearest 1/16 and capping at [-8, 8) after batch normalization and activation layers

In [14]:
model_3 = None
'''
model_3_input = Input(shape=(None, None, 3), name="model_3_inputLayer")
# model_3_pointer = model_3_input
print("Input shape:", model_3_input.shape) # 448 x 448 x 3
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_input, layerFilter=16, name="model_3_layer0_branch") 
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 448 x 448 x 16
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 224 x 224 x 16
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=32, name="model_3_layer1_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 224 x 224 x 32
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 112 x 112 x 32
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=64, name="model_3_layer2_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 112 x 112 x 64
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 56 x 56 x 64
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=128, name="model_3_layer3_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 56 x 56 x 128
model_3_startBranch = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 28 x 28 x 128
model_3_startBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_startBranch, layerFilter=256, name="model_3_layer4_branch")
# model_3_pointer = model_3_startBranch
# print(model_3_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_3_branch0
model_3_branch0 = MaxPooling2D(pool_size=(2, 2))(model_3_startBranch)
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 256
model_3_branch0 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=512, name="model_3_layer5_branch0")
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 512
model_3_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_3_branch0)
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 512
model_3_branch0 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=1024, name="model_3_layer6_branch0")
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 1024
model_3_branch0 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=256, kernelSize=(1, 1), name="model_3_layer7_branch0")
# model_3_pointer = model_3_branch0
# print(model_3_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_3_branch0 (14 x 14 x 256), following model_3_branch00
model_3_branch00 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=128, kernelSize=(1, 1), name="model_3_layer8_branch00")
# model_3_pointer = model_3_branch00
# print(model_3_pointer.shape) # 14 x 14 x 128
model_3_branch00 = UpSampling2D()(model_3_branch00)
# model_3_pointer = model_3_branch00
# print(model_3_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_3_branch1 (unchanged from model_3_startBranch) and model_3_branch00
model_3_mergedBranch = Concatenate()([model_3_startBranch, model_3_branch00])
# model_3_pointer = model_3_mergedBranch
# print(model_3_pointer.shape) # 28 x 28 x 384
model_3_mergedBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_mergedBranch, layerFilter=256, name="model_3_layer9_branch1")
# model_3_pointer = model_3_mergedBranch
# print(model_3_pointer.shape) # 28 x 28 x 256
model_3_mergedBranch = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_3_layerA_branch1")
# model_3_pointer = model_3_mergedBranch
print("Model output 1 shape:", model_3_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_3_mergedBranch = IdentityFinalLayer(name="model_3_outputLayer_1")(model_3_mergedBranch)
print() # OUTPUT = model_3_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_3_branch01
model_3_branch01 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch0, layerFilter=512, name="model_3_layer8_branch01")
# model_3_pointer = model_3_branch01
# print(model_3_pointer.shape) # 14 x 14 x 512
model_3_branch01 = DBL(roundingFunction=RoundClampQ3_4, previousLayer=model_3_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_3_layer9_branch01")
# model_3_pointer = model_3_branch01
print("Model output 0 shape:", model_3_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_3_branch01 = IdentityFinalLayer(name="model_3_outputLayer_0")(model_3_branch01)
print() # OUTPUT = model_3_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_3_actual = Model(inputs=model_3_input, outputs=[model_3_branch01, model_3_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_3_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_3_actual.to_json())

try:
#     model_3_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
    model_3_actual.load_weights("./saved_models/model_3_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_3")
except Exception as e:
    print("Failed to load existing model for model_3:", e)
try:
    model_3_actual.save_weights("./saved_models/model_3_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_3 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_3 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_3_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_3', arguments=setArgs_model_3)([*model_3_actual.output, *y_true_model_3])

model_3 = Model([model_3_actual.input, *y_true_model_3], model_3_loss)

# model_3_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_3 adalah pendekatan sehingga output model_3 sedekat mungkin dengan 0 (model_3 ≈ model_3_actual - y_true)
model_3_learnRate = 1e-2
model_3.compile(optimizer=Adam(lr=model_3_learnRate), loss={'yolo_loss_model_3': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_3 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
print()




In [15]:
# model_4 approximates Q3.4 signed fixed point operations with integer rules (overflow = positive -> negative & vice versa) 
# Done by rounding to the nearest 1/16 and capping at [-8, 8) after batch normalization and activation layers

In [16]:
model_4 = None
# '''
model_4_input = Input(shape=(None, None, 3), name="model_4_inputLayer")
# model_4_pointer = model_4_input
print("Input shape:", model_4_input.shape) # 448 x 448 x 3
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_input, layerFilter=16, name="model_4_layer0_branch") 
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 448 x 448 x 16
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 224 x 224 x 16
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=32, name="model_4_layer1_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 224 x 224 x 32
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 112 x 112 x 32
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=64, name="model_4_layer2_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 112 x 112 x 64
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 56 x 56 x 64
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=128, name="model_4_layer3_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 56 x 56 x 128
model_4_startBranch = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 28 x 28 x 128
model_4_startBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_startBranch, layerFilter=256, name="model_4_layer4_branch")
# model_4_pointer = model_4_startBranch
# print(model_4_pointer.shape) # 28 x 28 x 256
print("Branch split from main branch - following branch 0") # 2 branch split from startBranch (28 x 28 x 256), following model_4_branch0
model_4_branch0 = MaxPooling2D(pool_size=(2, 2))(model_4_startBranch)
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 256
model_4_branch0 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=512, name="model_4_layer5_branch0")
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 512
model_4_branch0 = MaxPooling2D(pool_size=(2, 2), strides=1, padding='same')(model_4_branch0)
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 512
model_4_branch0 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=1024, name="model_4_layer6_branch0")
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 1024
model_4_branch0 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=256, kernelSize=(1, 1), name="model_4_layer7_branch0")
# model_4_pointer = model_4_branch0
# print(model_4_pointer.shape) # 14 x 14 x 256
print("Branch split from branch 0 - following branch 0,0") # 2 branch split from model_4_branch0 (14 x 14 x 256), following model_4_branch00
model_4_branch00 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=128, kernelSize=(1, 1), name="model_4_layer8_branch00")
# model_4_pointer = model_4_branch00
# print(model_4_pointer.shape) # 14 x 14 x 128
model_4_branch00 = UpSampling2D()(model_4_branch00)
# model_4_pointer = model_4_branch00
# print(model_4_pointer.shape) # 28 x 28 x 128
print("Branch merge from branch 1 and branch 0,0") # 2 branch merge from model_4_branch1 (unchanged from model_4_startBranch) and model_4_branch00
model_4_mergedBranch = Concatenate()([model_4_startBranch, model_4_branch00])
# model_4_pointer = model_4_mergedBranch
# print(model_4_pointer.shape) # 28 x 28 x 384
model_4_mergedBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_mergedBranch, layerFilter=256, name="model_4_layer9_branch1")
# model_4_pointer = model_4_mergedBranch
# print(model_4_pointer.shape) # 28 x 28 x 256
model_4_mergedBranch = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_mergedBranch, layerFilter=3 * (4 + 1 + classificationClass), name="model_4_layerA_branch1")
# model_4_pointer = model_4_mergedBranch
print("Model output 1 shape:", model_4_mergedBranch.shape) # 28 x 28 x (3 * (5 + classificationClass))
model_4_mergedBranch = IdentityFinalLayer(name="model_4_outputLayer_1")(model_4_mergedBranch)
print() # OUTPUT = model_4_mergedBranch (note: 26 x 26 grid untuk deteksi objek kecil)

print("Branch split from branch 0 - following branch 0,1")# following model_4_branch01
model_4_branch01 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch0, layerFilter=512, name="model_4_layer8_branch01")
# model_4_pointer = model_4_branch01
# print(model_4_pointer.shape) # 14 x 14 x 512
model_4_branch01 = DBL(roundingFunction=RoundOverflowQ3_4, previousLayer=model_4_branch01, layerFilter=3 * (4 + 1 + classificationClass), name="model_4_layer9_branch01")
# model_4_pointer = model_4_branch01
print("Model output 0 shape:", model_4_branch01.shape) # 14 x 14 x (3 * (5 + classificationClass))
model_4_branch01 = IdentityFinalLayer(name="model_4_outputLayer_0")(model_4_branch01)
print() # OUTPUT = model_4_branch01 (note: 13 x 13 grid untuk deteksi objek besar)

model_4_actual = Model(inputs=model_4_input, outputs=[model_4_branch01, model_4_mergedBranch]) # mengikuti model dari https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py

with open("./saved_models/model_4_inferenceModel.json", "wt") as jsonFile:
    jsonFile.write(model_4_actual.to_json())

try:
    model_4_actual.load_weights("../keras-yolo3/model_data/yolov3_tiny_roundOverflowQ3_4.h5", by_name=True, skip_mismatch=True)
#     model_4_actual.load_weights("./saved_models/model_4_checkpoint.h5", by_name=True, skip_mismatch=True)
    print("Weight load attempt success for model_4")
except Exception as e:
    print("Failed to load existing model for model_4:", e)
try:
    model_4_actual.save_weights("./saved_models/model_4_trainModel.h5")
    print("Loaded model is successfully re-saved")
except Exception as e:
    print("Failed to save loaded model:", e)
# added code from https://github.com/awe777/keras-yolo3/blob/master/yolo3/model.py
setArgs_model_4 = {
    'anchors': tinyYolo_anchors, 
    'num_classes': classificationClass, 
    'ignore_thresh': 0.2
}

y_true_model_4 = [Input(shape=(image_height//{0:32, 1:16}[l], image_width//{0:32, 1:16}[l], 3, classificationClass + 5)) for l in range(2)]

model_4_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss_model_4', arguments=setArgs_model_4)([*model_4_actual.output, *y_true_model_4])

model_4 = Model([model_4_actual.input, *y_true_model_4], model_4_loss)

# model_4_actual adalah working model yang akan dipakai untuk inference
# y_true adalah layer input yang akan diisikan nilai sebenarnya
# cara training model_4 adalah pendekatan sehingga output model_4 sedekat mungkin dengan 0 (model_4 ≈ model_4_actual - y_true)
model_4_learnRate = 1e-3
model_4.compile(optimizer=Adam(lr=model_4_learnRate), loss={'yolo_loss_model_4': lambda y_true, y_pred: y_pred})

# loss adalah seberapa jauh nilai perbedaan output dengan 0

print("Model model_4 compilation complete") # tinggal porting line 54 sampai 61 dari https://github.com/awe777/keras-yolo3/blob/master/train.py, dilakukan di cell training di bawah
# '''
print()

Input shape: (None, None, None, 3)
Branch split from main branch - following branch 0
Branch split from branch 0 - following branch 0,0
Branch merge from branch 1 and branch 0,0
Model output 1 shape: (None, None, None, 255)

Branch split from branch 0 - following branch 0,1
Model output 0 shape: (None, None, None, 255)

Weight load attempt success for model_4
Loaded model is successfully re-saved
Model model_4 compilation complete



In [17]:
if model_0 is not None:
    with open("./saved_models/model_0_summary.txt", "wt") as textFile:
        model_0.summary(print_fn=lambda x: textFile.write(x + '\n'), line_length=200)

In [18]:
if model_1 is not None:
    with open("./saved_models/model_1_summary.txt", "wt") as textFile:
        model_1.summary(print_fn=lambda x: textFile.write(x + '\n'), line_length=200)

In [19]:
if model_2 is not None:
    with open("./saved_models/model_2_summary.txt", "wt") as textFile:
        model_2.summary(print_fn=lambda x: textFile.write(x + '\n'), line_length=200)

In [20]:
if model_3 is not None:
    with open("./saved_models/model_3_summary.txt", "wt") as textFile:
        model_3.summary(print_fn=lambda x: textFile.write(x + '\n'), line_length=200)

In [21]:
if model_4 is not None:
    with open("./saved_models/model_4_summary.txt", "wt") as textFile:
        model_4.summary(print_fn=lambda x: textFile.write(x + '\n'), line_length=200)

In [22]:
print()
trainingBatchSize = 1
epochSplit = 2094 
# because high split = more work saved; 117266 mod 499 = 1 <= less image lost for (416,416)
# 117266 mod 2094 = 2 <= due to high loss of (448,448)
train_data_generator = data_generator_wrapper(
    annotation_lines=train_annotation_lines, 
    batch_size=trainingBatchSize, 
    input_shape=image_size, 
    anchors=tinyYolo_anchors, 
    num_classes=classificationClass
)
val_data_generator = data_generator_wrapper(
    annotation_lines=val_annotation_lines, 
    batch_size=trainingBatchSize, 
    input_shape=image_size, 
    anchors=tinyYolo_anchors, 
    num_classes=classificationClass
)
minimumLR = 1e-5
decayChance = 0.25
# class ReloadOnNaN(Callback):
#     def __init__(self, filepath=None):
#         super(ReloadOnNaN, self).__init__()
#         self.filepath = filepath
#     def on_batch_end(self, batch, logs=None):
#         logs = logs or {}
#         loss = logs.get('loss')
#         if loss is not None:
#             if np.isnan(loss) or np.isinf(loss):
#                 if np.isnan(loss):
#                     print('\nDetected nan loss at batch %d, terminating training' % (batch))
#                 else:
#                     print('\nDetected inf loss at batch %d, terminating training' % (batch))
# #                 self.model.load_weights(self.filepath, by_name=True, skip_mismatch=True)
# #                 self.model.reset_metrics()
#                 self.model.stop_training = True
print()





In [23]:
print()
''' // comment on this line to enable/disable this block
model_0_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_0_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_0_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_0_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
# model_0_checkpoint_force = ModelCheckpoint(
#     filepath='./saved_models/model_0_checkpoint.h5',
#     verbose=1,
#     save_weights_only=True,
#     period=5
# )
model_0_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 10: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_0_learnRate)
    print()
    model_0_history = model_0.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_0_checkpoint_val,
            model_0_checkpoint_loss,
#             model_0_checkpoint_force
        ]
    )
    if model_0_learnRate > minimumLR / model_0_LRDecay and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_0_learnRate = model_0_LRDecay * model_0_learnRate
    model_0.load_weights("./saved_models/model_0_checkpoint.h5")
    model_0.compile(optimizer=Adam(lr=model_0_learnRate), loss={'yolo_loss_model_0': lambda y_true, y_pred: y_pred})
    print()
print("model_0 training done in", str(time.time() - start_time))
model_0.save_weights("./saved_models/model_0_trainModel.h5")
# '''
print()





In [24]:
print()
''' // comment on this line to enable/disable this block
model_1_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_1_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_1_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_1_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
# model_1_checkpoint_force = ModelCheckpoint(
#     filepath='./saved_models/model_1_checkpoint.h5',
#     verbose=1,
#     save_weights_only=True,
#     period=5
# )
model_1_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 10: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_1_learnRate)
    print()
    model_1_history = model_1.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_1_checkpoint_val,
            model_1_checkpoint_loss,
#             model_1_checkpoint_force
        ]
    )
    if model_1_learnRate > minimumLR / model_1_LRDecay and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_1_learnRate = model_1_LRDecay * model_1_learnRate
    model_1.load_weights("./saved_models/model_1_checkpoint.h5")
    model_1.compile(optimizer=Adam(lr=model_1_learnRate), loss={'yolo_loss_model_1': lambda y_true, y_pred: y_pred})
    print()
print("model_1 training done in", str(time.time() - start_time))
model_1.save_weights("./saved_models/model_1_trainModel.h5")
# '''
print()





In [25]:
print()
''' // comment on this line to enable/disable this block
model_2_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_2_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_2_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_2_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
# model_2_checkpoint_force = ModelCheckpoint(
#     filepath='./saved_models/model_2_checkpoint.h5',
#     verbose=1,
#     save_weights_only=True,
#     period=5
# )
model_2_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 10: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_2_learnRate)
    print()
    model_2_history = model_2.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_2_checkpoint_val,
            model_2_checkpoint_loss,
#             model_2_checkpoint_force
        ]
    )
    if model_2_learnRate > minimumLR / model_2_LRDecay and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_2_learnRate = model_2_LRDecay * model_2_learnRate
    model_2.load_weights("./saved_models/model_2_checkpoint.h5")
    model_2.compile(optimizer=Adam(lr=model_2_learnRate), loss={'yolo_loss_model_2': lambda y_true, y_pred: y_pred})
    print()
print("model_2 training done in", str(time.time() - start_time))
model_2.save_weights("./saved_models/model_2_trainModel.h5")
# '''
print()





In [26]:
print()
''' // comment on this line to enable/disable this block
model_3_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_3_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_3_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_3_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
# model_3_checkpoint_force = ModelCheckpoint(
#     filepath='./saved_models/model_3_checkpoint.h5',
#     verbose=1,
#     save_weights_only=True,
#     period=5
# )
model_3_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
# while superEpochs <= 10: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_3_learnRate)
    print()
    model_3_history = model_3.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_3_checkpoint_val,
            model_3_checkpoint_loss,
#             model_3_checkpoint_force
        ]
    )
    if model_3_learnRate > minimumLR / model_3_LRDecay and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_3_learnRate = model_3_LRDecay * model_3_learnRate
    model_3.load_weights("./saved_models/model_3_checkpoint.h5")
    model_3.compile(optimizer=Adam(lr=model_3_learnRate), loss={'yolo_loss_model_3': lambda y_true, y_pred: y_pred})
    print()
print("model_3 training done in", str(time.time() - start_time))
model_3.save_weights("./saved_models/model_3_trainModel.h5")
# '''
print()





In [27]:
print()
# ''' // comment on this line to enable/disable this block
model_4_checkpoint_val = ModelCheckpoint(
    filepath='./saved_models/model_4_checkpoint.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min'
)
model_4_checkpoint_loss = ModelCheckpoint(
    filepath='./saved_models/model_4_checkpoint.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    period=2
)
# model_4_checkpoint_force = ModelCheckpoint(
#     filepath='./saved_models/model_4_checkpoint.h5',
#     verbose=1,
#     save_weights_only=True,
#     period=5
# )
model_4_LRDecay = math.pow(1 / 4, 1 / 3) # exponentially decays to 25% in 3 super-epochs 
superEpochs = 0
start_time = time.time()
# while time.time() - start_time < 6 * 3600: # guarantees at least 6 hours of training, unless the kernel crashes
while superEpochs <= 20: # guarantees 10 super-epochs, unless the kernel crashes
    superEpochs = superEpochs + 1
    print('Time', str(time.time() - start_time))
    print('Super-epoch', superEpochs, '- learn rate:', model_4_learnRate)
    print()
    model_4_history = model_4.fit_generator(
        generator=train_data_generator, 
        steps_per_epoch=max(1, (lenTrain // trainingBatchSize) // epochSplit), 
        epochs=1 * epochSplit,
        verbose=2,
        validation_data=val_data_generator,
        validation_steps=max(1, 15 * (lenVal // trainingBatchSize) // epochSplit),
        callbacks=[ 
            TerminateOnNaN(),
            model_4_checkpoint_val,
            model_4_checkpoint_loss,
#             model_4_checkpoint_force
        ]
    )
    if model_4_learnRate > minimumLR / model_4_LRDecay and np.random.rand() < decayChance:
        # guarantees learn rate above minimumLR
        # randomly decays learnRate with a predetermined probability
        print("Learn rate decayed")
        model_4_learnRate = model_4_LRDecay * model_4_learnRate
    model_4.load_weights("./saved_models/model_4_checkpoint.h5")
    model_4.compile(optimizer=Adam(lr=model_4_learnRate), loss={'yolo_loss_model_4': lambda y_true, y_pred: y_pred})
    print()
print("model_4 training done in", str(time.time() - start_time))
model_4.save_weights("./saved_models/model_4_trainModel.h5")
# '''
print()


Time 0.0
Super-epoch 1 - learn rate: 0.0005

Epoch 1/2094
 - 34s - loss: 2870.8932 - val_loss: 4772.9873

Epoch 00001: val_loss improved from inf to 4772.98730, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 2/2094
 - 22s - loss: 2838.4924 - val_loss: 3525.3269

Epoch 00002: val_loss improved from 4772.98730 to 3525.32690, saving model to ./saved_models/model_4_checkpoint.h5

Epoch 00002: loss improved from inf to 2838.49241, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 3/2094
 - 22s - loss: 2830.3024 - val_loss: 7679.3159

Epoch 00003: val_loss did not improve from 3525.32690
Epoch 4/2094
 - 22s - loss: 2846.3232 - val_loss: 3216.5825

Epoch 00004: val_loss improved from 3525.32690 to 3216.58252, saving model to ./saved_models/model_4_checkpoint.h5

Epoch 00004: loss did not improve from 2838.49241
Epoch 5/2094
 - 23s - loss: 2798.2960 - val_loss: 4092.5574

Epoch 00005: val_loss did not improve from 3216.58252
Epoch 6/2094
 - 22s - loss: 2816.2170 - val_los

  if self.monitor_op(current, self.best):



Time 371.88651514053345
Super-epoch 2 - learn rate: 0.0005

Epoch 1/2094
 - 31s - loss: 2888.3209 - val_loss: 4365.9946

Epoch 00001: val_loss did not improve from 2646.54810
Epoch 2/2094
 - 22s - loss: 2667.6779 - val_loss: 2919.0481

Epoch 00002: val_loss did not improve from 2646.54810

Epoch 00002: loss improved from 2811.52548 to 2667.67789, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 3/2094
 - 22s - loss: 2734.1612 - val_loss: 2609.9775

Epoch 00003: val_loss improved from 2646.54810 to 2609.97754, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 4/2094
 - 22s - loss: 2810.1166 - val_loss: 2598.7678

Epoch 00004: val_loss improved from 2609.97754 to 2598.76782, saving model to ./saved_models/model_4_checkpoint.h5

Epoch 00004: loss did not improve from 2667.67789
Epoch 5/2094
 - 22s - loss: 2804.7127 - val_loss: 2526.3030

Epoch 00005: val_loss improved from 2598.76782 to 2526.30298, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 6/2094
 - 22


Epoch 00053: val_loss did not improve from 2060.53052
Epoch 54/2094
 - 22s - loss: 2581.1487 - val_loss: 2460.5374

Epoch 00054: val_loss did not improve from 2060.53052

Epoch 00054: loss did not improve from 2523.39364
Epoch 55/2094
 - 22s - loss: 2528.6725 - val_loss: 2247.0903

Epoch 00055: val_loss did not improve from 2060.53052
Epoch 56/2094
 - 22s - loss: 2449.4718 - val_loss: 2664.4578

Epoch 00056: val_loss did not improve from 2060.53052

Epoch 00056: loss improved from 2523.39364 to 2449.47185, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 57/2094
 - 23s - loss: 2558.4124 - val_loss: 2660.4175

Epoch 00057: val_loss did not improve from 2060.53052
Epoch 58/2094
 - 22s - loss: 2663.8492 - val_loss: 2417.7124

Epoch 00058: val_loss did not improve from 2060.53052

Epoch 00058: loss did not improve from 2449.47185
Epoch 59/2094
 - 22s - loss: 2643.8474 - val_loss: 2342.1877

Epoch 00059: val_loss did not improve from 2060.53052
Epoch 60/2094
 - 22s - loss: 2558.3

 - 22s - loss: 2541.0639 - val_loss: 2244.4016

Epoch 00110: val_loss did not improve from 2060.53052

Epoch 00110: loss did not improve from 2308.03546
Epoch 111/2094
 - 22s - loss: 2477.4712 - val_loss: 2385.7253

Epoch 00111: val_loss did not improve from 2060.53052
Epoch 112/2094
 - 22s - loss: 2390.1639 - val_loss: 2384.4297

Epoch 00112: val_loss did not improve from 2060.53052

Epoch 00112: loss did not improve from 2308.03546
Epoch 113/2094
 - 23s - loss: 2320.7024 - val_loss: 2728.5305

Epoch 00113: val_loss did not improve from 2060.53052
Epoch 114/2094
 - 22s - loss: 2364.2837 - val_loss: 2207.0076

Epoch 00114: val_loss did not improve from 2060.53052

Epoch 00114: loss did not improve from 2308.03546
Epoch 115/2094
 - 22s - loss: 2351.2276 - val_loss: 2418.4858

Epoch 00115: val_loss did not improve from 2060.53052
Epoch 116/2094
 - 22s - loss: 2427.3490 - val_loss: 2082.3279

Epoch 00116: val_loss did not improve from 2060.53052

Epoch 00116: loss did not improve from 230

 - 22s - loss: 2217.4625 - val_loss: 2032.1458

Epoch 00016: val_loss did not improve from 1939.60242
Epoch 17/2094
 - 22s - loss: 2294.9689 - val_loss: 2247.2568

Epoch 00017: val_loss did not improve from 1939.60242

Epoch 00017: loss did not improve from 2231.53364
Epoch 18/2094
 - 22s - loss: 2235.2040 - val_loss: 3375.6033

Epoch 00018: val_loss did not improve from 1939.60242
Epoch 19/2094
 - 23s - loss: 2306.8036 - val_loss: 2617.5950

Epoch 00019: val_loss did not improve from 1939.60242

Epoch 00019: loss did not improve from 2231.53364
Epoch 20/2094
 - 22s - loss: 2361.1084 - val_loss: 2160.3406

Epoch 00020: val_loss did not improve from 1939.60242
Epoch 21/2094
 - 22s - loss: 2316.1253 - val_loss: 1945.1648

Epoch 00021: val_loss did not improve from 1939.60242

Epoch 00021: loss did not improve from 2231.53364
Epoch 22/2094
 - 22s - loss: 2351.0484 - val_loss: 1975.4697

Epoch 00022: val_loss did not improve from 1939.60242
Epoch 23/2094
Batch 8: Invalid loss, terminating 

  if self.monitor_op(current, self.best):



Time 4182.603794336319
Super-epoch 4 - learn rate: 0.0003149802624737183

Epoch 1/2094
 - 31s - loss: 2221.7237 - val_loss: 2224.7219

Epoch 00001: val_loss did not improve from 1939.60242
Epoch 2/2094
 - 22s - loss: 2226.2445 - val_loss: 2027.5253

Epoch 00002: val_loss did not improve from 1939.60242

Epoch 00002: loss improved from 2231.53364 to 2226.24447, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 3/2094
 - 22s - loss: 2304.1956 - val_loss: 2938.3501

Epoch 00003: val_loss did not improve from 1939.60242
Epoch 4/2094
 - 22s - loss: 2202.5202 - val_loss: 2165.6384

Epoch 00004: val_loss did not improve from 1939.60242

Epoch 00004: loss improved from 2226.24447 to 2202.52016, saving model to ./saved_models/model_4_checkpoint.h5
Epoch 5/2094
 - 22s - loss: 2366.2969 - val_loss: 2694.7983

Epoch 00005: val_loss did not improve from 1939.60242
Epoch 6/2094
 - 22s - loss: 2208.5003 - val_loss: 2089.0122

Epoch 00006: val_loss did not improve from 1939.60242

Epoch 0000

  if self.monitor_op(current, self.best):



Time 5179.1507267951965
Super-epoch 6 - learn rate: 0.00019842513149602494

Epoch 1/2094
 - 31s - loss: 2326.6470 - val_loss: 2206.1257

Epoch 00001: val_loss did not improve from 1939.60242
Epoch 2/2094
 - 22s - loss: 2254.8560 - val_loss: 2223.5562

Epoch 00002: val_loss did not improve from 1939.60242

Epoch 00002: loss did not improve from 2202.52016
Epoch 3/2094
 - 22s - loss: 2255.5470 - val_loss: 2795.9355

Epoch 00003: val_loss did not improve from 1939.60242
Epoch 4/2094
 - 22s - loss: 2322.8666 - val_loss: 2240.0154

Epoch 00004: val_loss did not improve from 1939.60242

Epoch 00004: loss did not improve from 2202.52016
Epoch 5/2094
 - 22s - loss: 2374.0438 - val_loss: 2096.7078

Epoch 00005: val_loss did not improve from 1939.60242
Epoch 6/2094
 - 23s - loss: 2324.7698 - val_loss: 2725.1338

Epoch 00006: val_loss did not improve from 1939.60242

Epoch 00006: loss did not improve from 2202.52016
Epoch 7/2094
 - 22s - loss: 2391.7447 - val_loss: 2247.2280

Epoch 00007: val_lo

 - 22s - loss: 2285.0130 - val_loss: 2926.7456

Epoch 00006: val_loss did not improve from 1920.42969
Epoch 7/2094
 - 22s - loss: 2300.3114 - val_loss: 1980.6692

Epoch 00007: val_loss did not improve from 1920.42969

Epoch 00007: loss did not improve from 2197.88217
Epoch 8/2094
 - 22s - loss: 2275.4061 - val_loss: 2047.0902

Epoch 00008: val_loss did not improve from 1920.42969
Epoch 9/2094
 - 22s - loss: 2310.5674 - val_loss: 1972.2487

Epoch 00009: val_loss did not improve from 1920.42969

Epoch 00009: loss did not improve from 2197.88217
Epoch 10/2094
 - 23s - loss: 2336.7645 - val_loss: 2149.5425

Epoch 00010: val_loss did not improve from 1920.42969
Epoch 11/2094
 - 22s - loss: 2289.3171 - val_loss: 2001.3987

Epoch 00011: val_loss did not improve from 1920.42969

Epoch 00011: loss did not improve from 2197.88217
Epoch 12/2094
 - 22s - loss: 2319.4692 - val_loss: 2103.0071

Epoch 00012: val_loss did not improve from 1920.42969
Epoch 13/2094
 - 22s - loss: 2262.9307 - val_loss: 2

 - 22s - loss: 2320.7319 - val_loss: 2932.9675

Epoch 00041: val_loss did not improve from 1920.42969

Epoch 00041: loss did not improve from 2159.86615
Epoch 42/2094
 - 23s - loss: 2336.0290 - val_loss: 2525.8691

Epoch 00042: val_loss did not improve from 1920.42969
Epoch 43/2094
 - 23s - loss: 2282.5214 - val_loss: 2003.2264

Epoch 00043: val_loss did not improve from 1920.42969

Epoch 00043: loss did not improve from 2159.86615
Epoch 44/2094
 - 23s - loss: 2250.2502 - val_loss: 2045.4440

Epoch 00044: val_loss did not improve from 1920.42969
Epoch 45/2094
 - 22s - loss: 2289.6523 - val_loss: 1917.0239

Epoch 00045: val_loss improved from 1920.42969 to 1917.02393, saving model to ./saved_models/model_4_checkpoint.h5

Epoch 00045: loss did not improve from 2159.86615
Epoch 46/2094
 - 22s - loss: 2264.4937 - val_loss: 2222.3459

Epoch 00046: val_loss did not improve from 1917.02393
Epoch 47/2094
 - 22s - loss: 2245.9495 - val_loss: 2036.0291

Epoch 00047: val_loss did not improve from


Epoch 00098: val_loss did not improve from 1917.02393
Epoch 99/2094
 - 22s - loss: 2207.0793 - val_loss: 2527.7214

Epoch 00099: val_loss did not improve from 1917.02393

Epoch 00099: loss did not improve from 2159.86615
Epoch 100/2094
Batch 30: Invalid loss, terminating training

Epoch 00100: val_loss did not improve from 1917.02393

Time 9043.24456501007
Super-epoch 9 - learn rate: 0.000125

Epoch 1/2094
 - 31s - loss: 2240.8079 - val_loss: 2256.8972

Epoch 00001: val_loss did not improve from 1917.02393

Epoch 00001: loss did not improve from 2159.86615
Epoch 2/2094
 - 22s - loss: 2290.2575 - val_loss: 2031.0081

Epoch 00002: val_loss did not improve from 1917.02393
Epoch 3/2094
 - 22s - loss: 2256.6622 - val_loss: 2480.9246

Epoch 00003: val_loss did not improve from 1917.02393

Epoch 00003: loss did not improve from 2159.86615
Epoch 4/2094
 - 22s - loss: 2282.9150 - val_loss: 2066.1113

Epoch 00004: val_loss did not improve from 1917.02393
Epoch 5/2094
 - 22s - loss: 2340.3071 - 

  if self.monitor_op(current, self.best):



Time 9212.033174037933
Super-epoch 10 - learn rate: 7.874506561842957e-05

Epoch 1/2094
 - 31s - loss: 2274.0428 - val_loss: 1957.8796

Epoch 00001: val_loss did not improve from 1917.02393
Epoch 2/2094
Batch 10: Invalid loss, terminating training

Epoch 00002: val_loss did not improve from 1917.02393

Epoch 00002: loss did not improve from 2159.86615


  if self.monitor_op(current, self.best):



Time 9265.666200876236
Super-epoch 11 - learn rate: 7.874506561842957e-05

Epoch 1/2094
 - 32s - loss: 2368.9091 - val_loss: 2155.6775

Epoch 00001: val_loss did not improve from 1917.02393
Epoch 2/2094
 - 22s - loss: 2245.1249 - val_loss: 2845.8145

Epoch 00002: val_loss did not improve from 1917.02393

Epoch 00002: loss did not improve from 2159.86615
Epoch 3/2094
 - 22s - loss: 2209.3382 - val_loss: 2674.2471

Epoch 00003: val_loss did not improve from 1917.02393
Epoch 4/2094
 - 22s - loss: 2259.9269 - val_loss: 2861.5530

Epoch 00004: val_loss did not improve from 1917.02393

Epoch 00004: loss did not improve from 2159.86615
Epoch 5/2094
 - 22s - loss: 2244.9969 - val_loss: 2326.4226

Epoch 00005: val_loss did not improve from 1917.02393
Epoch 6/2094
 - 22s - loss: 2298.9085 - val_loss: 2242.2334

Epoch 00006: val_loss did not improve from 1917.02393

Epoch 00006: loss did not improve from 2159.86615
Epoch 7/2094
 - 22s - loss: 2292.6215 - val_loss: 2015.7244

Epoch 00007: val_los

  if self.monitor_op(current, self.best):



Time 9587.10349369049
Super-epoch 12 - learn rate: 7.874506561842957e-05

Epoch 1/2094
 - 32s - loss: 2267.4392 - val_loss: 2149.4600

Epoch 00001: val_loss did not improve from 1917.02393
Epoch 2/2094
 - 22s - loss: 2229.3710 - val_loss: 2299.2126

Epoch 00002: val_loss did not improve from 1917.02393

Epoch 00002: loss did not improve from 2159.86615
Epoch 3/2094
 - 22s - loss: 2388.9050 - val_loss: 3261.6138

Epoch 00003: val_loss did not improve from 1917.02393
Epoch 4/2094
 - 22s - loss: 2352.8671 - val_loss: 1977.9467

Epoch 00004: val_loss did not improve from 1917.02393

Epoch 00004: loss did not improve from 2159.86615
Epoch 5/2094
 - 22s - loss: 2329.5221 - val_loss: 2234.3289

Epoch 00005: val_loss did not improve from 1917.02393
Epoch 6/2094
 - 22s - loss: 2307.9049 - val_loss: 2071.5066

Epoch 00006: val_loss did not improve from 1917.02393

Epoch 00006: loss did not improve from 2159.86615
Epoch 7/2094
 - 22s - loss: 2357.9584 - val_loss: 1999.8247

Epoch 00007: val_loss

 - 22s - loss: 2300.1470 - val_loss: 2146.7266

Epoch 00006: val_loss did not improve from 1874.67627
Epoch 7/2094
 - 22s - loss: 2219.9094 - val_loss: 2057.1287

Epoch 00007: val_loss did not improve from 1874.67627

Epoch 00007: loss did not improve from 2159.86615
Epoch 8/2094
 - 22s - loss: 2263.0983 - val_loss: 2089.3733

Epoch 00008: val_loss did not improve from 1874.67627
Epoch 9/2094
 - 22s - loss: 2217.3847 - val_loss: 2067.4319

Epoch 00009: val_loss did not improve from 1874.67627

Epoch 00009: loss did not improve from 2159.86615
Epoch 10/2094
 - 22s - loss: 2364.9433 - val_loss: 2116.1465

Epoch 00010: val_loss did not improve from 1874.67627
Epoch 11/2094
 - 22s - loss: 2316.9558 - val_loss: 2078.1086

Epoch 00011: val_loss did not improve from 1874.67627

Epoch 00011: loss did not improve from 2159.86615
Epoch 12/2094
 - 22s - loss: 2239.5874 - val_loss: 2986.7185

Epoch 00012: val_loss did not improve from 1874.67627
Epoch 13/2094
 - 22s - loss: 2255.6504 - val_loss: 2

 - 22s - loss: 2325.2429 - val_loss: 2050.6541

Epoch 00064: val_loss did not improve from 1874.67627
Epoch 65/2094
 - 22s - loss: 2338.6109 - val_loss: 2594.5693

Epoch 00065: val_loss did not improve from 1874.67627

Epoch 00065: loss did not improve from 2159.86615
Epoch 66/2094
 - 22s - loss: 2287.2835 - val_loss: 2641.5554

Epoch 00066: val_loss did not improve from 1874.67627
Epoch 67/2094
 - 22s - loss: 2268.2506 - val_loss: 1954.0857

Epoch 00067: val_loss did not improve from 1874.67627

Epoch 00067: loss did not improve from 2159.86615
Epoch 68/2094
 - 22s - loss: 2283.3727 - val_loss: 2269.5642

Epoch 00068: val_loss did not improve from 1874.67627
Epoch 69/2094
 - 22s - loss: 2263.4806 - val_loss: 2044.6333

Epoch 00069: val_loss did not improve from 1874.67627

Epoch 00069: loss did not improve from 2159.86615
Epoch 70/2094
 - 22s - loss: 2319.6064 - val_loss: 2572.4084

Epoch 00070: val_loss did not improve from 1874.67627
Epoch 71/2094
Batch 37: Invalid loss, terminating

  if self.monitor_op(current, self.best):



Time 12341.494601726532
Super-epoch 15 - learn rate: 4.9606282874006234e-05

Epoch 1/2094
 - 31s - loss: 2343.4771 - val_loss: 2247.3665

Epoch 00001: val_loss did not improve from 1874.67627
Epoch 2/2094
 - 22s - loss: 2256.2952 - val_loss: 2164.6709

Epoch 00002: val_loss did not improve from 1874.67627

Epoch 00002: loss did not improve from 2159.86615
Epoch 3/2094
 - 22s - loss: 2269.1001 - val_loss: 2207.7881

Epoch 00003: val_loss did not improve from 1874.67627
Epoch 4/2094
 - 22s - loss: 2257.9236 - val_loss: 2187.3870

Epoch 00004: val_loss did not improve from 1874.67627

Epoch 00004: loss did not improve from 2159.86615
Epoch 5/2094
Batch 7: Invalid loss, terminating training

Epoch 00005: val_loss did not improve from 1874.67627

Time 12472.322889566422
Super-epoch 16 - learn rate: 4.9606282874006234e-05

Epoch 1/2094
 - 31s - loss: 2293.4290 - val_loss: 1860.4299

Epoch 00001: val_loss improved from 1874.67627 to 1860.42993, saving model to ./saved_models/model_4_checkpoi

  if self.monitor_op(current, self.best):



Time 12570.881450653076
Super-epoch 17 - learn rate: 3.125e-05

Epoch 1/2094
 - 31s - loss: 2195.7902 - val_loss: 2150.5781

Epoch 00001: val_loss did not improve from 1860.42993
Epoch 2/2094
 - 22s - loss: 2273.1482 - val_loss: 1890.0879

Epoch 00002: val_loss did not improve from 1860.42993

Epoch 00002: loss did not improve from 2159.86615
Epoch 3/2094
 - 23s - loss: 2211.4412 - val_loss: 1957.9774

Epoch 00003: val_loss did not improve from 1860.42993
Epoch 4/2094
 - 22s - loss: 2323.6474 - val_loss: 2057.3816

Epoch 00004: val_loss did not improve from 1860.42993

Epoch 00004: loss did not improve from 2159.86615
Epoch 5/2094
 - 22s - loss: 2251.9392 - val_loss: 2179.6628

Epoch 00005: val_loss did not improve from 1860.42993
Epoch 6/2094
 - 22s - loss: 2251.9860 - val_loss: 2062.1018

Epoch 00006: val_loss did not improve from 1860.42993

Epoch 00006: loss did not improve from 2159.86615
Epoch 7/2094
 - 22s - loss: 2299.6405 - val_loss: 2153.2812

Epoch 00007: val_loss did not i

 - 22s - loss: 2308.5446 - val_loss: 2205.8923

Epoch 00038: val_loss did not improve from 1859.32910
Epoch 39/2094
 - 22s - loss: 2221.5533 - val_loss: 1955.2169

Epoch 00039: val_loss did not improve from 1859.32910

Epoch 00039: loss did not improve from 2159.86615
Epoch 40/2094
 - 22s - loss: 2339.6805 - val_loss: 2018.7987

Epoch 00040: val_loss did not improve from 1859.32910
Epoch 41/2094
 - 22s - loss: 2337.5291 - val_loss: 1963.7163

Epoch 00041: val_loss did not improve from 1859.32910

Epoch 00041: loss did not improve from 2159.86615
Epoch 42/2094
 - 22s - loss: 2308.0505 - val_loss: 2674.3440

Epoch 00042: val_loss did not improve from 1859.32910
Epoch 43/2094
 - 22s - loss: 2229.2073 - val_loss: 2122.1406

Epoch 00043: val_loss did not improve from 1859.32910

Epoch 00043: loss did not improve from 2159.86615
Epoch 44/2094
 - 22s - loss: 2334.6583 - val_loss: 2114.9414

Epoch 00044: val_loss did not improve from 1859.32910
Epoch 45/2094
 - 23s - loss: 2243.1145 - val_loss

 - 22s - loss: 2316.4487 - val_loss: 2040.1622

Epoch 00096: val_loss did not improve from 1859.32910
Epoch 97/2094
 - 22s - loss: 2237.9751 - val_loss: 2244.8975

Epoch 00097: val_loss did not improve from 1859.32910

Epoch 00097: loss did not improve from 2159.86615
Epoch 98/2094
 - 23s - loss: 2233.2696 - val_loss: 2366.4155

Epoch 00098: val_loss did not improve from 1859.32910
Epoch 99/2094
 - 22s - loss: 2261.7216 - val_loss: 1949.6470

Epoch 00099: val_loss did not improve from 1859.32910

Epoch 00099: loss did not improve from 2159.86615
Epoch 100/2094
 - 22s - loss: 2195.5727 - val_loss: 2834.5569

Epoch 00100: val_loss did not improve from 1859.32910
Epoch 101/2094
 - 22s - loss: 2389.2546 - val_loss: 2088.8333

Epoch 00101: val_loss did not improve from 1859.32910

Epoch 00101: loss did not improve from 2159.86615
Epoch 102/2094
 - 22s - loss: 2352.4911 - val_loss: 2078.6108

Epoch 00102: val_loss did not improve from 1859.32910
Epoch 103/2094
 - 23s - loss: 2223.4123 - val_

  if self.monitor_op(current, self.best):



Time 16433.337880134583
Super-epoch 20 - learn rate: 1.2401570718501559e-05

Epoch 1/2094
 - 32s - loss: 2293.1835 - val_loss: 2094.8474

Epoch 00001: val_loss did not improve from 1859.32910
Epoch 2/2094
 - 22s - loss: 2273.2257 - val_loss: 2026.4967

Epoch 00002: val_loss did not improve from 1859.32910

Epoch 00002: loss did not improve from 2159.86615
Epoch 3/2094
 - 22s - loss: 2297.7698 - val_loss: 2156.9099

Epoch 00003: val_loss did not improve from 1859.32910
Epoch 4/2094
 - 22s - loss: 2259.7696 - val_loss: 2014.6500

Epoch 00004: val_loss did not improve from 1859.32910

Epoch 00004: loss did not improve from 2159.86615
Epoch 5/2094
 - 23s - loss: 2351.7449 - val_loss: 2082.8972

Epoch 00005: val_loss did not improve from 1859.32910
Epoch 6/2094
 - 22s - loss: 2230.9232 - val_loss: 2096.4236

Epoch 00006: val_loss did not improve from 1859.32910

Epoch 00006: loss did not improve from 2159.86615
Epoch 7/2094
 - 22s - loss: 2219.0500 - val_loss: 2080.9934

Epoch 00007: val_l

Epoch 1/2094
 - 32s - loss: 2301.9407 - val_loss: 2023.2935

Epoch 00001: val_loss did not improve from 1859.32910

Epoch 00001: loss did not improve from 2159.86615
Epoch 2/2094
 - 23s - loss: 2232.7723 - val_loss: 2141.8823

Epoch 00002: val_loss did not improve from 1859.32910
Epoch 3/2094
 - 23s - loss: 2307.5390 - val_loss: 2165.9219

Epoch 00003: val_loss did not improve from 1859.32910

Epoch 00003: loss did not improve from 2159.86615
Epoch 4/2094
 - 23s - loss: 2256.1972 - val_loss: 1972.8999

Epoch 00004: val_loss did not improve from 1859.32910
Epoch 5/2094
 - 23s - loss: 2270.7666 - val_loss: 1938.0935

Epoch 00005: val_loss did not improve from 1859.32910

Epoch 00005: loss did not improve from 2159.86615
Epoch 6/2094
 - 22s - loss: 2306.4334 - val_loss: 2133.0305

Epoch 00006: val_loss did not improve from 1859.32910
Epoch 7/2094
 - 22s - loss: 2284.0413 - val_loss: 2095.1604

Epoch 00007: val_loss did not improve from 1859.32910

Epoch 00007: loss did not improve from 21

  if self.monitor_op(current, self.best):



Time 18706.12945985794
Super-epoch 24 - learn rate: 1.2401570718501559e-05

Epoch 1/2094
 - 33s - loss: 2337.5227 - val_loss: 2091.0808

Epoch 00001: val_loss did not improve from 1859.32910
Epoch 2/2094
 - 22s - loss: 2332.1711 - val_loss: 2116.6301

Epoch 00002: val_loss did not improve from 1859.32910

Epoch 00002: loss did not improve from 2159.86615
Epoch 3/2094
 - 23s - loss: 2268.2965 - val_loss: 2297.4622

Epoch 00003: val_loss did not improve from 1859.32910
Epoch 4/2094
 - 23s - loss: 2214.1834 - val_loss: 2098.8201

Epoch 00004: val_loss did not improve from 1859.32910

Epoch 00004: loss did not improve from 2159.86615
Epoch 5/2094
 - 23s - loss: 2300.2769 - val_loss: 2683.8325

Epoch 00005: val_loss did not improve from 1859.32910
Epoch 6/2094
 - 22s - loss: 2211.2694 - val_loss: 2914.2241

Epoch 00006: val_loss did not improve from 1859.32910

Epoch 00006: loss did not improve from 2159.86615
Epoch 7/2094
 - 23s - loss: 2251.0550 - val_loss: 2709.4590

Epoch 00007: val_lo

  if self.monitor_op(current, self.best):



Time 19120.510021209717
Super-epoch 25 - learn rate: 1.2401570718501559e-05

Epoch 1/2094
 - 32s - loss: 2279.3515 - val_loss: 2207.0959

Epoch 00001: val_loss did not improve from 1859.32910
Epoch 2/2094
 - 22s - loss: 2247.8618 - val_loss: 2199.2246

Epoch 00002: val_loss did not improve from 1859.32910

Epoch 00002: loss did not improve from 2159.86615
Epoch 3/2094
 - 22s - loss: 2297.0727 - val_loss: 1963.1969

Epoch 00003: val_loss did not improve from 1859.32910
Epoch 4/2094
 - 22s - loss: 2232.3542 - val_loss: 2345.7354

Epoch 00004: val_loss did not improve from 1859.32910

Epoch 00004: loss did not improve from 2159.86615
Epoch 5/2094
 - 22s - loss: 2251.2472 - val_loss: 1922.3170

Epoch 00005: val_loss did not improve from 1859.32910
Epoch 6/2094
 - 22s - loss: 2311.6171 - val_loss: 2335.8237

Epoch 00006: val_loss did not improve from 1859.32910

Epoch 00006: loss did not improve from 2159.86615
Epoch 7/2094
 - 22s - loss: 2332.4228 - val_loss: 2394.0774

Epoch 00007: val_l

KeyboardInterrupt: 

In [None]:
print("Resource successfully released")