### Dependencies

In [3]:
import os
import scipy.io
import scipy.misc
import numpy as np
from numpy import expand_dims
import pandas as pd
import PIL
import struct
import cv2

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, Lambda, Conv2D, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
from tensorflow.keras.layers import add, concatenate
from tensorflow.keras.preprocessing.image import load_img, img_to_array

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from matplotlib.patches import Rectangle
from skimage.transform import resize

%matplotlib inline

### Set hyperparameters

In [4]:
# Network input size as (height, width).
net_h = 416
net_w = 416

# Thresholds; presumably the object-score cutoff and the non-max-suppression
# overlap cutoff -- confirm against the cells that consume them.
obj_thresh = 0.5
nms_thresh = 0.45

# The 80 class names, in the order the rest of the notebook indexes them.
labels = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
    "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

### Design neural network

In [5]:
# define block of conv layers
def _conv_block(inp, convs, skip=True):
    """Chain a sequence of convolution layers, optionally with a residual add.

    Args:
        inp: Tensor fed into the first convolution.
        convs: Sequence of dicts, one per convolution. Each dict is read for
            the keys 'filter', 'kernel', 'stride', 'bnorm', 'leaky' and
            'layer_idx'.
        skip: When true, the tensor entering the second-to-last convolution
            is saved and added to the output of the last layer.

    Returns:
        The output of the last layer, or (when `skip` is true) the sum of
        that output and the saved skip tensor.

    Raises:
        ValueError: If `skip` is true but `convs` has fewer than two entries,
            so there is no second-to-last convolution to branch from.
    """
    if skip and len(convs) < 2:
        raise ValueError(
            "skip=True needs at least two conv specs, got %d" % len(convs))

    x = inp
    skip_connection = None
    skip_at = len(convs) - 2

    for count, conv in enumerate(convs):
        # Remember the input of the second-to-last conv for the residual add.
        if skip and count == skip_at:
            skip_connection = x

        downsample = conv['stride'] > 1
        if downsample:
            # Pad one row on top and one column on the left only; the strided
            # convolution below then runs with 'valid' padding.
            x = ZeroPadding2D(((1, 0), (1, 0)))(x)

        # NOTE(review): the previous code read the misspelled key 'fileter'.
        # The fallback keeps any spec written with that spelling working --
        # confirm against the layer specs and drop it once they use 'filter'.
        filters = conv['filter'] if 'filter' in conv else conv['fileter']

        x = Conv2D(filters, conv['kernel'], strides=conv['stride'],
                   padding='valid' if downsample else 'same',
                   name='conv_' + str(conv['layer_idx']),
                   # No bias when batch normalization follows the convolution.
                   use_bias=not conv['bnorm'])(x)

        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)

        if conv['leaky']:
            x = LeakyReLU(alpha = 0.1, name='leaky_' + str(conv['layer_idx']))(x)

    return add([skip_connection, x]) if skip else x