In [1]:
import tensorflow as tf
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display
from seaborn import color_palette
import cv2


In [2]:
# Model hyperparameters
_BATCH_NORM_DECAY = 0.9       # used as `momentum` by batch_norm()
_BATCH_NORM_EPSILON = 1e-05   # used as `epsilon` by batch_norm()
_LEAKY_RELU = 0.1             # used as `alpha` for tf.nn.leaky_relu

# Nine anchor pairs, listed from smallest to largest.
# NOTE(review): presumably (width, height) in pixels at _MODEL_SIZE — confirm.
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]
# The list was originally bound to this misspelled name; keep it as an alias
# of the same list object so any existing reference keeps working.
_AHCHORS = _ANCHORS

# NOTE(review): presumably the network input size — confirm whether (h, w) or (w, h).
_MODEL_SIZE = (416, 416)

In [4]:
#Model Definition
# Building blocks: batch normalization, fixed padding, and padded 2-D convolution

def batch_norm(inputs, training, data_format):
    """Run `inputs` through tf.layers.batch_normalization.

    Args:
        inputs: tensor to normalize.
        training: forwarded unchanged as the layer's `training` argument.
        data_format: 'channels_first' normalizes over axis 1; any other
            value normalizes over axis 3.

    Returns:
        Whatever tf.layers.batch_normalization returns for these arguments.
    """
    channel_axis = 3
    if data_format == 'channels_first':
        channel_axis = 1

    return tf.layers.batch_normalization(
        inputs=inputs,
        axis=channel_axis,
        momentum=_BATCH_NORM_DECAY,
        epsilon=_BATCH_NORM_EPSILON,
        scale=True,
        training=training,
    )

def fixed_padding(inputs, kernel_size, data_format):
    """Pad the two spatial axes of a 4-axis tensor by `kernel_size - 1` in total.

    The amount depends only on `kernel_size`, never on the input size. When
    the total is odd, the extra element goes at the end of each axis.

    Args:
        inputs: tensor handed to tf.pad (four axes are padded/addressed).
        kernel_size: integer kernel size that determines the padding.
        data_format: 'channels_first' pads axes 2 and 3; any other value
            pads axes 1 and 2.

    Returns:
        The result of tf.pad (called with its default mode).
    """
    total = kernel_size - 1
    before = total // 2
    after = total - before

    spatial_axes = (2, 3) if data_format == 'channels_first' else (1, 2)
    paddings = [
        [before, after] if axis in spatial_axes else [0, 0]
        for axis in range(4)
    ]
    return tf.pad(inputs, paddings)

def conv2d_fixed_padding(inputs, filters, kernel_size, data_format, strides = 1):
    """Bias-free 2-D convolution with explicit padding for strided cases.

    For `strides == 1` the convolution uses 'SAME' padding. For larger
    strides the input is first padded by fixed_padding() and the convolution
    then runs with 'VALID' padding, so the padding applied depends only on
    `kernel_size`.

    Args:
        inputs: input tensor.
        filters: number of output filters.
        kernel_size: convolution kernel size.
        data_format: 'channels_first' or 'channels_last'; forwarded to
            fixed_padding() and tf.layers.conv2d.
        strides: convolution stride (default 1).

    Returns:
        The result of tf.layers.conv2d.
    """
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)
    return tf.layers.conv2d(
        inputs = inputs, filters = filters, kernel_size = kernel_size,
        # The padding mode was previously misspelled ('VAILD'), which
        # tf.layers.conv2d does not accept.
        strides = strides, padding = ('SAME' if strides == 1 else 'VALID'),
        use_bias = False, data_format = data_format
    )


In [5]:
#Feature Extraction: Darknet-53

def darknet53_residual_block(inputs, filters, training, data_format, strides = 1):
    """Residual unit: 1x1 conv -> 3x3 conv, then add the block's input back.

    Each convolution is followed by batch_norm() and a leaky ReLU. The 1x1
    convolution produces `filters` channels and the 3x3 convolution
    produces `2 * filters` channels.

    Args:
        inputs: input tensor; also used as the shortcut that is added back.
        filters: filter count of the 1x1 convolution.
        training: forwarded to batch_norm().
        data_format: forwarded to the convolution and batch-norm helpers.
        strides: stride passed to both convolutions (default 1).
            NOTE(review): with strides > 1 the convolved tensor presumably
            no longer matches the shortcut's spatial size — confirm before
            using a non-default value.

    Returns:
        The activated output of the second convolution plus the shortcut.
    """
    residual = inputs
    net = inputs

    # (filter count, kernel size) for the two conv/BN/activation steps
    for n_filters, k_size in ((filters, 1), (2 * filters, 3)):
        net = conv2d_fixed_padding(
            net, filters=n_filters, kernel_size=k_size,
            strides=strides, data_format=data_format,
        )
        net = batch_norm(net, training=training, data_format=data_format)
        net = tf.nn.leaky_relu(net, alpha=_LEAKY_RELU)

    net += residual
    return net

def darknet53(inputs, training, data_format):
    """Build the Darknet-53 feature extractor and return three feature maps.

    Layout: a 32-filter 3x3 convolution, then five stages. Each stage is a
    stride-2 3x3 convolution followed by a run of residual blocks:

        conv  64 /2  ->  1 x residual(32)
        conv 128 /2  ->  2 x residual(64)
        conv 256 /2  ->  8 x residual(128)   -> route1
        conv 512 /2  ->  8 x residual(256)   -> route2
        conv 1024 /2 ->  4 x residual(512)   -> final output

    A residual block called with `filters=f` outputs `2 * f` channels and
    adds its input back, so it must be fed `2 * f` channels. That is why
    every stage's down-sampling convolution uses twice the filter count of
    the residual blocks that follow it.

    Args:
        inputs: input image tensor.
        training: forwarded to every batch-norm layer.
        data_format: 'channels_first' or 'channels_last'.

    Returns:
        (route1, route2, inputs): the outputs of the 8x128 stage, the 8x256
        stage, and the final 4x512 stage, in that order.
    """
    def conv_bn_leaky(net, filters, strides=1):
        # 3x3 convolution -> batch norm -> leaky ReLU
        net = conv2d_fixed_padding(
            net, filters=filters, kernel_size=3,
            strides=strides, data_format=data_format
        )
        net = batch_norm(net, training=training, data_format=data_format)
        return tf.nn.leaky_relu(net, alpha=_LEAKY_RELU)

    def residual_stage(net, filters, n_blocks):
        # n_blocks residual blocks applied back to back
        for _ in range(n_blocks):
            net = darknet53_residual_block(
                net, filters=filters,
                training=training, data_format=data_format
            )
        return net

    inputs = conv_bn_leaky(inputs, 32)

    inputs = conv_bn_leaky(inputs, 64, strides=2)
    inputs = residual_stage(inputs, 32, 1)

    inputs = conv_bn_leaky(inputs, 128, strides=2)
    inputs = residual_stage(inputs, 64, 2)

    inputs = conv_bn_leaky(inputs, 256, strides=2)
    inputs = residual_stage(inputs, 128, 8)
    # Captured once, after the whole stage (not on every loop iteration).
    route1 = inputs

    inputs = conv_bn_leaky(inputs, 512, strides=2)
    inputs = residual_stage(inputs, 256, 8)
    route2 = inputs

    inputs = conv_bn_leaky(inputs, 1024, strides=2)
    inputs = residual_stage(inputs, 512, 4)

    return route1, route2, inputs


In [6]:
#Convolution Layers
def yolo_convolution_block(inputs, filters, training, data_format):
    """Six alternating 1x1 / 3x3 conv units; returns a tap and the final output.

    Every unit is convolution -> batch_norm() -> leaky ReLU. The 1x1 units
    use `filters` filters and the 3x3 units use `2 * filters`. The order is
    1x1, 3x3, 1x1, 3x3, 1x1, 3x3.

    Args:
        inputs: input tensor.
        filters: filter count of the 1x1 units.
        training: forwarded to batch_norm().
        data_format: forwarded to the convolution and batch-norm helpers.

    Returns:
        (route, inputs): the output of the fifth unit (the last 1x1) and the
        output of the sixth unit (the last 3x3).
    """
    def conv_unit(net, n_filters, k_size):
        # convolution -> batch norm -> leaky ReLU
        net = conv2d_fixed_padding(
            net, filters=n_filters, kernel_size=k_size, data_format=data_format
        )
        net = batch_norm(net, training=training, data_format=data_format)
        return tf.nn.leaky_relu(net, alpha=_LEAKY_RELU)

    net = inputs
    # First two 1x1 / 3x3 pairs.
    for _ in range(2):
        net = conv_unit(net, filters, 1)
        net = conv_unit(net, 2 * filters, 3)

    # Third pair: the 1x1 output is tapped before the closing 3x3.
    route = conv_unit(net, filters, 1)
    net = conv_unit(route, 2 * filters, 3)

    return route, net





In [None]:
# Detection Layers

def yolo_layer(inputs, n_classes, anchors, img_size, data_format):
    """Project a feature map to per-anchor predictions and split them up.

    A 1x1 convolution produces `len(anchors) * (5 + n_classes)` channels.
    The result is reshaped to `[-1, n_anchors * grid_h * grid_w,
    5 + n_classes]` and split along the last axis into box centers (2),
    box shapes (2), confidence (1) and class scores (n_classes).

    Args:
        inputs: feature-map tensor; its two spatial dimensions must be
            statically known because they are read via get_shape().
        n_classes: number of class scores per prediction.
        anchors: sequence of anchors; only its length is used in the part
            of the function visible here.
        img_size: pair of image dimensions used to derive the stride of one
            grid cell along each spatial axis.
        data_format: 'channels_first' or 'channels_last'.

    NOTE(review): the visible part of this function ends after the split
    and has no return statement. The decoding of the split tensors and the
    return value still need to be added or confirmed.
    """
    n_anchors = len(anchors)

    inputs = tf.layers.conv2d(inputs, filters = n_anchors * (5 + n_classes),
                              kernel_size = 1, strides = 1, use_bias = True,
                              data_format = data_format)

    shape = inputs.get_shape().as_list()

    # Spatial dimensions sit at axes 2-3 (channels_first) or 1-2 (otherwise).
    grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]

    # The statements below belong to this function: they use its parameters
    # and locals, so they must be indented into the body rather than left at
    # module level.
    if data_format == 'channels_first':
        # Move channels to the last axis so the reshape groups them per cell.
        inputs = tf.transpose(inputs, [0,2,3,1])
    inputs = tf.reshape(
        inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1],
                 5 + n_classes]
    )

    # Image size divided by grid size along each spatial axis.
    strides = (img_size[0] // grid_shape[0], img_size[1]// grid_shape[1])

    box_centers, box_shapes, confidence, classes = tf.split(inputs, [2,2,1,n_classes] , axis = -1)


    