In [6]:
# Importing libraries
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from keras.layers import Input
from __future__ import division
import numpy as np
import tensorflow.keras.backend as K
from tensorflow.keras.layers import InputSpec
from tensorflow.keras.layers import Layer
from tensorflow.keras.regularizers import l2

# Prior Box base line codes

1. Convert Co-ordinates ▶ 
Convert coordinates for axis-aligned 2D boxes between two coordinate formats.
    Creates a copy of `tensor`, i.e. does not operate in place. Currently there are
    three supported coordinate formats that can be converted from and to each other:
        1) (xmin, xmax, ymin, ymax) - the 'minmax' format
        2) (xmin, ymin, xmax, ymax) - the 'corners' format
        3) (cx, cy, w, h) - the 'centroids' format
    


2. Anchorbox ▶
A Keras layer to create an output tensor containing anchor box coordinates.
    
    Input shape:
        4D tensor of shape `(batch, channels, height, width)` if `image_data_format = 'channels_first'`
        or `(batch, height, width, channels)` if `image_data_format = 'channels_last'`.
    Output shape:
        5D tensor of shape `(batch, height, width, n_boxes, 8)`. The last axis contains
        the four anchor box coordinates and the four variance values for each box.



In [7]:
# base function for prior boxes

def convert_coordinates(tensor, start_index, conversion, border_pixels='half'):
    """Convert axis-aligned 2D box coordinates between formats, returning a float copy.

    Supported formats:
        * 'minmax'    - (xmin, xmax, ymin, ymax)
        * 'corners'   - (xmin, ymin, xmax, ymax)
        * 'centroids' - (cx, cy, w, h)

    Args:
        tensor: NumPy array whose last axis holds the four box coordinates at
            positions `start_index` .. `start_index + 3`. It is not modified.
        start_index: Index of the first coordinate in the last axis.
        conversion: One of 'minmax2centroids', 'centroids2minmax',
            'corners2centroids', 'centroids2corners', 'minmax2corners',
            'corners2minmax'.
        border_pixels: 'half', 'include' or 'exclude'. Adds 0, +1 or -1 to the
            computed width/height. Only consulted by the two `*2centroids`
            conversions.

    Returns:
        A float64 copy of `tensor` with the four coordinates converted.

    Raises:
        ValueError: If `conversion` is not supported, or if `border_pixels` is
            not recognised for a conversion that needs it.
    """
    border_offsets = {'half': 0, 'include': 1, 'exclude': -1}

    ind = start_index
    # `np.float` was removed in NumPy 1.24; `np.float64` is the same dtype.
    tensor1 = np.copy(tensor).astype(np.float64)

    if conversion in ('minmax2centroids', 'corners2centroids'):
        # Only these conversions use the border offset, so only they validate it.
        if border_pixels not in border_offsets:
            raise ValueError("Unexpected border_pixels value. Supported values are 'half', 'include', and 'exclude'.")
        d = border_offsets[border_pixels]

    # All right-hand sides read from the untouched input `tensor`, so the order
    # of the assignments into the copy does not matter.
    if conversion == 'minmax2centroids':
        tensor1[..., ind] = (tensor[..., ind] + tensor[..., ind+1]) / 2.0 # Set cx
        tensor1[..., ind+1] = (tensor[..., ind+2] + tensor[..., ind+3]) / 2.0 # Set cy
        tensor1[..., ind+2] = tensor[..., ind+1] - tensor[..., ind] + d # Set w
        tensor1[..., ind+3] = tensor[..., ind+3] - tensor[..., ind+2] + d # Set h
    elif conversion == 'centroids2minmax':
        tensor1[..., ind] = tensor[..., ind] - tensor[..., ind+2] / 2.0 # Set xmin
        tensor1[..., ind+1] = tensor[..., ind] + tensor[..., ind+2] / 2.0 # Set xmax
        tensor1[..., ind+2] = tensor[..., ind+1] - tensor[..., ind+3] / 2.0 # Set ymin
        tensor1[..., ind+3] = tensor[..., ind+1] + tensor[..., ind+3] / 2.0 # Set ymax
    elif conversion == 'corners2centroids':
        tensor1[..., ind] = (tensor[..., ind] + tensor[..., ind+2]) / 2.0 # Set cx
        tensor1[..., ind+1] = (tensor[..., ind+1] + tensor[..., ind+3]) / 2.0 # Set cy
        tensor1[..., ind+2] = tensor[..., ind+2] - tensor[..., ind] + d # Set w
        tensor1[..., ind+3] = tensor[..., ind+3] - tensor[..., ind+1] + d # Set h
    elif conversion == 'centroids2corners':
        tensor1[..., ind] = tensor[..., ind] - tensor[..., ind+2] / 2.0 # Set xmin
        tensor1[..., ind+1] = tensor[..., ind+1] - tensor[..., ind+3] / 2.0 # Set ymin
        tensor1[..., ind+2] = tensor[..., ind] + tensor[..., ind+2] / 2.0 # Set xmax
        tensor1[..., ind+3] = tensor[..., ind+1] + tensor[..., ind+3] / 2.0 # Set ymax
    elif conversion in ('minmax2corners', 'corners2minmax'):
        # Both directions just swap the two middle coordinates.
        tensor1[..., ind+1] = tensor[..., ind+2]
        tensor1[..., ind+2] = tensor[..., ind+1]
    else:
        raise ValueError("Unexpected conversion value. Supported values are 'minmax2centroids', 'centroids2minmax', 'corners2centroids', 'centroids2corners', 'minmax2corners', and 'corners2minmax'.")

    return tensor1

class AnchorBoxes(Layer):
    """Keras layer that outputs anchor (prior) box coordinates for one feature map.

    The layer ignores the values of its input and only uses its spatial shape
    (and the dynamic batch size) to build the anchor grid.

    Input shape:
        4D tensor, `(batch, height, width, channels)` when
        `K.image_data_format() == 'channels_last'`, otherwise
        `(batch, channels, height, width)`.

    Output shape:
        5D tensor `(batch, height, width, n_boxes, 8)`. The last axis holds the
        four box coordinates followed by the four variance values.
    """

    def __init__(self,
                 img_height,
                 img_width,
                 this_scale,
                 next_scale,
                 aspect_ratios=[0.5, 1.0, 2.0],
                 two_boxes_for_ar1=True,
                 this_steps=None,
                 this_offsets=None,
                 clip_boxes=False,
                 variances=[0.1, 0.1, 0.2, 0.2],
                 coords='centroids',
                 normalize_coords=False,
                 **kwargs):
        """Validate and store the anchor box configuration.

        Args:
            img_height: Height used to derive box sizes, default step sizes,
                clipping bounds and normalisation.
            img_width: Width counterpart of `img_height`.
            this_scale: Box size as a fraction of `min(img_height, img_width)`;
                must be in [0, 1].
            next_scale: Next larger scale; used for the extra aspect-ratio-1 box.
            aspect_ratios: Width/height ratios of the boxes generated per cell.
            two_boxes_for_ar1: If True and 1 is among `aspect_ratios`, a second,
                larger square box is generated.
            this_steps: Distance between box centres: a number, a
                `(step_height, step_width)` pair, or None to use
                `img_size / feature_map_size`.
            this_offsets: Offset of the first box centre in units of the step
                size: a number, a `(height, width)` pair, or None for 0.5.
            clip_boxes: If True, clip corner coordinates to the image bounds.
            variances: Four positive values appended to every box.
            coords: Output format: 'centroids', 'minmax', or anything else for
                'corners'.
            normalize_coords: If True, divide coordinates by the image size.
            **kwargs: Forwarded to `Layer.__init__`.

        Raises:
            TypeError: If the Keras backend is not TensorFlow.
            ValueError: If the scales or variances are invalid.
        """
        # NOTE: the list defaults are never mutated by this class, so sharing
        # them between instances is harmless.
        if K.backend() != 'tensorflow':
            raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))

        if (this_scale < 0) or (next_scale < 0) or (this_scale > 1):
            raise ValueError("`this_scale` must be in [0, 1] and `next_scale` must be >0, but `this_scale` == {}, `next_scale` == {}".format(this_scale, next_scale))

        if len(variances) != 4:
            raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
        variances = np.array(variances)
        if np.any(variances <= 0):
            raise ValueError("All variances must be >0, but the variances given are {}".format(variances))

        self.img_height = img_height
        self.img_width = img_width
        self.this_scale = this_scale
        self.next_scale = next_scale
        self.aspect_ratios = aspect_ratios
        self.two_boxes_for_ar1 = two_boxes_for_ar1
        self.this_steps = this_steps
        self.this_offsets = this_offsets
        self.clip_boxes = clip_boxes
        self.variances = variances
        self.coords = coords
        self.normalize_coords = normalize_coords
        # Number of boxes per cell: one per aspect ratio, plus the optional
        # second box for aspect ratio 1.
        if (1 in aspect_ratios) and two_boxes_for_ar1:
            self.n_boxes = len(aspect_ratios) + 1
        else:
            self.n_boxes = len(aspect_ratios)
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Pin the input spec to the shape the layer was first built with."""
        self.input_spec = [InputSpec(shape=input_shape)]
        super().build(input_shape)

    @staticmethod
    def _as_pair(value, name):
        """Expand a number or a 2-element list/tuple into a `(height, width)` pair."""
        if isinstance(value, (list, tuple)) and (len(value) == 2):
            return value[0], value[1]
        if isinstance(value, (int, float)):
            return value, value
        # Previously this case fell through and failed later with an
        # UnboundLocalError; fail here with an actionable message instead.
        raise ValueError("`{}` must be a number or a list/tuple of two numbers, but got {!r}.".format(name, value))

    def call(self, x, mask=None):
        """Build the anchor box tensor for the feature map `x`.

        Args:
            x: 4D feature-map tensor; only its static spatial shape and its
                dynamic batch size are used.
            mask: Unused.

        Returns:
            A float32 tensor of shape
            `(batch, feature_map_height, feature_map_width, n_boxes, 8)`.
        """
        # Box sizes are derived from the shorter image side.
        size = min(self.img_height, self.img_width)
        wh_list = []
        for ar in self.aspect_ratios:
            if (ar == 1):
                # Regular square box for aspect ratio 1.
                box_height = box_width = self.this_scale * size
                wh_list.append((box_width, box_height))
                if self.two_boxes_for_ar1:
                    # Second, larger square box: geometric mean of this scale and the next.
                    box_height = box_width = np.sqrt(self.this_scale * self.next_scale) * size
                    wh_list.append((box_width, box_height))
            else:
                box_height = self.this_scale * size / np.sqrt(ar)
                box_width = self.this_scale * size * np.sqrt(ar)
                wh_list.append((box_width, box_height))
        wh_list = np.array(wh_list)

        # Static shape of the input feature map.
        if K.image_data_format() == 'channels_last':
            batch_size, feature_map_height, feature_map_width, feature_map_channels = x.shape.as_list()
        else:
            batch_size, feature_map_channels, feature_map_height, feature_map_width = x.shape.as_list()

        # Step sizes: how far apart neighbouring box centres are.
        if self.this_steps is None:
            step_height = self.img_height / feature_map_height
            step_width = self.img_width / feature_map_width
        else:
            step_height, step_width = self._as_pair(self.this_steps, 'this_steps')

        # Offsets: position of the first box centre, as a fraction of the step size.
        if self.this_offsets is None:
            offset_height = 0.5
            offset_width = 0.5
        else:
            offset_height, offset_width = self._as_pair(self.this_offsets, 'this_offsets')

        # Grid of box centre points; identical for all aspect ratios.
        cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height)
        cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width)
        cx_grid, cy_grid = np.meshgrid(cx, cy)
        # Trailing axis so that np.tile() can repeat the grid once per box.
        cx_grid = np.expand_dims(cx_grid, -1)
        cy_grid = np.expand_dims(cy_grid, -1)

        # Template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        # whose last axis holds `(cx, cy, w, h)`.
        boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))

        boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes)) # Set cx
        boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes)) # Set cy
        boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w
        boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h

        # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')

        # If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries.
        # Fancy indexing returns a copy, hence the explicit write-back.
        if self.clip_boxes:
            x_coords = boxes_tensor[:,:,:,[0, 2]]
            x_coords[x_coords >= self.img_width] = self.img_width - 1
            x_coords[x_coords < 0] = 0
            boxes_tensor[:,:,:,[0, 2]] = x_coords
            y_coords = boxes_tensor[:,:,:,[1, 3]]
            y_coords[y_coords >= self.img_height] = self.img_height - 1
            y_coords[y_coords < 0] = 0
            boxes_tensor[:,:,:,[1, 3]] = y_coords

        # If `normalize_coords` is enabled, divide the coordinates by the image size.
        if self.normalize_coords:
            boxes_tensor[:, :, :, [0, 2]] /= self.img_width
            boxes_tensor[:, :, :, [1, 3]] /= self.img_height

        # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
        if self.coords == 'centroids':
            # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
            boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half')
        elif self.coords == 'minmax':
            # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
            boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half')

        # Same 4 variance values for every box, broadcast over the box tensor's shape.
        variances_tensor = np.zeros_like(boxes_tensor)
        variances_tensor += self.variances
        # `boxes_tensor` becomes `(feature_map_height, feature_map_width, n_boxes, 8)`.
        boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)

        # Prepend a batch axis and tile along it using the dynamic batch size of `x`.
        boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
        boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))

        return boxes_tensor

    def compute_output_shape(self, input_shape):
        """Return `(batch, feature_map_height, feature_map_width, n_boxes, 8)`."""
        # Use the same data-format query as `call`; `K.image_dim_ordering()` is
        # not available in `tensorflow.keras.backend`.
        if K.image_data_format() == 'channels_last':
            batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
        else:
            batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape
        return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = {
            'img_height': self.img_height,
            'img_width': self.img_width,
            'this_scale': self.this_scale,
            'next_scale': self.next_scale,
            'aspect_ratios': list(self.aspect_ratios),
            'two_boxes_for_ar1': self.two_boxes_for_ar1,
            # Steps and offsets change the anchor grid, so they must survive a
            # save/load round trip.
            'this_steps': self.this_steps,
            'this_offsets': self.this_offsets,
            'clip_boxes': self.clip_boxes,
            # Plain Python floats instead of NumPy scalars.
            'variances': [float(v) for v in self.variances],
            'coords': self.coords,
            'normalize_coords': self.normalize_coords
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))


In [8]:
#designing fire architecture
# Per-module configuration:
#   (squeeze filters, expand 1x1 filters, expand 3x3 filters,
#    batch-norm after squeeze, batch-norm after the expand convolutions)
_FIRE_SPECS = {
  'fire1':  (15, 49, 53, False, False),
  'fire2':  (15, 54, 52, False, False),
  'fire3':  (29, 92, 94, False, False),
  'fire4':  (29, 90, 93, False, False),
  'fire5':  (44, 166, 161, False, False),
  'fire6':  (45, 155, 146, False, False),
  'fire7':  (49, 163, 171, False, False),
  'fire8':  (25, 29, 54, False, False),
  'fire9':  (37, 45, 56, False, True),
  'fire10': (38, 41, 44, True, True),
}


def fire(string_name=None, layer_parameter=None):
  """Build one fire module (squeeze -> parallel 1x1 / 3x3 expand -> concat).

  Args:
    string_name: Which module to build, 'fire1' .. 'fire10'. Selects the filter
      counts and whether batch normalisation is inserted.
    layer_parameter: Input Keras tensor.

  Returns:
    The concatenation of the 1x1 and 3x3 expand branches.

  Raises:
    ValueError: If `string_name` is not a known fire module (previously this
      silently returned None).
  """
  if string_name not in _FIRE_SPECS:
    raise ValueError("Unknown fire module {!r}. Supported values are {}.".format(string_name, sorted(_FIRE_SPECS)))
  squeeze_filters, expand1_filters, expand3_filters, bn_after_squeeze, bn_after_expand = _FIRE_SPECS[string_name]

  # Layers are created in the same order as before so that Keras' automatically
  # generated layer names are unchanged.
  # squeeze convolution layer (linear, no activation)
  squeeze = layers.Conv2D(squeeze_filters, (1,1), kernel_initializer='he_normal')(layer_parameter)
  if bn_after_squeeze:
    squeeze = layers.BatchNormalization()(squeeze)

  # expand layer (1x1)
  expand_1 = layers.Conv2D(expand1_filters, (1,1), activation='relu', kernel_initializer='he_normal')(squeeze)
  if bn_after_expand:
    expand_1 = layers.BatchNormalization()(expand_1)

  # expand layer (3x3)
  expand_3 = layers.Conv2D(expand3_filters, (3,3), padding='same', activation='relu', kernel_initializer='he_normal')(squeeze)
  if bn_after_expand:
    # Only the 3x3 branch gets a ReLU after its batch normalisation; this
    # asymmetry is kept exactly as in the original definition.
    expand_3 = layers.BatchNormalization()(expand_3)
    expand_3 = layers.Activation('relu')(expand_3)

  # concat layer
  return layers.Concatenate()([expand_1, expand_3])

In [52]:
# Localization and confidence layer
def localization(string_name=None, layer_parameter=None):
  """Build the flattened box-localization head for one predictor layer.

  Args:
    string_name: Predictor layer name: 'Conv13_2', 'Conv12_2', 'fire5',
      'fire9', 'fire10' or 'fire11'.
    layer_parameter: Input Keras tensor (the predictor feature map).

  Returns:
    The flattened output of a 3x3 convolution applied to `layer_parameter`.

  Raises:
    ValueError: If `string_name` is not a known predictor layer (previously
      this silently returned None).
  """
  # name -> (filters, padding, apply L2 kernel regularisation)
  # NOTE(review): 85 filters for 'Conv13_2' is not a multiple of 4 box
  # coordinates - confirm the intended value.
  head_specs = {
    'Conv13_2': (85, 'same', True),
    'Conv12_2': (16, 'valid', True),
    'fire5':    (16, 'same', False),
    'fire9':    (24, 'same', False),
    'fire10':   (24, 'same', False),
    'fire11':   (24, 'same', False),
  }
  if string_name not in head_specs:
    raise ValueError("Unknown localization layer {!r}. Supported values are {}.".format(string_name, sorted(head_specs)))
  filters, padding, regularized = head_specs[string_name]

  # `kernel_regularizer` needs a regularizer instance, not the `l2` factory
  # itself. `l2()` uses the Keras default factor.
  regularizer = l2() if regularized else None
  loc = layers.Conv2D(filters, (3,3), strides=1, padding=padding,
                      kernel_initializer='he_normal', kernel_regularizer=regularizer)(layer_parameter)
  return layers.Flatten()(loc)

def confidence(string_name=None, layer_parameter=None):
  """Build the flattened class-confidence head for one predictor layer.

  Args:
    string_name: Predictor layer name: 'Conv13_2', 'Conv12_2', 'fire5',
      'fire9', 'fire10' or 'fire11'.
    layer_parameter: Input Keras tensor (the predictor feature map).

  Returns:
    The flattened output of a 3x3 convolution applied to `layer_parameter`.

  Raises:
    ValueError: If `string_name` is not a known predictor layer (previously
      this silently returned None).
  """
  # name -> (filters, padding, apply L2 kernel regularisation)
  # NOTE(review): 'fire10' uses 24 filters while 'fire9' and 'fire11' use 126;
  # this looks like a copy-paste from the localization head - confirm.
  head_specs = {
    'Conv13_2': (84, 'same', True),
    'Conv12_2': (126, 'valid', True),
    'fire5':    (84, 'same', False),
    'fire9':    (126, 'same', False),
    'fire10':   (24, 'same', False),
    'fire11':   (126, 'same', False),
  }
  if string_name not in head_specs:
    raise ValueError("Unknown confidence layer {!r}. Supported values are {}.".format(string_name, sorted(head_specs)))
  filters, padding, regularized = head_specs[string_name]

  # `kernel_regularizer` needs a regularizer instance, not the `l2` factory
  # itself. `l2()` uses the Keras default factor.
  regularizer = l2() if regularized else None
  conf = layers.Conv2D(filters, (3,3), strides=1, padding=padding,
                       kernel_initializer='he_normal', kernel_regularizer=regularizer)(layer_parameter)
  return layers.Flatten()(conf)

In [53]:
# prior box calling function
def fire_priorbox(string_name=None, layer_name=None, img_height=300, img_width=300):
  """Attach an `AnchorBoxes` (prior box) layer to one predictor feature map.

  Args:
    string_name: Predictor layer name: 'fire4', 'fire8', 'fire9', 'fire10' or
      'Conv12_2'. Any other value selects the last predictor ('Conv13_2'),
      as before.
    layer_name: Input Keras tensor (the predictor feature map).
    img_height: Height of the network input image in pixels.
    img_width: Width of the network input image in pixels.

  Returns:
    The output tensor of the `AnchorBoxes` layer.

  Raises:
    ValueError: If the hard-coded per-layer settings are inconsistent with the
      number of predictor layers.
  """
  # Earlier versions passed the feature-map size (37, 18, 9, ...) as the image
  # size. `steps` below are expressed in input-image pixels, so with
  # `normalize_coords=True` that produced coordinates far outside [0, 1].
  # The image size now defaults to the 300x300 model input.
  n_predictor_layers = 6
  min_scale, max_scale = 0.1, 0.2
  variances = [0.1, 0.1, 0.2, 0.2]
  steps = [8, 16, 32, 64, 100, 300]
  offsets = [None] * n_predictor_layers
  two_boxes_for_ar1 = True
  coords = 'centroids'
  clip_boxes = False
  normalize_coords = True
  aspect_ratios_per_layer = [[1.0, 2.0, 0.5],
                             [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                             [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                             [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                             [1.0, 2.0, 0.5],
                             [1.0, 2.0, 0.5]]
  # One more scale than predictor layers: each layer also needs the next scale.
  scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

  # Guard against the constants above being edited inconsistently.
  if len(steps) != n_predictor_layers:
    raise ValueError("You must provide at least one step value per predictor layer.")
  if len(offsets) != n_predictor_layers:
    raise ValueError("You must provide at least one offset value per predictor layer.")
  if len(aspect_ratios_per_layer) != n_predictor_layers:
    raise ValueError("You must provide one aspect ratio list per predictor layer.")

  # Position of each named predictor in the per-layer lists above.
  predictor_index = {'fire4': 0, 'fire8': 1, 'fire9': 2, 'fire10': 3, 'Conv12_2': 4}
  if string_name in predictor_index:
    idx = predictor_index[string_name]
    prefix = string_name
  else:
    # Historical fallback: anything else is treated as the last predictor.
    idx = n_predictor_layers - 1
    prefix = 'Conv13_2'

  return AnchorBoxes(img_height, img_width,
                     this_scale=scales[idx], next_scale=scales[idx + 1],
                     aspect_ratios=aspect_ratios_per_layer[idx],
                     two_boxes_for_ar1=two_boxes_for_ar1,
                     this_steps=steps[idx], this_offsets=offsets[idx],
                     clip_boxes=clip_boxes, variances=variances, coords=coords,
                     normalize_coords=normalize_coords,
                     name=prefix + '_mbox_priorbox')(layer_name)

In [54]:


# desigining the final tiny-ssd model

def TinySSD():
  """Build the Tiny SSD detector as a Keras functional model.

  The network takes a `(300, 300, 3)` input, runs it through a stack of
  `fire` blocks interleaved with max pooling, adds four extra convolutions,
  and then attaches three kinds of heads (localization, confidence, prior
  boxes) to six feature maps. Each head output is reshaped to a 3D tensor
  with a last axis of size 4, concatenated per head type along axis 1, and
  the three results are concatenated along axis 1 into `predictions`.

  Relies on the helpers `fire`, `localization`, `confidence` and
  `fire_priorbox` defined elsewhere in this notebook.

  Returns:
    A `models.Model` mapping the image input to the `predictions` tensor.
  """
  # Named `image_input` so the Python builtin `input` is not shadowed.
  image_input = Input(shape=(300,300,3))
  ########################################################## Base layer #############################################################
  conv1 = layers.Conv2D(57,(3,3),strides=(2,2),activation='relu',kernel_initializer='glorot_uniform')(image_input)
  maxpool1 = layers.MaxPool2D((3,3),strides=2)(conv1)
  # fire 1
  fire1 = fire('fire1',maxpool1)
  # fire 2
  fire2 = fire('fire2',fire1)
  maxpool2 = layers.MaxPool2D((3,3), strides=2, padding='same')(fire2)
  # fire 3
  fire3 = fire('fire3',maxpool2)
  # fire 4
  fire4 = fire('fire4',fire3)
  maxpool3 = layers.MaxPool2D((3,3), strides=2)(fire4)
  # fire 5
  fire5 = fire('fire5', maxpool3)
  # fire 6
  fire6 = fire('fire6', fire5)
  # fire 7
  fire7 = fire('fire7', fire6)
  # fire 8
  fire8 = fire('fire8', fire7)
  maxpool4 = layers.MaxPool2D((3,3), strides=2, padding='same')(fire8)
  # fire 9
  fire9 = fire('fire9', maxpool4)
  maxpool5 = layers.MaxPool2D((3,3), strides=2)(fire9)
  # fire 10
  fire10 = fire('fire10', maxpool5)

  ##################################################### Object detection layer #########################################################
  # Conv12-1 (1x1 convolution; the layer uses the default stride of 1)
  conv12_1 = layers.Conv2D(51,(1,1),padding='same',activation='relu',kernel_initializer='glorot_uniform')(fire10)
  # Conv12-2
  conv12_2 = layers.Conv2D(46,(3,3),strides=1,padding='same',activation='relu')(conv12_1)
  # Conv13-1
  conv13_1 = layers.Conv2D(55,(1,1),padding='valid',activation='relu')(conv12_2)
  conv13_1_batchnorm = layers.BatchNormalization()(conv13_1)
  # Conv13-2 (stride 2 halves the spatial resolution)
  conv13_2 = layers.Conv2D(85,(3,3),strides=2, padding='same', activation='relu')(conv13_1_batchnorm)
  ##################################################Localization layer#############################################################
  # NOTE(review): the head keys ('fire5', 'fire9', ...) do not match the names of
  # the feature maps they are applied to (fire4, fire8, ...) - confirm this
  # mapping against the `localization` helper.
  fire5_mbox_loc = localization('fire5', fire4)
  fire9_mbox_loc = localization('fire9', fire8)
  fire10_mbox_loc = localization('fire10', fire9)
  fire11_mbox_loc = localization('fire11', fire10)
  Conv12_2_mbox_loc = localization('Conv12_2', conv13_1)
  Conv13_2_mbox_loc = localization('Conv13_2', conv13_2)
  # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
  # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
  fire5_mbox_loc_reshape = layers.Reshape((-1, 4), name='fire5_mbox_loc_reshape')(fire5_mbox_loc)
  fire9_mbox_loc_reshape  = layers.Reshape((-1, 4), name='fire9_mbox_loc_reshape')(fire9_mbox_loc)
  fire10_mbox_loc_reshape = layers.Reshape((-1, 4), name='fire10_mbox_loc_reshape')(fire10_mbox_loc)
  fire11_mbox_loc_reshape = layers.Reshape((-1, 4), name='fire11_mbox_loc_reshape')(fire11_mbox_loc)
  Conv12_2_mbox_loc_reshape = layers.Reshape((-1, 4), name='conv12_2_mbox_loc_reshape')(Conv12_2_mbox_loc)
  Conv13_2_mbox_loc_reshape = layers.Reshape((-1, 4), name='conv13_2_mbox_loc_reshape')(Conv13_2_mbox_loc)
  # Concatenate the localization predictions of all six heads along the box axis.
  # The layer name (including its spelling) is kept unchanged so that weights or
  # code referring to the layer by name keep working.
  loc_mbox = layers.Concatenate(axis=1, name='mbox_prioirbox')([fire5_mbox_loc_reshape,
                                                            fire9_mbox_loc_reshape,
                                                            fire10_mbox_loc_reshape,
                                                            fire11_mbox_loc_reshape,
                                                            Conv12_2_mbox_loc_reshape,
                                                            Conv13_2_mbox_loc_reshape])


  #######################################################Confidence layer###################################################################
  fire5_mbox_conf = confidence('fire5', fire4)
  fire9_mbox_conf = confidence('fire9', fire8)
  fire10_mbox_conf = confidence('fire10', fire9)
  fire11_mbox_conf = confidence('fire11', fire10)
  Conv12_2_mbox_conf = confidence('Conv12_2', conv13_1)
  Conv13_2_mbox_conf = confidence('Conv13_2', conv13_2)
  # Reshape the class predictions into 3D tensors whose last axis has size 4.
  # NOTE(review): the softmax below is applied over this last axis, so 4 is
  # presumably the number of classes - confirm against the `confidence` helper.
  fire5_mbox_conf_reshape = layers.Reshape((-1, 4), name='fire5_mbox_conf_reshape')(fire5_mbox_conf)
  fire9_mbox_conf_reshape  = layers.Reshape((-1, 4), name='fire9_mbox_conf_reshape')(fire9_mbox_conf)
  fire10_mbox_conf_reshape = layers.Reshape((-1, 4), name='fire10_mbox_conf_reshape')(fire10_mbox_conf)
  fire11_mbox_conf_reshape = layers.Reshape((-1, 4), name='fire11_mbox_conf_reshape')(fire11_mbox_conf)
  Conv12_2_mbox_conf_reshape = layers.Reshape((-1, 4), name='conv12_2_mbox_conf_reshape')(Conv12_2_mbox_conf)
  Conv13_2_mbox_conf_reshape = layers.Reshape((-1, 4), name='conv13_2_mbox_conf_reshape')(Conv13_2_mbox_conf)
  # Concatenate the class predictions of all six heads along the box axis
  conf_box = layers.Concatenate(axis=1, name='mbox_conf')([fire5_mbox_conf_reshape,
                                                            fire9_mbox_conf_reshape,
                                                            fire10_mbox_conf_reshape,
                                                            fire11_mbox_conf_reshape,
                                                            Conv12_2_mbox_conf_reshape,
                                                            Conv13_2_mbox_conf_reshape])
  # The box coordinate predictions will go into the loss function just the way they are,
  # but for the class predictions, we'll apply a softmax activation layer first
  mbox_conf_softmax = layers.Activation('softmax', name='mbox_conf_softmax')(conf_box)
  ###############################################################prior box ############################################################################################################

  # fire 4 prior box
  fire_4_priorbox = fire_priorbox('fire4',fire4)
  # fire 8 prior box
  fire_8_priorbox = fire_priorbox('fire8',fire8)
  # fire 9 prior box
  fire_9_priorbox = fire_priorbox('fire9',fire9)
  # fire 10 prior box
  fire_10_priorbox = fire_priorbox('fire10',fire10)
  # Conv12_2 prior box (attached to the conv13_1 feature map)
  Conv12_2_priorbox = fire_priorbox('Conv12_2',conv13_1)
  # Conv13_2 prior box; the lower-case key is not one of the names
  # `fire_priorbox` compares against, so it is handled by its final `else` branch
  Conv13_2_priorbox = fire_priorbox('conv13_2',conv13_2)
  # Reshape the anchor box tensors into 3D tensors whose last axis has size 4.
  # NOTE(review): the AnchorBoxes description states a last axis of size 8 (four
  # coordinates plus four variances); reshaping to 4 would spread every anchor
  # over two rows - confirm whether `(-1, 8)` was intended.
  fire_4_priorbox_reshape = layers.Reshape((-1, 4), name='fire4_mbox_reshape')(fire_4_priorbox)
  fire_8_priorbox_reshape = layers.Reshape((-1, 4),name='fire8_mbox_reshape')(fire_8_priorbox)
  fire_9_priorbox_reshape = layers.Reshape((-1, 4),name='fire9_mbox_reshape')(fire_9_priorbox)
  fire_10_priorbox_reshape = layers.Reshape((-1, 4),name='fire10_mbox_reshape')(fire_10_priorbox)
  Conv12_2_priorbox_reshape = layers.Reshape((-1, 4),name='Conv12_2_mbox_reshape')(Conv12_2_priorbox)
  Conv13_2_priorbox_reshape = layers.Reshape((-1, 4),name='Conv13_2_mbox_reshape')(Conv13_2_priorbox)
  # Concatenate the prior boxes of all six heads along the box axis
  prior_box = layers.Concatenate(axis=1, name='prioirbox')([fire_4_priorbox_reshape,
                                                            fire_8_priorbox_reshape,
                                                            fire_9_priorbox_reshape,
                                                            fire_10_priorbox_reshape,
                                                            Conv12_2_priorbox_reshape,
                                                            Conv13_2_priorbox_reshape])


  ##########################################################Final prediction layer###############################################################
  # The three tensors are stacked along axis 1, so the last axis of `predictions` stays at size 4.
  # NOTE(review): a per-box layout of `(batch, n_boxes_total, n_classes + 4 + 8)` would
  # require concatenating on axis 2 with matching box counts - confirm the intended layout.
  predictions = layers.Concatenate(axis=1, name='predictions')([mbox_conf_softmax, loc_mbox, prior_box])
  return models.Model(inputs=image_input, outputs=predictions)

In [55]:
# Build the Tiny SSD model and print its layer-by-layer summary
model = TinySSD()
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_13 (InputLayer)          [(None, 300, 300, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_566 (Conv2D)            (None, 149, 149, 57  1596        ['input_13[0][0]']               
                                )                                                                 
                                                                                                  
 max_pooling2d_60 (MaxPooling2D  (None, 74, 74, 57)  0           ['conv2d_566[0][0]']             
 )                                                                                          