### SSD exploration

In [45]:
"""
This notebook explores parts of the SSD model based on the keras implementation
https://github.com/pierluigiferrari/ssd_keras
"""

import sys
sys.path.append("../../ssd_keras/")

import os
from datetime import datetime

import numpy as np
import tensorflow as tf
import tensorboard
import keras
from keras.utils import plot_model
from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image
from keras.optimizers import Adam
from imageio import imread
from matplotlib import pyplot as plt

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

%matplotlib inline

In [46]:
IMAGE_HEIGHT = 300
IMAGE_WIDTH = 300

In [56]:
logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")  # define a log directory
K.clear_session()  # Clear previous models from memory.

model = ssd_300(image_size=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                n_classes=20,
                mode='inference',
                l2_regularization=0.0005,
                scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
                # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                         [1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5]],
                two_boxes_for_ar1=True,
                steps=[8, 16, 32, 64, 100, 300],
                offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                clip_boxes=False,
                variances=[0.1, 0.1, 0.2, 0.2],
                normalize_coords=True,
                subtract_mean=[123, 117, 104],
                swap_channels=[2, 1, 0],
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400)

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

# Write the session to explore with tensorboard
with K.get_session() as sess:
    writer = tf.summary.FileWriter(logdir, sess.graph)
    writer.close()

# plot_model(model, 'ssd_300.png', show_shapes=True, show_layer_names=True)
print(model.summary())

AttributeError: __enter__

In [48]:
"""
The model uses VGG16 network as a feature extractor. 
Predictions are generated using multiple feature maps.
Starting from the layer Conv4_3 SSD produces prediction from 6 layers
"""
conv4_3 = model.get_layer('conv4_3')
next_layers = conv4_3._outbound_nodes
[print(l.get_config()) for l in next_layers]

conv4_3_norm = model.get_layer('conv4_3_norm')

next_layers = conv4_3_norm._outbound_nodes
[print(l.get_config()) for l in next_layers]

print("conv4_3 output: ", conv4_3.output.shape)
print("conf: ", model.get_layer('conv4_3_norm_mbox_conf').output.shape)
print("loc: ", model.get_layer('conv4_3_norm_mbox_loc').output.shape)

{'outbound_layer': 'pool4', 'inbound_layers': ['conv4_3'], 'node_indices': [0], 'tensor_indices': [0]}
{'outbound_layer': 'conv4_3_norm', 'inbound_layers': ['conv4_3'], 'node_indices': [0], 'tensor_indices': [0]}
{'outbound_layer': 'conv4_3_norm_mbox_conf', 'inbound_layers': ['conv4_3_norm'], 'node_indices': [0], 'tensor_indices': [0]}
{'outbound_layer': 'conv4_3_norm_mbox_loc', 'inbound_layers': ['conv4_3_norm'], 'node_indices': [0], 'tensor_indices': [0]}
conv4_3 output:  (?, 38, 38, 512)
conf:  (?, 38, 38, 84)
loc:  (?, 38, 38, 16)


In [19]:
"""
The first layer generating predictions is Conv4_3.
For each location of the grid cells there are 4 different predictions. 
Which are split along two branches, one for confidence scores and one for bounding boxes
"""

In [60]:
"""
Since the network is not able to generate absolute predictions, 
Constant values for the anchor boxes are used as constants in the implementations
"""
prior = model.get_layer('conv4_3_norm_mbox_priorbox')
prior_model = keras.Model(inputs=model.inputs, outputs=prior.output)
dummy_img = np.random.rand(1, 300, 300, 3)
anchor_boxes = prior_model.predict(dummy_img)

In [61]:
print(anchor_boxes.shape)

(1, 38, 38, 4, 8)
