In [1]:
import matplotlib.pyplot as plt
import numpy as np
import time
import cv2

## The Summary of Faster RCNN

In [5]:
def faster_rcnn_summary(net, layer_type=False, layer_shape=False, 
                        get_flops=False, get_memory=False,
                        get_total_time=False, get_layer_time=False):
    # read the model
#     net = cv2.dnn.readNetFromCaffe(proto_path, model_path)
    
    # check if the model is empty
    if net.empty():
        print("Net is empty")
        return
        
    # get the types of layers used in the model
    if layer_type:
        print("Net contains:")
        for t in net.getLayerTypes():
            print("\t{} layers of type {}".format(net.getLayersCount(t), t))
    
    # get the tensor shapes for the loaded model and specified input shape
    if layer_shape:
        layers_ids, in_shapes, out_shapes = net.getLayersShapes([1, 3, 224, 224])
        layers_names = net.getLayerNames()
        print("Net layers shapes: ")
        for l in range(len(layers_names)):
            in_num, out_num = len(in_shapes[l]), len(out_shapes[l])
            print("  Layer {} has {} inputs and {} outputs".format(layers_names[l],
                                                                 in_num, out_num))
            for i in range(in_num):
                print("\tinput #{} has shape {}".format(i, in_shapes[l][i].flatten()))
            for i in range(out_num):
                print("\toutput #{} has shape {}".format(i, out_shapes[l][i].flatten()))
    
    # compute the number of FLOPs
    if get_flops:
        print("gflops: ", net.getFLOPS((1, 3, 224, 224)) * 1e-9)
    
    # report the amount of memory consumed for storing weights and intermediate tensors
    if get_memory:
        w, b = net.getMemoryConsumption((1, 3, 224, 224))
        print("weights (mb):", w * 1e-6, ", blobs (mb):", b * 1e-6)
    
    # perform a forward pass for a mock input:
    blob = cv2.dnn.blobFromImage(np.zeros((224, 224, 3), np.uint8), 1, (224, 224))
    net.setInput(blob)
    net.forward()
    
    # report the total time
    if get_total_time:
        total, timings = net.getPerfProfile()
        tick2ms = 1000 / cv2.getTickFrequency()
        print("inference (ms): {:2f}".format(total * tick2ms))

    # report the per layer inference time
    if get_layer_time:
        layer_names = net.getLayerNames()
        print("{: <30} {}".format("LAYER", "TIME (ms)"))
        for (i,t) in enumerate(timings):
            print("{: <30} {:.2f}".format(layer_names[i], t[0] * tick2ms))

In [3]:
model_path = "./model/frozen_inference_graph.pb"
proto_path = "./model/faster_rcnn_resnet50_coco_2018_01_28.pbtxt"

In [4]:
net = cv2.dnn.readNetFromTensorflow(model_path, proto_path)

In [7]:
faster_rcnn_summary(net, layer_type=True)

Net contains:
	56 layers of type Convolution
	1 layers of type CropAndResize
	2 layers of type DetectionOutput
	16 layers of type Eltwise
	4 layers of type Flatten
	2 layers of type InnerProduct
	3 layers of type Permute
	5 layers of type Pooling
	1 layers of type PriorBox
	49 layers of type ReLU
	2 layers of type ReLU6
	5 layers of type Reshape
	1 layers of type Scale
	1 layers of type Shift
	2 layers of type Slice
	2 layers of type Softmax
	1 layers of type __NetInputLayer__


In [8]:
faster_rcnn_summary(net, layer_shape=True)

Net layers shapes: 
  Layer Preprocessor/sub has 1 inputs and 1 outputs
	input #0 has shape [  1   3 224 224]
	output #0 has shape [  1   3 224 224]
  Layer FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/conv1/Conv2D has 1 inputs and 1 outputs
	input #0 has shape [  1   3 224 224]
	output #0 has shape [  1   3 224 224]
  Layer FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/conv1/Relu has 1 inputs and 1 outputs
	input #0 has shape [  1   3 224 224]
	output #0 has shape [  1  64 112 112]
  Layer FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/pool1/MaxPool has 1 inputs and 1 outputs
	input #0 has shape [  1  64 112 112]
	output #0 has shape [  1  64 112 112]
  Layer FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Conv2D has 1 inputs and 1 outputs
	input #0 has shape [  1  64 112 112]
	output #0 has shape [ 1 64 56 56]
  Layer FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Relu has 1 inputs and

	input #0 has shape [ 1 24 14 14]
	output #0 has shape [ 1 14 14 24]
  Layer FirstStageBoxPredictor/ClassPredictor/softmax has 1 inputs and 1 outputs
	input #0 has shape [ 1 14 14 24]
	output #0 has shape [   1 2352    2]
  Layer FirstStageBoxPredictor/ClassPredictor/softmax/flatten has 1 inputs and 1 outputs
	input #0 has shape [   1 2352    2]
	output #0 has shape [   1 2352    2]
  Layer FirstStageBoxPredictor/BoxEncodingPredictor/flatten/nchw has 1 inputs and 1 outputs
	input #0 has shape [   1 2352    2]
	output #0 has shape [   1 4704]
  Layer FirstStageBoxPredictor/BoxEncodingPredictor/flatten has 1 inputs and 1 outputs
	input #0 has shape [ 1 48 14 14]
	output #0 has shape [ 1 14 14 48]
  Layer proposals has 1 inputs and 1 outputs
	input #0 has shape [ 1 14 14 48]
	output #0 has shape [   1 9408]
  Layer detection_out has 2 inputs and 1 outputs
	input #0 has shape [ 1 48 14 14]
	input #1 has shape [  1   3 224 224]
	output #0 has shape [   1    2 9408]
  Layer detection_out/cli

In [9]:
faster_rcnn_summary(net, get_flops=True, get_memory=True)

gflops:  154.542609992
weights (mb): 116.64647199999999 , blobs (mb): 712.595872


In [10]:
faster_rcnn_summary(net, get_total_time=True, get_layer_time=True)

inference (ms): 12268.492365
LAYER                          TIME (ms)
Preprocessor/sub               0.26
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/conv1/Conv2D 38.35
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/conv1/Relu 0.00
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/pool1/MaxPool 5.17
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Conv2D 5.17
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Relu 0.00
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/Conv2D 40.58
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/Relu 0.00
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/conv3/Conv2D 16.46
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/Conv2D 24.35
FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/add 