## 『本次練習內容』
#### 學習搭建RPN層

## 『本次練習目的』
#### 了解 Object Detection 演算法如何同時完成物件分類與 BBOX 座標迴歸

In [1]:
from keras import backend as K
from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed, Lambda

Using TensorFlow backend.


In [2]:
import os
# Expose only GPU 0 to CUDA-based libraries running in this process.
# NOTE(review): this cell runs after the keras import in cell 1; the variable is
# normally set before the framework is imported -- confirm the selection takes effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
# Fixed input size for this exercise: 1024 x 1024 pixels, 3 channels (channels last).
input_shape_img = (1024, 1024, 3)
# Symbolic input tensor for the network; Keras prepends the batch dimension,
# so its shape is reported as (None, 1024, 1024, 3).
img_input = Input(shape=input_shape_img)

In [4]:
'''先過一般CNN層提取特徵'''
def nn_base(img_input):
    """Run the input through a VGG16-style convolutional backbone.

    Five blocks of 3x3 'same'-padded ReLU convolutions are stacked. Blocks 1-4
    are each followed by a 2x2 max-pool with stride 2, so the spatial size is
    halved four times (1/16 overall); block 5 has no pooling. The tensor shape
    is printed for the input, after each pooled block, and for the result.

    Args:
        img_input: Keras tensor holding the image batch (channels last).

    Returns:
        The feature-map tensor with 512 channels; for a 1024x1024 input this
        is 64x64x512.
    """
    print("input :", img_input.shape)

    # One entry per block: (filters, number of conv layers, pooled afterwards?)
    block_specs = (
        (64, 2, True),    # block 1: 1024x1024 -> 512x512 for a 1024 input
        (128, 2, True),   # block 2: 512x512 -> 256x256
        (256, 3, True),   # block 3: 256x256 -> 128x128
        (512, 3, True),   # block 4: 128x128 -> 64x64
        (512, 3, False),  # block 5: no pooling, spatial size unchanged
    )

    features = img_input
    for block_no, (n_filters, n_convs, has_pool) in enumerate(block_specs, start=1):
        for conv_no in range(1, n_convs + 1):
            conv_name = 'block%d_conv%d' % (block_no, conv_no)
            features = Conv2D(n_filters, (3, 3), activation='relu', padding='same', name=conv_name)(features)

        if has_pool:
            # Halve height and width.
            features = MaxPooling2D((2, 2), strides=(2, 2), name='block%d_pool' % block_no)(features)
            print("block%d:" % block_no, features.shape)

    print("return:", features.shape)
    return features

In [5]:
'''過RPN'''
def _softmax_per_anchor(logits):
    """Apply softmax separately to each anchor's pair of class scores.

    Expects a channels-last tensor whose last axis holds 2 * num_anchors
    logits laid out as consecutive pairs (channels 2i and 2i+1 belong to
    anchor i). Returns a tensor of the same shape in which every pair sums
    to 1.
    """
    # Channels are the fastest-varying axis, so flattening to rows of two
    # groups channels (2i, 2i+1) of one location into one row.
    pair_logits = K.reshape(logits, (-1, 2))
    pair_probs = K.softmax(pair_logits)
    # Restore the original (batch, height, width, 2 * num_anchors) layout.
    return K.reshape(pair_probs, K.shape(logits))


def rpn(base_layers, num_anchors):
    """Build the Region Proposal Network heads on top of a backbone feature map.

    A shared 3x3 convolution is followed by two sibling 1x1 convolutions:
    a classification head with 2 scores per anchor and a box-regression head
    with 4 values per anchor. The shapes of both outputs are printed.

    The classification scores are normalised with a softmax over each
    anchor's own two scores. Putting ``activation='softmax'`` directly on the
    2k-channel convolution would instead normalise across all 2k channels, so
    the scores of every anchor at a location would compete and sum to 1.

    Args:
        base_layers: backbone feature-map tensor (channels last), e.g. the
            output of nn_base().
        num_anchors: number of anchors k evaluated at each feature-map location.

    Returns:
        Tuple ``(x_class, x_reg, base_layers)``:
        x_class has 2 * num_anchors channels (per-anchor probabilities),
        x_reg has 4 * num_anchors channels (linear regression outputs),
        base_layers is the input tensor, returned unchanged.
    """
    # Shared 3x3 conv over the backbone features.
    x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)

    # Classification head: 2k output channels. The conv keeps its original
    # layer name and emits raw logits; normalisation happens per anchor below.
    class_logits = Conv2D(num_anchors * 2, (1, 1), activation='linear', name='rpn_out_class')(x)
    x_class = Lambda(_softmax_per_anchor, output_shape=lambda s: s, name='rpn_out_class_softmax')(class_logits)

    # Regression head: 4k output channels, linear.
    x_reg = Conv2D(num_anchors * 4, (1, 1), activation='linear', name='rpn_out_regress')(x)

    print("class:", x_class.shape)
    print("regre:", x_reg.shape)

    return x_class, x_reg, base_layers

In [6]:
# Build the convolutional backbone on the input tensor; nn_base prints the
# tensor shape after each stage.
base_layers = nn_base(img_input)

input : (None, 1024, 1024, 3)
block1: (None, 512, 512, 64)
block2: (None, 256, 256, 128)
block3: (None, 128, 128, 256)
block4: (None, 64, 64, 512)
return: (None, 64, 64, 512)


In [7]:
# Attach the RPN heads with 9 anchors per feature-map location
# (presumably 3 scales x 3 aspect ratios as in Faster R-CNN -- confirm).
# rpn() returns base_layers unchanged, so rebinding the name keeps the same tensor.
x_class, x_reg, base_layers = rpn(base_layers, 9)

class: (None, 64, 64, 18)
regre: (None, 64, 64, 36)


In [8]:
print('Classification  Branch：',x_class.shape) # confirm the channel depth is 18 (2 scores x 9 anchors)
print('BBOX Regression Branch：',x_reg.shape) # confirm the channel depth is 36 (4 box values x 9 anchors)
print('CNN Output            ：',base_layers.shape)

Classification  Branch： (None, 64, 64, 18)
BBOX Regression Branch： (None, 64, 64, 36)
CNN Output            ： (None, 64, 64, 512)
