## 『本次練習內容』
#### 學習搭建RPN層

## 『本次練習目的』
#### 了解 Object Detection 演算法如何同時完成分類與 BBox 座標迴歸

In [1]:
from tensorflow.keras.layers import Input, Dense, Dropout, Conv2D, MaxPooling2D, GlobalMaxPooling2D, GlobalAveragePooling2D, TimeDistributed

In [2]:
# Network input: 1024x1024 images with 3 channels (channels on the last axis).
input_shape_img = (1024, 1024, 3)
img_input = Input(shape=input_shape_img)

### Step 1 : CNN 提取特徵

In [3]:
def CNN_Base(img_input):
    """Stack the convolutional feature extractor on top of ``img_input``.

    The layout mirrors the five VGG16 convolution blocks: every convolution
    is 3x3, 'same'-padded and ReLU-activated. Blocks 1-4 each end with a
    2x2 / stride-2 max pooling that halves the spatial size; block 5 has no
    pooling, so the overall downsampling factor is 16.

    Args:
        img_input: Keras tensor holding the input image batch.

    Returns:
        The output tensor of layer 'block5_conv3' (512 channels). For a
        1024x1024 input the feature map is 64x64x512.
    """
    # One entry per block: (filters, number of conv layers, ends with pooling?)
    block_specs = (
        (64, 2, True),    # block 1: 1024x1024 -> 512x512 after pooling
        (128, 2, True),   # block 2: 512x512 -> 256x256 after pooling
        (256, 3, True),   # block 3: 256x256 -> 128x128 after pooling
        (512, 3, True),   # block 4: 128x128 -> 64x64 after pooling
        (512, 3, False),  # block 5: no pooling, spatial size is kept
    )

    features = img_input
    for block_no, (filters, conv_count, has_pool) in enumerate(block_specs, start=1):
        for conv_no in range(1, conv_count + 1):
            conv_name = 'block{}_conv{}'.format(block_no, conv_no)
            features = Conv2D(
                filters, (3, 3), activation='relu', padding='same', name=conv_name
            )(features)
        if has_pool:
            pool_name = 'block{}_pool'.format(block_no)
            features = MaxPooling2D((2, 2), strides=(2, 2), name=pool_name)(features)

    return features

### Step 2 : RPN( Region Proposal Network )

In [4]:
def RPN(Base_Layers, num_anchors):
    """Attach the Region Proposal Network heads to a backbone feature map.

    A shared 3x3 convolution feeds two sibling 1x1 convolutions: one for
    foreground/background classification and one for bounding-box
    regression.

    Args:
        Base_Layers: Keras tensor with the backbone feature map.
        num_anchors: number of anchors predicted at every feature-map location.

    Returns:
        A 3-tuple ``(class_scores, box_deltas, Base_Layers)`` where
        ``class_scores`` has ``num_anchors * 2`` channels, ``box_deltas`` has
        ``num_anchors * 4`` channels ([x, y, w, h] per anchor), and
        ``Base_Layers`` is the input tensor passed through unchanged.
    """
    # Shared 3x3 convolution in front of both heads.
    shared_conv = Conv2D(
        512,
        (3, 3),
        padding='same',
        activation='relu',
        kernel_initializer='normal',
        name='rpn_conv1',
    )
    rpn_features = shared_conv(Base_Layers)

    # Classification head: two scores (foreground / background) per anchor.
    # NOTE(review): Keras applies 'softmax' over the last (channel) axis, i.e.
    # jointly across all num_anchors * 2 outputs rather than per anchor pair.
    # Confirm this is intended.
    class_head = Conv2D(
        num_anchors * 2, (1, 1), activation='softmax', name='rpn_out_class'
    )
    class_scores = class_head(rpn_features)

    # Regression head: four box values [x, y, w, h] per anchor, no non-linearity.
    regress_head = Conv2D(
        num_anchors * 4, (1, 1), activation='linear', name='rpn_out_regress'
    )
    box_deltas = regress_head(rpn_features)

    return class_scores, box_deltas, Base_Layers

In [5]:
# Build the convolutional backbone on the image input; the result is the
# feature-map tensor that the RPN consumes.
Base_Layers = CNN_Base(img_input)

In [6]:
# Attach the RPN heads with 9 anchors per location, which gives
# 9 * 2 = 18 classification channels and 9 * 4 = 36 regression channels.
x_class, x_reg, Base_Layers = RPN(Base_Layers, 9)

In [7]:
# Inspect the symbolic tensors to verify the output shapes.
print('CNN Output：', Base_Layers)
print('RPN 分類支線：', x_class)  # classification branch: is the depth 18?
print('RPN 迴歸支線：', x_reg)  # regression branch: is the depth 36?

CNN Output： Tensor("block5_conv3/Identity:0", shape=(None, 64, 64, 512), dtype=float32)
RPN 分類支線： Tensor("rpn_out_class/Identity:0", shape=(None, 64, 64, 18), dtype=float32)
RPN 迴歸支線： Tensor("rpn_out_regress/Identity:0", shape=(None, 64, 64, 36), dtype=float32)
