In [1]:
import numpy as np
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras.layers import *
from tensorflow.keras.models import *

2023-04-03 13:53:13.581510: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-03 13:53:13.706069: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-03 13:53:13.733091: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# RCNN

### "Region-based Convolutional Neural Network"(R-CNN)는 Object Detection 문제를 해결하기 위한 딥러닝 모델
#### 이 노트북의 RCNN은 4개의 주요 component로 구성된다. (RPN으로 proposal을 만드는 구조는 R-CNN 계열 중 Faster R-CNN에 해당한다.)

### 1. Region Proposal Network (RPN)
- RPN을 사용하여 이미지 내 객체 후보 영역을 추출

### 2. Feature Extractor
- 추출된 영역은 Feature Extractor를 통해 특성 맵(feature map)으로 변환

### 3. Object Classifier

### 4. Bounding Box Regressor

- feature map은 Object Classifier와 Bounding Box Regressor에 입력으로 사용되어 최종적으로 객체를 검출하고 위치를 조정.


## 1. RPN(Region Proposal Network) 구현
### RPN은 입력 이미지에서 object가 있을 것으로 예상되는 영역(proposal)을 추출하기 위한 신경망
- RPN은 입력 이미지에서 feature map을 추출
- sliding window 기법으로 이미지 전체를 훑으면서 proposal 영역을 추출

In [2]:
def rpn(input_tensor, num_anchors):
    """Build a small RPN head on top of ``input_tensor``.

    A shared 3x3 convolution (256 filters, ReLU, 'same' padding) feeds two
    sibling 1x1 convolutions.

    Args:
        input_tensor: Tensor the head is attached to.
        num_anchors: Anchors per spatial location; sets the channel count of
            both outputs.

    Returns:
        Tuple ``(rpn_objectness, rpn_bbox_regression)``: a sigmoid-activated
        map with ``num_anchors`` channels and a map with ``num_anchors * 4``
        channels (no activation argument is passed to that layer).
    """
    shared = Conv2D(256, (3, 3), padding='same', activation='relu')(input_tensor)

    # Both heads use fixed layer names.
    objectness = Conv2D(
        num_anchors, (1, 1), name='rpn_objectness', activation='sigmoid'
    )(shared)
    box_deltas = Conv2D(
        num_anchors * 4, (1, 1), name='rpn_bbox_regression'
    )(shared)

    return objectness, box_deltas

## 2. Feature Extractor 구현
- Feature Extractor는 입력 이미지에서 feature map을 추출하는 신경망
- ex. VGG16, ResNet 등

In [3]:
def feature_extractor(input_tensor):
    """Stack three conv stages, each closed by a 2x2 max-pool.

    Stage layout (filters, number of 3x3 ReLU convs with 'same' padding):
    (64, 2), (128, 2), (256, 3).

    Args:
        input_tensor: Tensor fed to the first convolution.

    Returns:
        The output of the last max-pooling layer (the feature map).
    """
    stage_layout = ((64, 2), (128, 2), (256, 3))

    feature_map = input_tensor
    for filters, conv_count in stage_layout:
        for _ in range(conv_count):
            feature_map = Conv2D(
                filters, (3, 3), padding='same', activation='relu'
            )(feature_map)
        # Each stage ends with a 2x2 max-pool.
        feature_map = MaxPooling2D((2, 2))(feature_map)

    return feature_map

## 3. RoI Pooling 구현
- RoI Pooling은 RPN에서 추출한 크기가 제각각인 proposal 영역을 feature map 위에서 잘라낸 뒤, 고정된 크기의 feature로 변환해주는 연산
- 일반적으로 pooling 크기는 7x7로 설정

In [4]:
def region_proposal_network(input_tensor, num_anchors):
    """Build a four-conv RPN trunk with objectness and box-regression heads.

    Args:
        input_tensor: Tensor fed to the first convolution.
        num_anchors: Anchors per spatial location; sets the channel count of
            both heads.

    Returns:
        Tuple ``(objectness_score, bbox_regression)``: a sigmoid-activated map
        with ``num_anchors`` channels and a map with ``num_anchors * 4``
        channels (no activation argument is passed to that layer).
    """
    trunk = input_tensor
    # Four identical 3x3 / 128-filter ReLU convolutions.
    for _ in range(4):
        trunk = Conv2D(
            filters=128, kernel_size=(3, 3), activation='relu', padding='same'
        )(trunk)

    # Per-anchor heads; both use fixed layer names.
    scores = Conv2D(
        filters=num_anchors,
        kernel_size=(1, 1),
        name='objectness_score',
        activation='sigmoid',
    )(trunk)
    deltas = Conv2D(
        filters=num_anchors * 4,
        kernel_size=(1, 1),
        name='bbox_regression',
    )(trunk)

    return scores, deltas

def region_of_interest_pooling(feature_map, rois, pool_size, box_indices=None):
    """Crop every RoI out of ``feature_map`` and resize it to ``pool_size``.

    This is a thin wrapper around ``tf.image.crop_and_resize``, which
    resamples each box to a fixed size (bilinear by default), so the result is
    a fixed-size feature per RoI rather than a max-pooled one.

    Args:
        feature_map: 4-D feature tensor ``(batch, height, width, channels)``.
        rois: ``(num_rois, 4)`` boxes in the format ``crop_and_resize``
            requires: *normalized* ``[y1, x1, y2, x2]`` coordinates. They are
            passed through unscaled; the op itself maps them onto the feature
            map.
        pool_size: ``(pooled_height, pooled_width)``.
        box_indices: Optional ``(num_rois,)`` int32 tensor giving, for each
            RoI, the batch index of the feature map it belongs to. Defaults to
            all zeros, i.e. every RoI is taken from the first batch element.

    Returns:
        Tensor of shape ``(num_rois, pooled_height, pooled_width, channels)``.
    """
    # crop_and_resize takes float32 normalized boxes. Scaling them to pixel
    # units or casting them to integers (as an earlier version did) violates
    # that contract.
    boxes = tf.cast(rois, tf.float32)

    if box_indices is None:
        # Use the dynamic shape so this also works when the number of RoIs is
        # unknown at graph-construction time (static shape would be None).
        box_indices = tf.zeros(tf.shape(boxes)[:1], dtype=tf.int32)

    # No extra pooling layer is needed: a 1x1 max-pool is an identity op.
    return tf.image.crop_and_resize(feature_map, boxes, box_indices, pool_size)

## 4. Object Classifier 구현
- Object Classifier는 RoI Pooling 이후에 Feature Map에서 추출된 RoI 영역을 입력으로 받아 각 객체의 class를 분류하는 모델.
- 구현을 위해서는 RoI Pooling 이후의 특징 맵을 입력으로 받아 다음과 같은 CNN을 구성한다.

In [5]:
def object_classifier(input_tensor, num_classes):
    """Classify pooled RoI features with a conv + fully-connected head.

    Layout: four 3x3 / 256-filter ReLU convolutions ('same' padding), a
    flatten, two ``Dense(4096, relu) -> Dropout(0.5)`` pairs, and a final
    softmax layer.

    Args:
        input_tensor: Tensor fed to the first convolution.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The softmax class-score tensor.
    """
    hidden = input_tensor

    # Convolutional part.
    for _ in range(4):
        hidden = Conv2D(
            filters=256, kernel_size=(3, 3), activation='relu', padding='same'
        )(hidden)

    # Fully-connected part.
    hidden = Flatten()(hidden)
    for _ in range(2):
        hidden = Dense(units=4096, activation='relu')(hidden)
        hidden = Dropout(0.5)(hidden)

    return Dense(units=num_classes, activation='softmax')(hidden)

# 최종적인 RCNN구현

#### apply_bbox_regression 함수
- bounding box regression 값을 적용하여 RoI를 조정하는 함수

In [6]:
def apply_bbox_regression(rois, bbox_regression):
    """Refine RoIs with predicted ``(dx, dy, dw, dh)`` box deltas.

    Uses the usual R-CNN box parameterization: ``dx``/``dy`` move the box
    centre by a fraction of the box width/height, ``dw``/``dh`` rescale the
    width/height in log-space. All-zero deltas therefore return the input
    boxes unchanged.

    Args:
        rois: Boxes with the four coordinates on the last axis, e.g.
            ``(batch, num_rois, 4)``.
        bbox_regression: One ``(dx, dy, dw, dh)`` quadruple per RoI; any shape
            with the same number of elements as ``rois`` (it is reshaped to
            the RoI layout).

    Returns:
        Refined boxes in ``(x1, y1, x2, y2)`` corner format, same shape as
        ``rois``.
    """
    # Convert RoIs to (x1, y1, x2, y2) corner format.
    # NOTE(review): `to_x1y1x2y2` is not defined in the visible cells; it must
    # be provided elsewhere or this line raises NameError - confirm.
    rois = to_x1y1x2y2(rois)

    # Lay the deltas out exactly like the RoIs so that every operation below
    # is element-wise (splitting into (N, 1) columns would broadcast against
    # the (batch, num_rois) coordinate slices and mix up boxes).
    deltas = tf.reshape(bbox_regression, tf.shape(rois))
    dx, dy = deltas[..., 0], deltas[..., 1]
    dw, dh = deltas[..., 2], deltas[..., 3]

    x1, y1 = rois[..., 0], rois[..., 1]
    x2, y2 = rois[..., 2], rois[..., 3]

    w = x2 - x1
    h = y2 - y1
    # The deltas are relative to the box CENTRE, not its top-left corner.
    ctr_x = x1 + 0.5 * w
    ctr_y = y1 + 0.5 * h

    new_ctr_x = ctr_x + w * dx
    new_ctr_y = ctr_y + h * dy
    new_w = w * tf.exp(dw)
    new_h = h * tf.exp(dh)

    # Back to corner format.
    output_rois = tf.stack(
        [
            new_ctr_x - 0.5 * new_w,
            new_ctr_y - 0.5 * new_h,
            new_ctr_x + 0.5 * new_w,
            new_ctr_y + 0.5 * new_h,
        ],
        axis=-1,
    )

    return output_rois

In [7]:
def RCNN(input_tensor, rois, num_classes, pool_size=(7, 7)):
    """Assemble the detection head: features -> RoI pooling -> scores + boxes.

    Args:
        input_tensor: Keras image tensor fed to ``feature_extractor``.
        rois: Keras tensor of proposals, reshaped here to
            ``(batch, num_rois, 4)``. They are handed to
            ``tf.image.crop_and_resize`` unchanged, so they are assumed to be
            normalized ``[y1, x1, y2, x2]`` boxes - TODO confirm against the
            code that produces them.
        num_classes: Number of object classes.
        pool_size: ``(height, width)`` of the pooled RoI features.

    Returns:
        Tuple ``(class_scores, output_rois)`` with shapes
        ``(batch, num_rois, num_classes)`` and ``(batch, num_rois, 4)``.
        ``output_rois`` are the proposals refined with the deltas predicted
        for each RoI's highest-scoring class.
    """
    # Shared convolutional features for the whole image.
    feature_map = feature_extractor(input_tensor)

    # The RPN head is not built here: proposals arrive through `rois`, and the
    # outputs of an `rpn(...)` call would not be connected to either result.

    rois = Reshape((-1, 4))(rois)

    def _pool_rois(tensors):
        # tf.keras has no RoIPooling2D layer; crop_and_resize provides the
        # fixed-size per-RoI features instead.
        fmap, boxes = tensors
        boxes = tf.cast(boxes, tf.float32)
        batch_size = tf.shape(boxes)[0]
        rois_per_image = tf.shape(boxes)[1]
        # Row i of the flattened boxes belongs to image i // rois_per_image.
        box_indices = tf.repeat(tf.range(batch_size), rois_per_image)
        return tf.image.crop_and_resize(
            fmap, tf.reshape(boxes, (-1, 4)), box_indices, pool_size
        )

    # Shape: (batch * num_rois, pool_h, pool_w, channels).
    roi_pool = tf.keras.layers.Lambda(_pool_rois)([feature_map, rois])

    # Object classification model: one score vector per RoI.
    flat_scores = object_classifier(roi_pool, num_classes)

    # Bounding box regression. The regressor reads the pooled RoI features;
    # softmax probabilities alone carry no localisation information.
    bbox_regression = Dense(units=num_classes * 4, activation='linear')(
        Flatten()(roi_pool)
    )
    bbox_regression = Reshape((num_classes, 4))(bbox_regression)

    def _refine_rois(tensors):
        boxes, scores, deltas = tensors
        # Keep, for every RoI, the deltas of its highest-scoring class so that
        # exactly one (dx, dy, dw, dh) quadruple is applied per box.
        best_class = tf.argmax(scores, axis=-1)
        best_deltas = tf.gather(deltas, best_class, batch_dims=1)
        return apply_bbox_regression(
            boxes, tf.reshape(best_deltas, tf.shape(boxes))
        )

    output_rois = tf.keras.layers.Lambda(_refine_rois)(
        [rois, flat_scores, bbox_regression]
    )

    def _group_by_image(tensors):
        # Restore the (batch, num_rois, ...) layout of the proposals.
        boxes, scores = tensors
        target_shape = tf.concat([tf.shape(boxes)[:2], [num_classes]], axis=0)
        return tf.reshape(scores, target_shape)

    class_scores = tf.keras.layers.Lambda(_group_by_image)([rois, flat_scores])

    return class_scores, output_rois

## RCNN구현 정리

In [8]:
def rpn(input_tensor, num_anchors):
    """Build a small RPN head on top of ``input_tensor``.

    A shared 3x3 convolution (256 filters, ReLU, 'same' padding) feeds two
    sibling 1x1 convolutions.

    Args:
        input_tensor: Tensor the head is attached to.
        num_anchors: Anchors per spatial location; sets the channel count of
            both outputs.

    Returns:
        Tuple ``(rpn_objectness, rpn_bbox_regression)``: a sigmoid-activated
        map with ``num_anchors`` channels and a map with ``num_anchors * 4``
        channels (no activation argument is passed to that layer).
    """
    shared = Conv2D(256, (3, 3), padding='same', activation='relu')(input_tensor)

    # Both heads use fixed layer names.
    objectness = Conv2D(
        num_anchors, (1, 1), name='rpn_objectness', activation='sigmoid'
    )(shared)
    box_deltas = Conv2D(
        num_anchors * 4, (1, 1), name='rpn_bbox_regression'
    )(shared)

    return objectness, box_deltas




def feature_extractor(input_tensor):
    """Stack three conv stages, each closed by a 2x2 max-pool.

    Stage layout (filters, number of 3x3 ReLU convs with 'same' padding):
    (64, 2), (128, 2), (256, 3).

    Args:
        input_tensor: Tensor fed to the first convolution.

    Returns:
        The output of the last max-pooling layer (the feature map).
    """
    stage_layout = ((64, 2), (128, 2), (256, 3))

    feature_map = input_tensor
    for filters, conv_count in stage_layout:
        for _ in range(conv_count):
            feature_map = Conv2D(
                filters, (3, 3), padding='same', activation='relu'
            )(feature_map)
        # Each stage ends with a 2x2 max-pool.
        feature_map = MaxPooling2D((2, 2))(feature_map)

    return feature_map



def region_proposal_network(input_tensor, num_anchors):
    """Build a four-conv RPN trunk with objectness and box-regression heads.

    Args:
        input_tensor: Tensor fed to the first convolution.
        num_anchors: Anchors per spatial location; sets the channel count of
            both heads.

    Returns:
        Tuple ``(objectness_score, bbox_regression)``: a sigmoid-activated map
        with ``num_anchors`` channels and a map with ``num_anchors * 4``
        channels (no activation argument is passed to that layer).
    """
    trunk = input_tensor
    # Four identical 3x3 / 128-filter ReLU convolutions.
    for _ in range(4):
        trunk = Conv2D(
            filters=128, kernel_size=(3, 3), activation='relu', padding='same'
        )(trunk)

    # Per-anchor heads; both use fixed layer names.
    scores = Conv2D(
        filters=num_anchors,
        kernel_size=(1, 1),
        name='objectness_score',
        activation='sigmoid',
    )(trunk)
    deltas = Conv2D(
        filters=num_anchors * 4,
        kernel_size=(1, 1),
        name='bbox_regression',
    )(trunk)

    return scores, deltas

def region_of_interest_pooling(feature_map, rois, pool_size, box_indices=None):
    """Crop every RoI out of ``feature_map`` and resize it to ``pool_size``.

    This is a thin wrapper around ``tf.image.crop_and_resize``, which
    resamples each box to a fixed size (bilinear by default), so the result is
    a fixed-size feature per RoI rather than a max-pooled one.

    Args:
        feature_map: 4-D feature tensor ``(batch, height, width, channels)``.
        rois: ``(num_rois, 4)`` boxes in the format ``crop_and_resize``
            requires: *normalized* ``[y1, x1, y2, x2]`` coordinates. They are
            passed through unscaled; the op itself maps them onto the feature
            map.
        pool_size: ``(pooled_height, pooled_width)``.
        box_indices: Optional ``(num_rois,)`` int32 tensor giving, for each
            RoI, the batch index of the feature map it belongs to. Defaults to
            all zeros, i.e. every RoI is taken from the first batch element.

    Returns:
        Tensor of shape ``(num_rois, pooled_height, pooled_width, channels)``.
    """
    # crop_and_resize takes float32 normalized boxes. Scaling them to pixel
    # units or casting them to integers (as an earlier version did) violates
    # that contract.
    boxes = tf.cast(rois, tf.float32)

    if box_indices is None:
        # Use the dynamic shape so this also works when the number of RoIs is
        # unknown at graph-construction time (static shape would be None).
        box_indices = tf.zeros(tf.shape(boxes)[:1], dtype=tf.int32)

    # No extra pooling layer is needed: a 1x1 max-pool is an identity op.
    return tf.image.crop_and_resize(feature_map, boxes, box_indices, pool_size)




def object_classifier(input_tensor, num_classes):
    """Classify pooled RoI features with a conv + fully-connected head.

    Layout: four 3x3 / 256-filter ReLU convolutions ('same' padding), a
    flatten, two ``Dense(4096, relu) -> Dropout(0.5)`` pairs, and a final
    softmax layer.

    Args:
        input_tensor: Tensor fed to the first convolution.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The softmax class-score tensor.
    """
    hidden = input_tensor

    # Convolutional part.
    for _ in range(4):
        hidden = Conv2D(
            filters=256, kernel_size=(3, 3), activation='relu', padding='same'
        )(hidden)

    # Fully-connected part.
    hidden = Flatten()(hidden)
    for _ in range(2):
        hidden = Dense(units=4096, activation='relu')(hidden)
        hidden = Dropout(0.5)(hidden)

    return Dense(units=num_classes, activation='softmax')(hidden)





def apply_bbox_regression(rois, bbox_regression):
    """Refine RoIs with predicted ``(dx, dy, dw, dh)`` box deltas.

    Uses the usual R-CNN box parameterization: ``dx``/``dy`` move the box
    centre by a fraction of the box width/height, ``dw``/``dh`` rescale the
    width/height in log-space. All-zero deltas therefore return the input
    boxes unchanged.

    Args:
        rois: Boxes with the four coordinates on the last axis, e.g.
            ``(batch, num_rois, 4)``.
        bbox_regression: One ``(dx, dy, dw, dh)`` quadruple per RoI; any shape
            with the same number of elements as ``rois`` (it is reshaped to
            the RoI layout).

    Returns:
        Refined boxes in ``(x1, y1, x2, y2)`` corner format, same shape as
        ``rois``.
    """
    # Convert RoIs to (x1, y1, x2, y2) corner format.
    # NOTE(review): `to_x1y1x2y2` is not defined in the visible cells; it must
    # be provided elsewhere or this line raises NameError - confirm.
    rois = to_x1y1x2y2(rois)

    # Lay the deltas out exactly like the RoIs so that every operation below
    # is element-wise (splitting into (N, 1) columns would broadcast against
    # the (batch, num_rois) coordinate slices and mix up boxes).
    deltas = tf.reshape(bbox_regression, tf.shape(rois))
    dx, dy = deltas[..., 0], deltas[..., 1]
    dw, dh = deltas[..., 2], deltas[..., 3]

    x1, y1 = rois[..., 0], rois[..., 1]
    x2, y2 = rois[..., 2], rois[..., 3]

    w = x2 - x1
    h = y2 - y1
    # The deltas are relative to the box CENTRE, not its top-left corner.
    ctr_x = x1 + 0.5 * w
    ctr_y = y1 + 0.5 * h

    new_ctr_x = ctr_x + w * dx
    new_ctr_y = ctr_y + h * dy
    new_w = w * tf.exp(dw)
    new_h = h * tf.exp(dh)

    # Back to corner format.
    output_rois = tf.stack(
        [
            new_ctr_x - 0.5 * new_w,
            new_ctr_y - 0.5 * new_h,
            new_ctr_x + 0.5 * new_w,
            new_ctr_y + 0.5 * new_h,
        ],
        axis=-1,
    )

    return output_rois





def RCNN(input_tensor, rois, num_classes, pool_size=(7, 7)):
    """Assemble the detection head: features -> RoI pooling -> scores + boxes.

    Args:
        input_tensor: Keras image tensor fed to ``feature_extractor``.
        rois: Keras tensor of proposals, reshaped here to
            ``(batch, num_rois, 4)``. They are handed to
            ``tf.image.crop_and_resize`` unchanged, so they are assumed to be
            normalized ``[y1, x1, y2, x2]`` boxes - TODO confirm against the
            code that produces them.
        num_classes: Number of object classes.
        pool_size: ``(height, width)`` of the pooled RoI features.

    Returns:
        Tuple ``(class_scores, output_rois)`` with shapes
        ``(batch, num_rois, num_classes)`` and ``(batch, num_rois, 4)``.
        ``output_rois`` are the proposals refined with the deltas predicted
        for each RoI's highest-scoring class.
    """
    # Shared convolutional features for the whole image.
    feature_map = feature_extractor(input_tensor)

    # The RPN head is not built here: proposals arrive through `rois`, and the
    # outputs of an `rpn(...)` call would not be connected to either result.

    rois = Reshape((-1, 4))(rois)

    def _pool_rois(tensors):
        # tf.keras has no RoIPooling2D layer; crop_and_resize provides the
        # fixed-size per-RoI features instead.
        fmap, boxes = tensors
        boxes = tf.cast(boxes, tf.float32)
        batch_size = tf.shape(boxes)[0]
        rois_per_image = tf.shape(boxes)[1]
        # Row i of the flattened boxes belongs to image i // rois_per_image.
        box_indices = tf.repeat(tf.range(batch_size), rois_per_image)
        return tf.image.crop_and_resize(
            fmap, tf.reshape(boxes, (-1, 4)), box_indices, pool_size
        )

    # Shape: (batch * num_rois, pool_h, pool_w, channels).
    roi_pool = tf.keras.layers.Lambda(_pool_rois)([feature_map, rois])

    # Object classification model: one score vector per RoI.
    flat_scores = object_classifier(roi_pool, num_classes)

    # Bounding box regression. The regressor reads the pooled RoI features;
    # softmax probabilities alone carry no localisation information.
    bbox_regression = Dense(units=num_classes * 4, activation='linear')(
        Flatten()(roi_pool)
    )
    bbox_regression = Reshape((num_classes, 4))(bbox_regression)

    def _refine_rois(tensors):
        boxes, scores, deltas = tensors
        # Keep, for every RoI, the deltas of its highest-scoring class so that
        # exactly one (dx, dy, dw, dh) quadruple is applied per box.
        best_class = tf.argmax(scores, axis=-1)
        best_deltas = tf.gather(deltas, best_class, batch_dims=1)
        return apply_bbox_regression(
            boxes, tf.reshape(best_deltas, tf.shape(boxes))
        )

    output_rois = tf.keras.layers.Lambda(_refine_rois)(
        [rois, flat_scores, bbox_regression]
    )

    def _group_by_image(tensors):
        # Restore the (batch, num_rois, ...) layout of the proposals.
        boxes, scores = tensors
        target_shape = tf.concat([tf.shape(boxes)[:2], [num_classes]], axis=0)
        return tf.reshape(scores, target_shape)

    class_scores = tf.keras.layers.Lambda(_group_by_image)([rois, flat_scores])

    return class_scores, output_rois