# Faster R-CNN Implementation

## Toolbox

In [2]:
%tensorflow_version 2.x

TensorFlow is already loaded. Please restart the runtime to change versions.


In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
import doctest

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout, Layer, Concatenate
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed

1 items had no tests:
    __main__
0 tests in 1 items.
0 passed and 0 failed.
Test passed.


TestResults(failed=0, attempted=0)

In [19]:
def iou(bbox1, bbox2):
  '''
  Intersection over union of two axis-aligned bounding boxes.

  Bbox format must be [x_min,y_min,x_max,y_max]

  Returns the int 0 when the intersection has zero width or zero height
  (disjoint, edge-touching or degenerate boxes); otherwise returns
  intersection_area / union_area.

  >>> iou([10,10,10,10],[5,5,5,5])
  0
  >>> iou([0,0,4,4],[2,2,4,4])
  0.25
  >>> iou([0,0,4,4],[2,2,6,6])
  0.14285714285714285
  '''

  # Corners of the intersection rectangle.
  xmin_inter = max(bbox1[0], bbox2[0])
  ymin_inter = max(bbox1[1], bbox2[1])
  xmax_inter = min(bbox1[2], bbox2[2])
  ymax_inter = min(bbox1[3], bbox2[3])

  # Clamp at 0 so non-overlapping boxes do not produce a negative extent.
  width_inter = max(xmax_inter - xmin_inter, 0)
  height_inter = max(ymax_inter - ymin_inter, 0)
  if width_inter == 0 or height_inter == 0:
    return 0

  area_inter = width_inter * height_inter
  area_bbox1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
  area_bbox2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
  # Union = sum of both areas minus the part counted twice.
  return area_inter / (area_bbox1 + area_bbox2 - area_inter)


# Run the doctests only after iou is defined, so a fresh kernel
# (Restart & Run All) actually collects and executes them.
doctest.testmod(verbose=True)


Trying:
    iou([10,10,10,10],[5,5,5,5])
Expecting:
    0
ok
Trying:
    iou([0,0,4,4],[2,2,4,4])
Expecting:
    0.25
ok
Trying:
    iou([0,0,4,4],[2,2,6,6])
Expecting:
    0.14285714285714285
ok
1 items had no tests:
    __main__
1 items passed all tests:
   3 tests in __main__.iou
3 tests in 2 items.
3 passed and 0 failed.
Test passed.


## Architecture

## Input

In [0]:
# Input image geometry: (height, width, depth) of the tensors fed to the network.
img_height, img_width, img_depth = 224, 224, 3
input_shape = (img_height, img_width, img_depth)

def input_tensor(input_shape):
  '''Return a Keras `Input` tensor created with `shape=input_shape`.'''
  return Input(shape=input_shape)

## Base Model

In [0]:
# Ratio between the input image size and the backbone feature-map size:
# vgg16() applies four 2x2 stride-2 max-poolings, i.e. 2**4 = 16.
vgg_ratio = 16

In [0]:
def vgg16(input_tensor):
    '''
    Stack the VGG16 convolutional layers on top of `input_tensor`.

    Five blocks of 3x3 'same'-padded ReLU convolutions (13 in total) are
    created. Blocks 1 to 4 each end with a 2x2, stride-2 max-pooling;
    block 5 has no pooling layer. Layers are named 'block<i>_conv<j>' and
    'block<i>_pool'.

    Returns the output tensor of 'block5_conv3'.
    '''
    # (block index, filters per conv, number of convs, pooling after block?)
    block_specs = (
        (1, 64, 2, True),
        (2, 128, 2, True),
        (3, 256, 3, True),
        (4, 512, 3, True),
        (5, 512, 3, False),
    )

    features = input_tensor
    for block, filters, conv_count, has_pool in block_specs:
        for conv in range(1, conv_count + 1):
            conv_name = 'block%d_conv%d' % (block, conv)
            features = Conv2D(filters, (3, 3), activation='relu', padding='same', name=conv_name)(features)
        if has_pool:
            pool_name = 'block%d_pool' % block
            features = MaxPooling2D((2, 2), strides=(2, 2), name=pool_name)(features)

    return features

In [0]:
def get_weights(weights):
  '''Stub: `weights` is currently ignored and None is always returned.'''

## RPN

In [0]:
# Spatial size of the feature map produced by the backbone. Floor division
# keeps these as ints (true division yielded 14.0), so they can be used
# directly as tensor dimensions.
rpn_height = img_height // vgg_ratio
rpn_width = img_width // vgg_ratio
rpn_depth = 512
# Anchor configuration: anchors_count is one anchor per (ratio, size) pair.
anchors_ratios = [0.5,1,1.5]
anchors_size = [64,128,256]
anchors_count = len(anchors_ratios) * len(anchors_size)


def rpn(x):
  '''
  Region Proposal Network head applied to the feature map `x`.

  A shared 3x3 convolution ('rpn_mutual_layer') feeds two sibling 1x1
  convolutions:
    - 'rpn_class_layer': anchors_count sigmoid outputs per location
    - 'rpn_reg_layer'  : 4*anchors_count linear outputs per location

  Returns [x_class, x_reg].
  '''

  # Mutual Layer
  x1 = Conv2D(512,(3,3), activation='relu', padding='same', kernel_initializer='normal', name='rpn_mutual_layer')(x)

  # Both heads must read the mutual layer's output. They previously read `x`
  # directly, which left 'rpn_mutual_layer' disconnected from the outputs.

  # Classification Layer
  x_class = Conv2D(anchors_count, (1,1), activation = 'sigmoid', kernel_initializer = 'uniform', name='rpn_class_layer')(x1)

  # Regression Layer
  x_reg = Conv2D(4*anchors_count, (1,1), activation='linear', kernel_initializer='zero', name='rpn_reg_layer')(x1)
  return [x_class,x_reg]

## ROI

In [0]:
class ROIPoolingLayer(Layer):
    '''
    Crop a single region of interest out of a feature map and resize it to
    a fixed (pooling_size, pooling_size) grid.

    Input will be : [VGG16 Feature Layers, Proposal]
    Shape is [(1,rpn_height,rpn_width,channels),(N,4)]
    Each proposal row is (x,y,h,w), used directly as indices into the
    feature map. Only the first row (index 0) of the proposal tensor is read.

    Output shape is (1, pooling_size, pooling_size, channels).
    '''
    def __init__(self, pooling_size, **kwargs):

        self.pooling_size = pooling_size

        super(ROIPoolingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Channel count of the feature map: first input, last axis.
        self.nb_channels = input_shape[0][3]

    def call(self, x):
        assert len(x) == 2
        img = x[0]
        roi = x[1]

        # Slice bounds must be integers; the proposal tensor may be float.
        left = K.cast(roi[0,0], 'int32')
        top = K.cast(roi[0,1], 'int32')
        height = K.cast(roi[0,2], 'int32')
        width = K.cast(roi[0,3], 'int32')

        # Axis 1 is indexed with y/h and axis 2 with x/w.
        crop = img[:, top:top+height, left:left+width, :]
        output = tf.image.resize(crop, (self.pooling_size, self.pooling_size))
        # Pin the static shape (batch of one) for the layers that follow.
        output = K.reshape(output , (1, self.pooling_size, self.pooling_size, self.nb_channels))
        return output

    def compute_output_shape(self, input_shape):
        # Batch axis is reported as None although call() reshapes it to 1.
        return None, self.pooling_size, self.pooling_size, self.nb_channels

    def get_config(self):
        # Include the constructor argument so the layer can be serialized
        # and re-created from its config.
        config = super(ROIPoolingLayer, self).get_config()
        config['pooling_size'] = self.pooling_size
        return config

## R-CNN

In [0]:
classes_count = 3
def rcnn(x):
  '''
  Detection head applied to a pooled region `x`.

  `x` is flattened and passed through two 4096-unit ReLU dense layers
  ('rcnn_dense1', 'rcnn_dense2'), then through two sibling dense layers:
    - 'rcnn_class_layer': softmax over classes_count+1 outputs
    - 'rcnn_reg_layer'  : 4*classes_count linear outputs

  Returns [x_class, x_reg].
  '''
  features = Flatten()(x)

  # Fully connected trunk: two identical 4096-unit layers.
  for dense_name in ('rcnn_dense1', 'rcnn_dense2'):
    features = Dense(4096, activation='relu', kernel_initializer='normal', name=dense_name)(features)

  # Sibling output layers, both reading the trunk output.
  class_scores = Dense(classes_count+1, activation='softmax', kernel_initializer='uniform', name='rcnn_class_layer')(features)
  box_deltas = Dense(4*classes_count, activation='linear', kernel_initializer='zero', name='rcnn_reg_layer')(features)

  return [class_scores, box_deltas]

In [22]:
# Smoke test: wire input -> VGG16 -> RPN -> ROI pooling -> R-CNN head and
# print the symbolic tensors to check the shapes.
input_test = input_tensor(input_shape)
vgg16_model = vgg16(input_test)
output = rpn(vgg16_model)
print(vgg16_model)
print(output)
# One row of 4 regression values per anchor and per feature-map location.
# Derived from the configuration instead of the hard-coded 1764 (= 14*14*9),
# so the cell keeps working when the image size or the anchors change.
# The fixed row count assumes a batch of one image.
rpn_boxes_count = int(rpn_height) * int(rpn_width) * anchors_count
output_test = K.reshape(output[1],(rpn_boxes_count,4))
print(output_test)
# NOTE(review): the raw RPN regression output is passed as the "proposal"
# here, presumably only to exercise the shapes -- confirm.
output = ROIPoolingLayer(7)([vgg16_model,output_test])
print(output)
output = rcnn(output)
print(output)

Tensor("block5_conv3_1/Identity:0", shape=(None, 14, 14, 512), dtype=float32)
[<tf.Tensor 'rpn_class_layer_1/Identity:0' shape=(None, 14, 14, 9) dtype=float32>, <tf.Tensor 'rpn_reg_layer_1/Identity:0' shape=(None, 14, 14, 36) dtype=float32>]
Tensor("Reshape_1:0", shape=(1764, 4), dtype=float32)
Tensor("roi_pooling_layer_1/Identity:0", shape=(1, 7, 7, 512), dtype=float32)
[<tf.Tensor 'rcnn_class_layer/Identity:0' shape=(1, 4) dtype=float32>, <tf.Tensor 'rcnn_reg_layer/Identity:0' shape=(1, 12) dtype=float32>]
