# Faster R-CNN Implementation

##Toolbox

In [2]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
import doctest
import os

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout, Layer, Concatenate
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed

In [5]:
doctest.testmod(verbose=True)
def iou(bbox1, bbox2):
  '''
  Bbox format must be [x_min,y_min,x_max,y_max]
  >>> iou([10,10,10,10],[5,5,5,5])
  0
  >>> iou([0,0,4,4],[2,2,4,4])
  0.25
  >>> iou([0,0,4,4],[2,2,6,6])
  0.14285714285714285
  '''

  xmin_inter = max(bbox1[0],bbox2[0])
  ymin_inter = max(bbox1[1],bbox2[1])
  xmax_inter = min(bbox1[2],bbox2[2])
  ymax_inter = min(bbox1[3],bbox2[3])

  width_inter = max(xmax_inter - xmin_inter,0)
  height_inter = max(ymax_inter - ymin_inter,0)
  if(width_inter == 0 or height_inter == 0):
    iou = 0
  else:
    iou = width_inter*height_inter/((bbox1[2]-bbox1[0])*((bbox1[3]-bbox1[1]))+(bbox2[2]-bbox2[0])*((bbox2[3]-bbox2[1]))-width_inter*height_inter)
  return iou


Trying:
    iou([10,10,10,10],[5,5,5,5])
Expecting:
    0
ok
Trying:
    iou([0,0,4,4],[2,2,4,4])
Expecting:
    0.25
ok
Trying:
    iou([0,0,4,4],[2,2,6,6])
Expecting:
    0.14285714285714285
ok
1 items had no tests:
    __main__
1 items passed all tests:
   3 tests in __main__.iou
3 tests in 2 items.
3 passed and 0 failed.
Test passed.


##Architecture

##Input

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [29]:
base_filepath = 'drive/My Drive/SoccerAI/train_resources'
# Input file for training is train_annotation.txt
train_filepath = base_filepath+'/train_annotation.txt'
train_df = pd.read_csv(train_filepath,header=None,names=['ID','Filepath','XMin','YMin','XMax','YMax','Class'])
train_df = train_df.set_index('ID')
train_imgs = train_df.index.unique()

                                                           Filepath  ...   Class
ID                                                                   ...        
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/SoccerAI/train_resources/Person/00064...  ...  Person
0006487596052ff0  My Drive/S

In [0]:
img_height = 224
img_width = 224
img_depth = 3
input_shape = (img_height,img_width,img_depth)

def input_tensor(input_shape):
  x = Input(shape = input_shape)
  return x

## Base Model

In [0]:
vgg_ratio = 16

In [0]:
def vgg16(input_tensor):
      
    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(input_tensor)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)

    return x

In [0]:
def get_weights(weights):
  return None

##RPN

In [0]:
rpn_height = img_height/vgg_ratio
rpn_width = img_width/vgg_ratio
rpn_depth = 512
anchors_ratios = [0.5,1,1.5]
anchors_size = [64,128,256]
anchors_count = len(anchors_ratios) * len(anchors_size)


def rpn(x):
  
  # Mutual Layer
  x1 = Conv2D(512,(3,3), activation='relu', padding='same', kernel_initializer='normal', name='rpn_mutual_layer')(x)
  
  # Classification Layer
  x_class = Conv2D(anchors_count, (1,1), activation = 'sigmoid', kernel_initializer = 'uniform', name='rpn_class_layer')(x)
  
  # Regression Layer
  x_reg = Conv2D(4*anchors_count, (1,1), activation='linear', kernel_initializer='zero', name='rpn_reg_layer')(x)
  return [x_class,x_reg]

In [43]:
# Anchors Rep shape is (rpn_height,rpn_width,len(anchors_ratio)*len(anchors),4) stands for xmin,ymin,xmax,ymax
def anchors_list(vgg_ratio,rpn_height,rpn_width,anchors_ratios,anchors_size):
  anchors = np.zeros((rpn_height,rpn_width,len(anchors_ratios)*len(anchors_size),4))
  for i in range(0,rpn_height):
    for j in range(0,rpn_width):
      k = 0
      for ratio in anchors_ratios:
        for size in anchors_size:
          xcenter = i*vgg_ratio + vgg_ratio/2
          ycenter = j*vgg_ratio + vgg_ratio/2
          anchors_width = ratio*size
          anchors_height = size
          xmin = xcenter - anchors_width/2
          ymin = ycenter - anchors_height/2
          xmax = xcenter + anchors_width/2
          ymax = ycenter + anchors_height/2
          anchors[i][j][k][0] = xmin
          anchors[i][j][k][1] = ymin
          anchors[i][j][k][2] = xmax
          anchors[i][j][k][3] = ymax
          k+=1
  return anchors

anchors = anchors_list(vgg_ratio,int(rpn_height),int(rpn_width),anchors_ratios,anchors_size)
print(anchors)

[[[[  -8.  -24.   24.   40.]
   [ -24.  -56.   40.   72.]
   [ -56. -120.   72.  136.]
   ...
   [ -40.  -24.   56.   40.]
   [ -88.  -56.  104.   72.]
   [-184. -120.  200.  136.]]

  [[  -8.   -8.   24.   56.]
   [ -24.  -40.   40.   88.]
   [ -56. -104.   72.  152.]
   ...
   [ -40.   -8.   56.   56.]
   [ -88.  -40.  104.   88.]
   [-184. -104.  200.  152.]]

  [[  -8.    8.   24.   72.]
   [ -24.  -24.   40.  104.]
   [ -56.  -88.   72.  168.]
   ...
   [ -40.    8.   56.   72.]
   [ -88.  -24.  104.  104.]
   [-184.  -88.  200.  168.]]

  ...

  [[  -8.  152.   24.  216.]
   [ -24.  120.   40.  248.]
   [ -56.   56.   72.  312.]
   ...
   [ -40.  152.   56.  216.]
   [ -88.  120.  104.  248.]
   [-184.   56.  200.  312.]]

  [[  -8.  168.   24.  232.]
   [ -24.  136.   40.  264.]
   [ -56.   72.   72.  328.]
   ...
   [ -40.  168.   56.  232.]
   [ -88.  136.  104.  264.]
   [-184.   72.  200.  328.]]

  [[  -8.  184.   24.  248.]
   [ -24.  152.   40.  280.]
   [ -56.   88.   72

##ROI

In [0]:
class ROIPoolingLayer(Layer):
    '''
    Input will be : [VGG16 Feature Layers, Proposal]
    Shape is [(1,rpn_width,rpn_height,512),(1,x,y,h,w)]
    '''
    def __init__(self, pooling_size, **kwargs):
        
        self.pooling_size = pooling_size
        
        super(ROIPoolingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]

    def call(self, x):
        assert len(x) == 2
        img = x[0]
        roi = x[1]
        
        x = roi[0,0]
        y = roi[0,1]
        h = roi[0,2]
        w = roi[0,3]
        
        x = K.cast(x, 'int32')
        y = K.cast(y, 'int32')
        w = K.cast(w, 'int32')
        h = K.cast(h, 'int32')


        output = tf.image.resize(img[:, y:y+h, x:x+w, :], (self.pooling_size, self.pooling_size))
        output = K.reshape(output , (1, self.pooling_size, self.pooling_size, self.nb_channels))
        return output

    def compute_output_shape(self, input_shape):
        return None, self.pooling_size, self.pooling_size, self.nb_channels

##R-CNN

In [0]:
classes_count = 3
def rcnn(x):
  
  # Flatten Layer
  x = Flatten()(x)
  
  # 1st Dense Layer
  x = Dense(4096,activation='relu',kernel_initializer='normal', name='rcnn_dense1' )(x)

  # 2nd Dense Layer
  x = Dense(4096,activation='relu',kernel_initializer='normal', name='rcnn_dense2')(x)
  
  # Classification Layer
  x_class = Dense(classes_count+1, activation = 'softmax', kernel_initializer = 'uniform', name='rcnn_class_layer')(x)
  
  # Regression Layer
  x_reg = Dense(4*classes_count, activation='linear', kernel_initializer='zero', name='rcnn_reg_layer')(x)
  
  return [x_class,x_reg]

In [30]:
input_test = input_tensor(input_shape)
vgg16_model = vgg16(input_test)
output = rpn(vgg16_model)
print(vgg16_model)
print(output)
output_test = K.reshape(output[1],(1764,4))
#output_test = K.squeeze(output_test,axis=0)
print(output_test)
output = ROIPoolingLayer(7)([vgg16_model,output_test])
print(output)
output = rcnn(output)
print(output)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Tensor("block5_conv3/Relu:0", shape=(?, 14, 14, 512), dtype=float32)
[<tf.Tensor 'rpn_class_layer/Sigmoid:0' shape=(?, 14, 14, 9) dtype=float32>, <tf.Tensor 'rpn_reg_layer/BiasAdd:0' shape=(?, 14, 14, 36) dtype=float32>]
Tensor("Reshape:0", shape=(1764, 4), dtype=float32)
Tensor("roi_pooling_layer/Reshape:0", shape=(1, 7, 7, 512), dtype=float32)
[<tf.Tensor 'rcnn_class_layer/Softmax:0' shape=(1, 4) dtype=float32>, <tf.Tensor 'rcnn_reg_layer/BiasAdd:0' shape=(1, 12) dtype=float32>]
