In [3]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as nets

from keras.applications import vgg16
from keras import backend as K

from math import floor
import pprint

import matplotlib.image as mpimg
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten,Lambda,Conv2D,Input
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils.data_utils import get_file
from keras.engine.topology import Layer


WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'

anchor_box_scales = [128, 256, 512]
anchor_box_ratio = [[1,1],[1,2],[2,1]]

def vgg16(input_tensor):
    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(input_tensor)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    
    return x

def load_vgg_weights(model):
    
    weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
                            WEIGHTS_PATH_NO_TOP,
                            cache_subdir='models')
    
    model.load_weights(weights_path,by_name=True)
    
    return model 


def rpn(layer, num_anchors):
    rpn_conv = Conv2D(512, (3, 1), activation='relu', name='rpn_conv_3x3', padding="same")(layer)
    
    rpn_class =  Conv2D(num_anchors, (1, 1), 
                        activation='sigmoid', 
                        name='rpn_class', 
                        padding="same", 
                        kernel_initializer='uniform')(rpn_conv)
    
    rpn_regr = Conv2D(num_anchors*4, (1, 1), 
                      activation='linear', 
                      name='rpn_regr', 
                      padding="same", 
                      kernel_initializer='zero')(rpn_conv)
    
    return rpn_class, rpn_regr, rpn_conv


In [19]:
class RoiPoolingConv(Layer):
    '''ROI pooling layer for 2D inputs.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_size: int
            Size of pooling region to use. pool_size = 7 will result in a 7x7 region.
        num_rois: number of regions of interest to be used
    # Input shape
        list of two 4D tensors [X_img,X_roi] with shape:
        X_img:
        `(1, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(1, rows, cols, channels)` if dim_ordering='tf'.
        X_roi:
        `(1,num_rois,4)` list of rois, with ordering (x,y,w,h)
    # Output shape
        3D tensor with shape:
        `(1, num_rois, channels, pool_size, pool_size)`
    '''
    def __init__(self, pool_size, num_rois, **kwargs):

        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

        self.pool_size = pool_size
        self.num_rois = num_rois

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.dim_ordering == 'th':
            self.nb_channels = input_shape[0][1]
        elif self.dim_ordering == 'tf':
            self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        if self.dim_ordering == 'th':
            return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size
        else:
            return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):

        assert(len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]
            
            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size

            #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
            # in theano. The theano implementation is much less efficient and leads to long compile times

            if self.dim_ordering == 'th':
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = x + ix * row_length
                        x2 = x1 + row_length
                        y1 = y + jy * col_length
                        y2 = y1 + col_length

                        x1 = K.cast(x1, 'int32')
                        x2 = K.cast(x2, 'int32')
                        y1 = K.cast(y1, 'int32')
                        y2 = K.cast(y2, 'int32')

                        x2 = x1 + K.maximum(1,x2-x1)
                        y2 = y1 + K.maximum(1,y2-y1)
                        
                        new_shape = [input_shape[0], input_shape[1],
                                     y2 - y1, x2 - x1]

                        x_crop = img[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

            elif self.dim_ordering == 'tf':
                x = K.cast(x, 'int32')
                y = K.cast(y, 'int32')
                w = K.cast(w, 'int32')
                h = K.cast(h, 'int32')

                rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
                outputs.append(rs)

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        if self.dim_ordering == 'th':
            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
        else:
            final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

        return final_output


class RoiPoolingLayer(Layer):
    '''
        pool out the selected anchor
    '''
    def __init__(self, convLayers, proposalLayer, **kwargs):
        self.convLayers = convLayers
        self.proposalLayer = proposalLayer
        
        super(RoiPoolingLayer, self).__init__(**kwargs)
        
    def build(self, input_shape):
        super(RoiPoolingLayer, self).build(input_shape)
        pass
        
    def call(self,x):
        pass
    
    def compute_output_shape(self, input_shape):
        # return pooled rois
        return None, 7, 7, 3
#     def get_shape(self):
#         return None, 7, 7, 3


class ProposalLayer(Layer):
    '''
        select the proposal that are valid
    '''
    def __init__(self,rpn_regr, rpn_conv, **kwargs):
        self.rpn_regr = rpn_regr
        self.rpn_conv = rpn_conv
        super(ProposalLayer, self).__init__(**kwargs)
        
    def build(self, input_shape):
        super(ProposalLayer, self).build(input_shape)
        pass
        
    def call(self,x):
        # ensure there are two input node to this layer
        assert(len(x) == 2)
        featureMap = x[0]
        rois = x[1]
        
        # take top n proposal
        print(rois.shape)
        
        # apply NMS
        
        # take top n proposal 
        
        
        
        pass
    
    def compute_output_shape(self, input_shape):
        # return selected proposal
        return None, 4
#     def get_shape(self):
#         return None, 4



In [20]:


def rcnn(convLayers, rpn_regr, rpn_conv, nb_rois, nb_classes= 21, trainable=False):
    #select roi that relavent 
    proposalLayer = ProposalLayer(rpn_regr, rpn_conv)
    # pass the heatmap and roi to roi pooling layer
    roi_pooling = RoiPoolingConv(convLayers, proposalLayer)
    
    x = Flatten()(roi_pooling)    
    x = Dense(4096, name="rcnn_fc6")(x)
    x = Dense(4096, name="rcnn_fc7")(x)
    cls_score = Dense(nb_rois, name="cls_score")(x)
    cls_score = softmax(cls_score)
    
    bbox_pred = Dense(nb_rois * 4, name="bbox_pred")(x)
    
    return cls_score, bbox_pred


imgs shape (1, 720, 1280, 3)


AttributeError: 'RoiPoolingLayer' object has no attribute 'get_shape'

In [None]:

TEST_FULL_IMG = mpimg.imread("test1.jpg")
imgs = np.array([TEST_FULL_IMG])

nb_anchors = len(anchor_box_scales) * len(anchor_box_ratio)

print("imgs shape", imgs.shape)

img_input = Input(shape=(None,None,3))

block5_conv3 = vgg16(input_tensor=img_input)
rpn_class, rpn_regr, rpn_conv = rpn(block5_conv3,nb_anchors)

cls_score, bbox_pred = rcnn(block5_conv3, rpn_regr, rpn_conv, 21, 21, trainable=True)

rpn_model = Model(img_input, [rpn_regr,rpn_conv], name='rpn')
fasterRcnn = Model(img_input, [cls_score, bbox_pred], name="fasterRcnn")
model = load_vgg_weights(model)


res = model.predict(imgs)
print("res shape", res.shape)
