In [1]:
from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers as layers_lib
from tensorflow.contrib.layers.python.layers import regularizers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope
import tensorflow.contrib.slim as slim
import tensorflow as tf
import os

from scipy.misc import imresize

from math import floor,exp
import pprint

import matplotlib.image as mpimg
import numpy as np
import matplotlib.pylab as plt

In [2]:

anchor_box_scales = [128, 256, 512]
anchor_box_ratio = [[1,1],[1,2],[2,1]]


TEST_FULL_IMG = np.array([mpimg.imread("./test1.jpg")])

In [3]:
print(TEST_FULL_IMG.shape)
print(TEST_FULL_IMG.shape[1]/4, TEST_FULL_IMG.shape[2]/4)

(1, 720, 1280, 3)
180.0 320.0


In [15]:
def vgg_16(inputs,  scope='vgg_16'):
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'

        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
#             net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
#             net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')

            # Convert end_points_collection into a end_point dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        
    return net, end_points


def rpn(net, num_anchors=9, scope="rpn"):
    with tf.variable_scope(scope, 'rpn', [net]) as sc:
        end_points_collection = sc.name + '_end_points'
        
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 
                            outputs_collections=end_points_collection, 
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
            
            net = slim.conv2d(net, 512, [3, 1], scope='rpn_conv_3x3', padding='SAME')
            
            rpn_class = slim.conv2d(net, num_anchors, [1, 1], scope='rpn_class')
            
            rpn_regr = slim.conv2d(net, num_anchors*4, [1, 1], scope='rpn_regr')   

            # Convert end_points_collection into a end_point dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        
    return rpn_class, rpn_regr, end_points

def mapAnchorToBoxs(rpn_class, rpn_regr, feature_map_ratio, ratio=[(1,1),(2,1),(1,2)], pixel=[64,128,512]):
    
    class_res  = [];
    regr_res = [];
    
    
    for row in range(rpn_class.shape[0]):
        for col in range(rpn_class.shape[1]):
            i = 0
            for r in ratio:
                j = 0
                for p in pixel:
                    anchor_width = r[0]*p / feature_map_ratio
                    anchor_height = r[1]*p / feature_map_ratio
                    
                    p_anchor = rpn_regr[row][col][i+j]
                    
                    cx = row + p_anchor[0]*anchor_width
                    cy = col + p_anchor[1]*anchor_height
                    pred_w = exp(p_anchor[2]) * anchor_width
                    pred_h = exp(p_anchor[3]) * anchor_height
                    
                    x1 = int(cx - 0.5 * pred_w)
                    y1 = int(cy - 0.5 * pred_h)
                    x2 = int(cx + 0.5 * pred_w)
                    y2 = int(cy + 0.5 * pred_h)
                    
                    regr_res.append([x1,y1,x2,y2])
                    class_res.append(rpn_class[row][col][i+j])
#                     print("rpn_class[row][col][i+j]",rpn_class[row][col][i+j])
                    
                    
                    j+=1
                i+=1
    
    return np.array(class_res), np.array(regr_res)

def getLargest(a,b):
    if (a>b):
        return a
    else :
        return b

def getSmallest(a,b):
    if(a<b):
        return a
    else:
        return b
    
    
def clip_boxes(boxes, im_shape_col, im_shape_row):
    """
    Clip boxes to image boundaries.
    """
    for box in boxes:
        box[0] = getLargest( getSmallest(box[0], im_shape_col) ,0)
        box[1] = getLargest( getSmallest(box[1], im_shape_row) ,0)
        box[2] = getLargest( getSmallest(box[2], im_shape_col) ,0)
        box[3] = getLargest( getSmallest(box[3], im_shape_row) ,0)
            
    return boxes

def filter_boxs_by_size(boxes, threshold=30):
    
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= threshold) & (hs >= threshold))[0]
    return keep

def nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep

def rpn_proposal_layer(rpn_class, rpn_regr, img_input):
    ''' 
    This function map anchor to the position, which return x,y,w,h with globel coor. 
    rather than anchor inner position.
    img_input is use for get the imgSize ref.
    '''
    
    print("rpn_class.shape", rpn_class.shape)
    print("rpn_regr.shape", rpn_regr.shape)
    print("img_input.shape", img_input.shape)
    
    img_width = img_input.shape[2]
    img_height = img_input.shape[1]
    
    feature_map_width = rpn_regr.shape[2]
    feature_map_height = rpn_regr.shape[1]
    
    img_to_feature_map_ratio = img_width/feature_map_width
    
    nb_anchors = int(rpn_class.shape[3])
    
#     reshape the array by anchors 
    rpn_class_reshaped = np.reshape(rpn_class, (int(rpn_class.shape[1]), int(rpn_class.shape[2]),nb_anchors, 1))
    rpn_regr_reshaped = np.reshape(rpn_regr, (int(rpn_regr.shape[1]), int(rpn_regr.shape[2]),nb_anchors, 4)) 
    
    class_res, regr_res = mapAnchorToBoxs(rpn_class_reshaped, rpn_regr_reshaped, img_to_feature_map_ratio)
    
    # 2. clip predicted boxes to image
    regr_res = clip_boxes(regr_res, int(rpn_class.shape[2]), int(rpn_class.shape[1]))
    print("class_res.shape",class_res.shape)
    print("regr_res.shape",regr_res.shape)
    
    # filter by size
    id_keep = filter_boxs_by_size(regr_res, 30)
    print("id_keep.shape",id_keep.shape)
    class_res_keep = class_res[id_keep]
    regr_res_keep = regr_res[id_keep, :]
    
    # sort 
    # getTop N 
    pre_nms_topN = 6000
    
    order = class_res_keep.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = regr_res_keep[order, :]
    scores = class_res_keep[order]
    
    print("proposals.shape",proposals.shape)    
    print("scores.shape",scores.shape)
    
    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    post_nms_topN = 300
    nms_thresh = 0.6
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    print("proposals.shape",proposals.shape)    
    print("scores.shape",scores.shape)
    
    return proposals.astype(np.float32) #.astype(np.int32) # proposals.astype(np.float32), scores.astype(np.float32)


In [20]:

# def roi_pooling_layer(feature_map, rois, img_input):
#     '''
#         rois in x1,y1,x2,y2 fromat in actual image coord.
#     '''
#     print("feature_map.shape", feature_map.shape)
#     print("img_input.shape", img_input.shape)
#     print("rois.shape",rois.shape)
    
# # feature_map.shape (1, 180, 320, 512)
# # img_input.shape (1, 720, 1280, 3)
    
#     ratio = img_input.shape[1]/feature_map.shape[1]
    
#     result = []
#     for roi in rois[0]:
#         coord = (roi / ratio).astype(np.int)
#         print(coord)
#         img = tf.image.resize_images(feature_map[0][coord[0]:coord[2], coord[1]:coord[3], :], (7, 7))
#         result.append(img)
    
#     print(result)
#     result = np.array(result)
    
#     print("result.shape",result.shape)
    
# #     for roi in rois[0]:
# #         print(roi)
#     return result.astype(np.float32) # np.array(np.zeros((5,7,7,512)).astype(np.float32))

def getBoxIds(poposal_res):
    print("poposal_res.shape",poposal_res.shape)
    return np.zeros(len(poposal_res)).astype(np.int32)

In [21]:
nb_anchors = len(anchor_box_scales) * len(anchor_box_ratio)

img_input = tf.placeholder(tf.float32, [1, None, None, 3])

crop_size = tf.constant([14,14])

conv_layer,conv_end_points = vgg_16(img_input)
conv_restore_names = [ item for item in conv_end_points] 

rpn_class, rpn_regr, rpn_end_points = rpn(conv_layer,nb_anchors)

# poposal layer
roi_proposal = tf.py_func(rpn_proposal_layer,[rpn_class, rpn_regr, img_input], tf.float32, name="roi_proposal")

# pooling layer
get_box_ids = tf.py_func(getBoxIds, [roi_proposal],  tf.int32 );

roi_pooling = tf.image.crop_and_resize(conv_layer,roi_proposal, get_box_ids, crop_size)
# roi_pooling = tf.py_func(roi_pooling_layer,[conv_layer, roi_proposal, img_input], tf.float32, name="roi_pooling")


# restore weights
variables_to_restore = slim.get_variables_to_restore(include=conv_restore_names)
vgg_checkpoint_path = os.path.join("./", 'vgg_16.ckpt')
restorer = tf.train.Saver(variables_to_restore)


init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    # Restore variables from disk.
    restorer.restore(sess, "./vgg_16.ckpt")
    
    sess.run(init_op)
    print("restore conv layers")
    
    res, rpn_class_res, rpn_regr_res, roi_proposal_res, roi_pooling_res = sess.run([conv_layer, rpn_class, rpn_regr, roi_proposal, roi_pooling], feed_dict={img_input:TEST_FULL_IMG})
#     plt.imshow(res[0], cmap='gray')
    

restore conv layers
rpn_class.shape (1, 180, 320, 9)
rpn_regr.shape (1, 180, 320, 36)
img_input.shape (1, 720, 1280, 3)
class_res.shape (518400, 1)
regr_res.shape (518400, 4)
id_keep.shape (212176,)
proposals.shape (6000, 4)
scores.shape (6000, 1)
proposals.shape (38, 4)
scores.shape (38, 1)
poposal_res.shape (38, 4)


OutOfRangeError: box_ind has values outside [0, batch)
	 [[Node: CropAndResize_7 = CropAndResize[T=DT_FLOAT, extrapolation_value=0, method="bilinear", _device="/job:localhost/replica:0/task:0/gpu:0"](vgg_16_7/conv5/conv5_3/Relu, roi_proposal_7/_59, PyFunc_7/_61, Const_7)]]
	 [[Node: CropAndResize_7/_63 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_153_CropAndResize_7", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'CropAndResize_7', defined at:
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-21-317633c1726d>", line 18, in <module>
    roi_pooling = tf.image.crop_and_resize(conv_layer,roi_proposal, get_box_ids, crop_size)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/tensorflow/python/ops/gen_image_ops.py", line 178, in crop_and_resize
    name=name)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/holman/anaconda2/envs/carND/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

OutOfRangeError (see above for traceback): box_ind has values outside [0, batch)
	 [[Node: CropAndResize_7 = CropAndResize[T=DT_FLOAT, extrapolation_value=0, method="bilinear", _device="/job:localhost/replica:0/task:0/gpu:0"](vgg_16_7/conv5/conv5_3/Relu, roi_proposal_7/_59, PyFunc_7/_61, Const_7)]]
	 [[Node: CropAndResize_7/_63 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_153_CropAndResize_7", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


In [None]:
print(roi_pooling_res.shape)

In [None]:
roi_proposal_res[0].shape

In [None]:
res.shape

In [9]:
rpn_class_res.shape

(1, 180, 320, 9)

In [10]:
rpn_regr_res.shape

(1, 180, 320, 36)

In [11]:
# print(rpn_end_points)

In [12]:
res[0].shape

(180, 320, 512)