inference_detection.py
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
import utils


def refine_detections(rois, probs, deltas, window, config):
    """Refine classified proposals for one image and return the final detections
    as a [N, (y1, x1, y2, x2, class_id, score)] tensor in image coordinates,
    zero-padded to DETECTION_MAX_INSTANCES rows."""
    # Class IDs per ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Class probability of the top class of each ROI
    indices = tf.stack([tf.range(tf.shape(probs)[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, indices)
    # Class-specific bounding box deltas
    deltas_specific = tf.gather_nd(deltas, indices)
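    # Illustration (not executed): a small hand-worked example of the gathers above.
    #   probs        = [[0.1, 0.7, 0.2],
    #                   [0.8, 0.1, 0.1]]
    #   class_ids    -> [1, 0]
    #   indices      -> [[0, 1], [1, 0]]
    #   class_scores -> [0.7, 0.8]
    # deltas_specific picks the bounding box delta row of the top class for each
    # ROI in the same way.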
    # Apply bounding box deltas
    refined_rois = utils.apply_bbox_offset(
        rois, deltas_specific * config.BBOX_STD_DEV)
    # Convert coordinates to image domain
    # TODO: better to keep them normalized until later
    height, width = config.IMAGE_SHAPE[:2]
    refined_rois *= tf.constant([height, width, height, width], dtype=tf.float32)
    # Clip boxes to image window
    refined_rois = utils.clip_boxes(refined_rois, window)
    # Round and cast to int since we're dealing with pixels now
    refined_rois = tf.cast(tf.math.rint(refined_rois), tf.int32)
    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    if config.DETECTION_MIN_CONFIDENCE:
        conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(conf_keep, 0))
        keep = tf.sparse.to_dense(keep)[0]

    # Apply per-class NMS
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]
    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""
        # Indices of ROIs of the given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        # Apply NMS
        class_keep = tf.image.non_max_suppression(
            tf.cast(tf.gather(pre_nms_rois, ixs), tf.float32),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCES,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        # Map indices back to indices of the original ROIs
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        # Pad with -1 so returned tensors have the same shape
        gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT', constant_values=-1)
        # Set shape so map_fn() can infer result shape
        class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
        return class_keep

    # 2. Map over class IDs
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int64)
    # 3. Merge results into one list, and remove -1 padding
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
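    # Illustration (not executed): with DETECTION_MAX_INSTANCES = 4 and two classes,
    #   nms_keep from map_fn -> [[3, 7, -1, -1],
    #                            [5, -1, -1, -1]]
    #   after reshape        -> [3, 7, -1, -1, 5, -1, -1, -1]
    #   after removing -1    -> [3, 7, 5]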
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.intersection(tf.expand_dims(keep, 0),
                                tf.expand_dims(nms_keep, 0))
    keep = tf.sparse.to_dense(keep)[0]
    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are in image domain.
    detections = tf.concat([
        tf.cast(tf.gather(refined_rois, keep), tf.float32),
        tf.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis],
        tf.gather(class_scores, keep)[..., tf.newaxis]
    ], axis=1)

    # Pad with zeros if detections < DETECTION_MAX_INSTANCES
    gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections

class InferenceDetectionLayer(layers.Layer):
    """
    Takes classified proposal boxes and their bounding box deltas and
    returns the final detection boxes.
    """

    def __init__(self, config, **kwargs):
        super(InferenceDetectionLayer, self).__init__(**kwargs)
        self.config = config

    def call(self, inputs):
        rois = inputs[0]
        rcnn_class = inputs[1]
        rcnn_bbox = inputs[2]
        image_meta = inputs[3]
        window = utils.parse_image_meta(image_meta)['window']
        detections = utils.batch_slice(
            [rois, rcnn_class, rcnn_bbox, window],
            lambda x, y, z, w: refine_detections(x, y, z, w, self.config),
            self.config.IMAGES_PER_GPU
        )
        # [N, (y1, x1, y2, x2, class_id, score)]
        return tf.reshape(
            detections,
            [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6])

    def compute_output_shape(self, input_shape):
        return (None, self.config.DETECTION_MAX_INSTANCES, 6)
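
# -----------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the module). The SimpleConfig
# stub, NUM_CLASSES, and META_SIZE below are assumptions for the example; the
# real config object, class count, and image-meta layout come from the rest of
# this codebase.
#
#   class SimpleConfig:
#       BATCH_SIZE = 1
#       IMAGES_PER_GPU = 1
#       IMAGE_SHAPE = (1024, 1024, 3)
#       BBOX_STD_DEV = [0.1, 0.1, 0.2, 0.2]
#       DETECTION_MIN_CONFIDENCE = 0.7
#       DETECTION_MAX_INSTANCES = 100
#       DETECTION_NMS_THRESHOLD = 0.3
#
#   rois = layers.Input(shape=(None, 4))                    # normalized proposals
#   rcnn_class = layers.Input(shape=(None, NUM_CLASSES))    # classifier scores
#   rcnn_bbox = layers.Input(shape=(None, NUM_CLASSES, 4))  # per-class deltas
#   image_meta = layers.Input(shape=(META_SIZE,))
#   detections = InferenceDetectionLayer(SimpleConfig())(
#       [rois, rcnn_class, rcnn_bbox, image_meta])
#   # detections: [BATCH_SIZE, DETECTION_MAX_INSTANCES, (y1, x1, y2, x2, class_id, score)]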