-
Notifications
You must be signed in to change notification settings - Fork 111
/
bbox.py
94 lines (71 loc) · 3.21 KB
/
bbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import torch
def nonlinear_transform(ex_rois, gt_rois):
    """
    Encode gt_rois relative to ex_rois as (dx, dy, dw, dh) regression targets
    :param ex_rois: [k, 4] ([x1, y1, x2, y2])
    :param gt_rois: [k, 4] (gt box matched to each ex_roi, [x1, y1, x2, y2])
    :return: bbox_targets: [k, 4] ([dx, dy, dw, dh])
    """
    assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'

    def _extent_and_center(rois):
        # Sizes are measured as (x2 - x1 + 1, y2 - y1 + 1); centers follow from them.
        widths = rois[:, 2] - rois[:, 0] + 1.0
        heights = rois[:, 3] - rois[:, 1] + 1.0
        center_x = rois[:, 0] + 0.5 * (widths - 1.0)
        center_y = rois[:, 1] + 0.5 * (heights - 1.0)
        return widths, heights, center_x, center_y

    src_w, src_h, src_cx, src_cy = _extent_and_center(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _extent_and_center(gt_rois)

    # Center offsets are normalised by the source box size (epsilon-padded).
    delta_x = (dst_cx - src_cx) / (src_w + 1e-6)
    delta_y = (dst_cy - src_cy) / (src_h + 1e-6)
    # Size changes are log-ratios; the denominator is clamped away from zero.
    delta_w = torch.log(dst_w / torch.clamp(src_w, min=1e-6))
    delta_h = torch.log(dst_h / torch.clamp(src_h, min=1e-6))

    columns = [t.view(-1, 1) for t in (delta_x, delta_y, delta_w, delta_h)]
    return torch.cat(columns, dim=-1)
def coordinate_embeddings(boxes, dim):
    """
    Sin/cos embeddings of box center and size, scaled by the image size
    :param boxes: [K, 6] ([x1, y1, x2, y2, w_image, h_image])
    :param dim: number of sin channels (and of cos channels) per coordinate
    :return: [K, 4, 2 * dim] (sin channels first, then cos channels)
    """
    count = boxes.shape[0]
    image_w = boxes[:, 4]
    image_h = boxes[:, 5]

    # corners -> (x_c, y_c, w, h); written into a buffer of the input dtype
    cxcywh = boxes.new_zeros((count, 4))
    cxcywh[:, :2] = (boxes[:, 0:2] + boxes[:, 2:4]) / 2
    cxcywh[:, 2:] = boxes[:, 2:4] - boxes[:, 0:2]

    # x-like columns are divided by image width, y-like columns by image
    # height, then multiplied by 100
    divisor = torch.stack((image_w, image_h, image_w, image_h), dim=-1)
    pos = cxcywh.new_zeros((count, 4))
    pos[:] = cxcywh / divisor * 100

    # one divisor per embedding channel: 1000 ** (i / dim) for i in [0, dim)
    exponents = torch.arange(dim, dtype=cxcywh.dtype, device=cxcywh.device) / dim
    dim_mat = 1000 ** exponents

    # the phase is shared by the sin and cos halves, so compute it once
    phase = pos.view((count, 4, 1)) / dim_mat.view((1, 1, -1))
    return torch.cat((phase.sin(), phase.cos()), dim=-1)
def bbox_iou_py_vectorized(boxes, query_boxes):
    """
    Pairwise intersection-over-union between two sets of boxes
    :param boxes: [N, 4] ([x1, y1, x2, y2]); only the first four columns are read
    :param query_boxes: [K, 4] ([x1, y1, x2, y2]); only the first four columns are read
    :return: [N, K], entry (i, j) is the IoU of boxes[i] and query_boxes[j]

    Widths and heights are measured as x2 - x1 + 1 and y2 - y1 + 1.
    """
    # Shape the coordinates as [N, 1] and [1, K] so every elementwise op below
    # broadcasts to the full [N, K] grid of pairs. This avoids building
    # meshgrid index tensors (and the torch.meshgrid `indexing` warning) and
    # avoids materialising N * K gathered copies of both inputs.
    x11, y11, x12, y12 = (boxes[:, i].unsqueeze(1) for i in range(4))
    x21, y21, x22, y22 = (query_boxes[:, i].unsqueeze(0) for i in range(4))

    # Overlap rectangle; a non-positive extent means the pair does not overlap.
    inter_w = torch.clamp(torch.min(x12, x22) - torch.max(x11, x21) + 1, min=0)
    inter_h = torch.clamp(torch.min(y12, y22) - torch.max(y11, y21) + 1, min=0)
    inter_area = inter_w * inter_h

    boxes_area = (x12 - x11 + 1) * (y12 - y11 + 1)        # [N, 1]
    query_area = (x22 - x21 + 1) * (y22 - y21 + 1)        # [1, K]

    # The result is computed from the inputs, so it already lives on their device.
    return inter_area / (boxes_area + query_area - inter_area)