In [2]:
import torch
import torch.nn as nn

# Dummy inputs for the walkthrough: one blank RGB image, two ground-truth
# boxes and the class id of each box.
image = torch.zeros((1, 3, 800, 800), dtype=torch.float32)  # batch, channels, height, width
bbox = torch.tensor(
    [[20, 30, 400, 500], [300, 400, 500, 600]],
    dtype=torch.float32,
)  # one row per box: y1, x1, y2, x2
labels = torch.tensor([6, 8], dtype=torch.int64)  # 0 represents background
sub_sample = 16  # down-sampling factor between image and feature map (800 / 16 = 50)

In [3]:
import torchvision

# Sanity check: display the dummy input tensor (all zeros, shape 1x3x800x800).
print(image)

tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]]])


In [7]:
# Build the backbone: walk through the VGG16 feature layers and keep every
# layer up to (but not including) the first one whose output is spatially
# smaller than the target feature-map size, image height // sub_sample.
# NOTE(review): `pretrained=True` is deprecated in recent torchvision releases
# in favour of `weights=...`; kept as-is to stay compatible with the version
# this notebook was written against.
vgg = torchvision.models.vgg16(pretrained=True)

min_map_size = image.size(2) // sub_sample  # 800 // 16 = 50 for the dummy image
fee = []
out_channels = image.size(1)  # stays defined even if no layer is kept
x = image.clone()

# Only the output shapes are needed here, so skip autograd bookkeeping.
with torch.no_grad():
    for k, v in vgg.features.named_children():
        x = v(x)
        if x.size(2) < min_map_size:
            # This layer would shrink the map below the target size: report it and stop.
            print(k, v)
            break
        fee.append(v)
        out_channels = x.size(1)

print(len(fee))
print(out_channels)

30 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
30
512


In [9]:
# Chain the retained VGG16 layers (collected in `fee`) into one module that
# acts as the feature extractor.
faster_rcnn_extractor = nn.Sequential(*fee)
print(faster_rcnn_extractor)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(

In [10]:
# Run the dummy image through the extractor and report the resulting
# feature-map shape (batch, channels, height, width).
out_map = faster_rcnn_extractor(image)
print(out_map.size())

torch.Size([1, 512, 50, 50])


In [12]:
import numpy as np

# Anchor configuration: each location gets one anchor per
# (height/width ratio, scale) combination.
ratio = [0.5, 1, 2]
anchor_scales = [8, 16, 32]

# Zero-initialised table for the anchors of a single location,
# one row per anchor in y1, x1, y2, x2 order.
anchor_base = np.zeros(
    shape=(len(anchor_scales) * len(ratio), 4),
    dtype=np.float32,
)
print(anchor_base)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [13]:
# Centre of the first feature-map cell, expressed in image pixels.
ctr_y = ctr_x = sub_sample / 2
print(ctr_y,ctr_x)

# Fill anchor_base: rows are grouped by ratio and, within a ratio, ordered by scale.
for i, r in enumerate(ratio):
    for j, s in enumerate(anchor_scales):
        # Height grows and width shrinks with sqrt(ratio), so h / w == ratio
        # while the area stays (sub_sample * scale) ** 2.
        h = sub_sample * s * np.sqrt(r)
        w = sub_sample * s * np.sqrt(1. / r)

        index = i * len(anchor_scales) + j
        anchor_base[index] = (
            ctr_y - h / 2,  # y1
            ctr_x - w / 2,  # x1
            ctr_y + h / 2,  # y2
            ctr_x + w / 2,  # x2
        )

print(anchor_base)

8.0 8.0
[[ -37.254833  -82.50967    53.254833   98.50967 ]
 [ -82.50967  -173.01933    98.50967   189.01933 ]
 [-173.01933  -354.03867   189.01933   370.03867 ]
 [ -56.        -56.         72.         72.      ]
 [-120.       -120.        136.        136.      ]
 [-248.       -248.        264.        264.      ]
 [ -82.50967   -37.254833   98.50967    53.254833]
 [-173.01933   -82.50967   189.01933    98.50967 ]
 [-354.03867  -173.01933   370.03867   189.01933 ]]


In [18]:
# Feature-map side length and the pixel coordinate of the far edge of every
# feature-map cell along each axis. Derived from the image size and
# `sub_sample` instead of the literals 800 and 16 so the grid follows the
# inputs. The same size is used for both axes, so a square image is assumed.
# Half a cell is subtracted from these values later to obtain cell centres.
fe_size = image.size(2) // sub_sample
ctr_x = np.arange(sub_sample, (fe_size + 1) * sub_sample, sub_sample)
ctr_y = np.arange(sub_sample, (fe_size + 1) * sub_sample, sub_sample)
print(fe_size)
print(ctr_x)
print(ctr_y)

50
[ 16  32  48  64  80  96 112 128 144 160 176 192 208 224 240 256 272 288
 304 320 336 352 368 384 400 416 432 448 464 480 496 512 528 544 560 576
 592 608 624 640 656 672 688 704 720 736 752 768 784 800]
[ 16  32  48  64  80  96 112 128 144 160 176 192 208 224 240 256 272 288
 304 320 336 352 368 384 400 416 432 448 464 480 496 512 528 544 560 576
 592 608 624 640 656 672 688 704 720 736 752 768 784 800]


In [16]:
# Centre (y, x) of every feature-map cell in image pixels, one row per cell.
# `ctr_x` / `ctr_y` hold the far edge of each cell, so half a cell
# (sub_sample / 2) is subtracted to reach the centre.
half_cell = sub_sample / 2

# indexing="ij" + C-order ravel gives the row order "x outer, y inner":
# row k corresponds to x index k // len(ctr_y) and y index k % len(ctr_y).
grid_x, grid_y = np.meshgrid(ctr_x, ctr_y, indexing="ij")
ctr = np.stack(
    [grid_y.ravel() - half_cell, grid_x.ravel() - half_cell],  # column 0 = y, column 1 = x
    axis=1,
)

print(ctr)

[[  8.   8.]
 [ 24.   8.]
 [ 40.   8.]
 ...
 [760. 792.]
 [776. 792.]
 [792. 792.]]


In [19]:
# Generate every anchor box: each centre in `ctr` gets one anchor per
# (ratio, scale) pair, stored as y1, x1, y2, x2.
# Row order is centre-major, then ratio, then scale, i.e.
# row = centre_index * n_shapes + ratio_index * len(anchor_scales) + scale_index.
# The centre coordinates are read from `ctr` without rebinding `ctr_x` / `ctr_y`,
# so the arrays defined earlier stay intact when cells are re-run.
ratios_arr = np.asarray(ratio, dtype=np.float64)
sizes_arr = sub_sample * np.asarray(anchor_scales, dtype=np.float64)

# Half heights / widths of the anchor shapes, flattened ratio-major.
half_h = (np.sqrt(ratios_arr)[:, None] * sizes_arr[None, :]).ravel() / 2
half_w = (np.sqrt(1. / ratios_arr)[:, None] * sizes_arr[None, :]).ravel() / 2

# Column vectors so each centre broadcasts against all anchor shapes.
centre_y = ctr[:, 0:1]
centre_x = ctr[:, 1:2]

anchors = np.stack(
    [centre_y - half_h, centre_x - half_w, centre_y + half_h, centre_x + half_w],
    axis=-1,
).reshape(-1, 4)
print(anchors.shape)

(22500, 4)


In [20]:
# First anchor: ratio 0.5, scale 8, centred on the first grid cell (y=8, x=8).
print(anchors[0])

[-37.254834   -82.50966799  53.254834    98.50966799]


In [21]:
# Anchor 100 belongs to centre index 11 (100 // 9) and is that centre's
# second anchor shape (ratio 0.5, scale 16).
print(anchors[100])

[  93.49033201 -173.01933598  274.50966799  189.01933598]


所有的 anchor boxes 已经生成完毕。下面需要给每个 anchor box 打上标签，并确定该 anchor box 所对应的目标：如果 anchor box 和目标的 ground-truth box 的 IoU 大于等于 0.7，则为该 anchor 打上 positive label。

In [30]:
# Indices of the anchors that lie completely inside the image.
# Bounds come from the image tensor (batch, channels, height, width) rather
# than a hard-coded 800, so the filter follows the actual input size.
img_h, img_w = image.shape[2:]
index_inside = np.where(
    (anchors[:, 0] >= 0) &      # y1 not above the top edge
    (anchors[:, 1] >= 0) &      # x1 not left of the left edge
    (anchors[:, 2] <= img_h) &  # y2 not below the bottom edge
    (anchors[:, 3] <= img_w)    # x2 not right of the right edge
)[0]
print(index_inside.shape)

(8940,)


In [31]:
# One int32 label slot per in-image anchor, initialised to -1
# (presumably "unassigned / ignore" until labels are set later - confirm).
label = np.full(len(index_inside), -1, dtype=np.int32)
print(label.shape)

(8940,)


In [32]:
# Select the rows of `anchors` listed in `index_inside`
# (the anchors that fit entirely inside the image).
valid_anchor_boxes = np.take(anchors, index_inside, axis=0)
print(valid_anchor_boxes.shape)
print(valid_anchor_boxes[0])

(8940, 4)
[ 13.49033201  10.745166   194.50966799 101.254834  ]


In [37]:
# Pairwise IoU between every in-image anchor (rows) and every ground-truth
# box (columns). Both box sets use y1, x1, y2, x2 order.
print(bbox)

# Work in NumPy throughout: iterating the torch tensor element by element and
# mixing its values with NumPy scalars is slow and leaves the dtype unclear.
# The column count follows the number of ground-truth boxes instead of a
# hard-coded 2.
gt_boxes = np.asarray(bbox, dtype=np.float64).reshape(-1, 4)

anchor_area = (
    (valid_anchor_boxes[:, 2] - valid_anchor_boxes[:, 0])
    * (valid_anchor_boxes[:, 3] - valid_anchor_boxes[:, 1])
)
box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])

# Intersection rectangle of every (anchor, gt) pair via broadcasting:
# anchors vary along axis 0, ground-truth boxes along axis 1.
inter_y1 = np.maximum(valid_anchor_boxes[:, None, 0], gt_boxes[None, :, 0])
inter_x1 = np.maximum(valid_anchor_boxes[:, None, 1], gt_boxes[None, :, 1])
inter_y2 = np.minimum(valid_anchor_boxes[:, None, 2], gt_boxes[None, :, 2])
inter_x2 = np.minimum(valid_anchor_boxes[:, None, 3], gt_boxes[None, :, 3])

# A pair overlaps only if the intersection has positive width and height.
overlaps = (inter_x1 < inter_x2) & (inter_y1 < inter_y2)
iter_area = np.where(overlaps, (inter_y2 - inter_y1) * (inter_x2 - inter_x1), 0.0)
union_area = anchor_area[:, None] + box_area[None, :] - iter_area

# Divide only where boxes overlap; every other entry stays 0, which also
# avoids 0 / 0 for degenerate boxes.
iou_full = np.zeros_like(iter_area)
np.divide(iter_area, union_area, out=iou_full, where=overlaps)
ious = iou_full.astype(np.float32)

print(ious.shape)
print((ious > 0.7)[0])

tensor([[ 20.,  30., 400., 500.],
        [300., 400., 500., 600.]])
(8940, 2)
[False False]


In [39]:
# For each ground-truth box (a column of `ious`): the row index of the anchor
# with the highest IoU ...
gt_argmax_ious = np.argmax(ious, axis=0)
print(gt_argmax_ious)

# ... and the value of that highest IoU.
gt_max_ious = np.max(ious, axis=0)
print(gt_max_ious)

[2262 5620]
[0.68130493 0.61035156]
