-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
faster_rcnn_vgg16.py
158 lines (127 loc) · 5.13 KB
/
faster_rcnn_vgg16.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import torch as t
from torch import nn
from torchvision.models import vgg16
from model.region_proposal_network import RegionProposalNetwork
from model.faster_rcnn import FasterRCNN
from model.roi_module import RoIPooling2D
from utils import array_tool as at
from utils.config import opt
def decom_vgg16():
# the 30th layer of features is relu of conv5_3
if opt.caffe_pretrain:
model = vgg16(pretrained=False)
if not opt.load_path:
model.load_state_dict(t.load(opt.caffe_pretrain_path))
else:
model = vgg16(not opt.load_path)
features = list(model.features)[:30]
classifier = model.classifier
classifier = list(classifier)
del classifier[6]
if not opt.use_drop:
del classifier[5]
del classifier[2]
classifier = nn.Sequential(*classifier)
# freeze top4 conv
for layer in features[:10]:
for p in layer.parameters():
p.requires_grad = False
return nn.Sequential(*features), classifier
class FasterRCNNVGG16(FasterRCNN):
"""Faster R-CNN based on VGG-16.
For descriptions on the interface of this model, please refer to
:class:`model.faster_rcnn.FasterRCNN`.
Args:
n_fg_class (int): The number of classes excluding the background.
ratios (list of floats): This is ratios of width to height of
the anchors.
anchor_scales (list of numbers): This is areas of anchors.
Those areas will be the product of the square of an element in
:obj:`anchor_scales` and the original area of the reference
window.
"""
feat_stride = 16 # downsample 16x for output of conv5 in vgg16
def __init__(self,
n_fg_class=20,
ratios=[0.5, 1, 2],
anchor_scales=[8, 16, 32]
):
extractor, classifier = decom_vgg16()
rpn = RegionProposalNetwork(
512, 512,
ratios=ratios,
anchor_scales=anchor_scales,
feat_stride=self.feat_stride,
)
head = VGG16RoIHead(
n_class=n_fg_class + 1,
roi_size=7,
spatial_scale=(1. / self.feat_stride),
classifier=classifier
)
super(FasterRCNNVGG16, self).__init__(
extractor,
rpn,
head,
)
class VGG16RoIHead(nn.Module):
"""Faster R-CNN Head for VGG-16 based implementation.
This class is used as a head for Faster R-CNN.
This outputs class-wise localizations and classification based on feature
maps in the given RoIs.
Args:
n_class (int): The number of classes possibly including the background.
roi_size (int): Height and width of the feature maps after RoI-pooling.
spatial_scale (float): Scale of the roi is resized.
classifier (nn.Module): Two layer Linear ported from vgg16
"""
def __init__(self, n_class, roi_size, spatial_scale,
classifier):
# n_class includes the background
super(VGG16RoIHead, self).__init__()
self.classifier = classifier
self.cls_loc = nn.Linear(4096, n_class * 4)
self.score = nn.Linear(4096, n_class)
normal_init(self.cls_loc, 0, 0.001)
normal_init(self.score, 0, 0.01)
self.n_class = n_class
self.roi_size = roi_size
self.spatial_scale = spatial_scale
self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
def forward(self, x, rois, roi_indices):
"""Forward the chain.
We assume that there are :math:`N` batches.
Args:
x (Variable): 4D image variable.
rois (Tensor): A bounding box array containing coordinates of
proposal boxes. This is a concatenation of bounding box
arrays from multiple images in the batch.
Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
RoIs from the :math:`i` th image,
:math:`R' = \\sum _{i=1} ^ N R_i`.
roi_indices (Tensor): An array containing indices of images to
which bounding boxes correspond to. Its shape is :math:`(R',)`.
"""
# in case roi_indices is ndarray
roi_indices = at.totensor(roi_indices).float()
rois = at.totensor(rois).float()
indices_and_rois = t.cat([roi_indices[:, None], rois], dim=1)
# NOTE: important: yx->xy
xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
indices_and_rois = t.autograd.Variable(xy_indices_and_rois.contiguous())
pool = self.roi(x, indices_and_rois)
pool = pool.view(pool.size(0), -1)
fc7 = self.classifier(pool)
roi_cls_locs = self.cls_loc(fc7)
roi_scores = self.score(fc7)
return roi_cls_locs, roi_scores
def normal_init(m, mean, stddev, truncated=False):
"""
weight initalizer: truncated normal and random normal.
"""
# x is a parameter
if truncated:
m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
else:
m.weight.data.normal_(mean, stddev)
m.bias.data.zero_()