This repository has been archived by the owner on Jul 2, 2021. It is now read-only.
/
yolo_base.py
112 lines (90 loc) · 3.79 KB
/
yolo_base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import chainer
from chainer.backends import cuda
from chainercv import transforms
class YOLOBase(chainer.Chain):
    """Base class for YOLOv2 and YOLOv3.

    A subclass of this class should have :obj:`extractor`,
    :meth:`__call__`, and :meth:`_decode`.

    """

    @property
    def insize(self):
        """Input size of the network, as reported by :obj:`extractor`.

        :meth:`predict` uses this single value for both the height and
        the width of the network input.

        """
        return self.extractor.insize

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`nms_thresh` and
        :obj:`score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.

        Raises:
            ValueError: If :obj:`preset` is neither :obj:`'visualize'`
                nor :obj:`'evaluate'`.

        """

        if preset == 'visualize':
            self.nms_thresh = 0.45
            self.score_thresh = 0.5
        elif preset == 'evaluate':
            self.nms_thresh = 0.45
            self.score_thresh = 0.005
        else:
            raise ValueError('preset must be visualize or evaluate')

    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.
           If :obj:`imgs` is empty, all three lists are empty.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        x = []
        params = []
        for img in imgs:
            _, H, W = img.shape
            # Scale the values to [0, 1] and fit the image into an
            # insize x insize canvas; the rest of the canvas is filled
            # with 0.5.
            img, param = transforms.resize_contain(
                img / 255, (self.insize, self.insize), fill=0.5,
                return_param=True)
            x.append(self.xp.array(img))
            # Remember the original size to map the boxes back later.
            param['size'] = (H, W)
            params.append(param)

        # xp.stack() raises ValueError on an empty sequence, so an empty
        # batch is answered directly. The check is placed after the loop
        # so that one-shot iterables (e.g. generators) are supported too.
        if not x:
            return [], [], []

        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            locs, objs, confs = self(self.xp.stack(x))
        locs = locs.array
        objs = objs.array
        confs = confs.array

        bboxes = []
        labels = []
        scores = []
        for loc, obj, conf, param in zip(locs, objs, confs, params):
            bbox, label, score = self._decode(loc, obj, conf)

            bbox = cuda.to_cpu(bbox)
            label = cuda.to_cpu(label)
            score = cuda.to_cpu(score)

            # Map the boxes from the network input back to the original
            # image: move the origin to the center of the network input,
            # rescale from the resized image size to the original size,
            # then move the origin back to the top-left corner of the
            # original image.
            bbox = transforms.translate_bbox(
                bbox, -self.insize / 2, -self.insize / 2)
            bbox = transforms.resize_bbox(
                bbox, param['scaled_size'], param['size'])
            bbox = transforms.translate_bbox(
                bbox, param['size'][0] / 2, param['size'][1] / 2)

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        return bboxes, labels, scores