-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
util.py
287 lines (228 loc) · 9.49 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
import numpy as np
from PIL import Image
import random
def read_image(path, dtype=np.float32, color=True):
"""Read an image from a file.
This function reads an image from given file. The image is CHW format and
the range of its value is :math:`[0, 255]`. If :obj:`color = True`, the
order of the channels is RGB.
Args:
path (str): A path of image file.
dtype: The type of array. The default value is :obj:`~numpy.float32`.
color (bool): This option determines the number of channels.
If :obj:`True`, the number of channels is three. In this case,
the order of the channels is RGB. This is the default behaviour.
If :obj:`False`, this function returns a grayscale image.
Returns:
~numpy.ndarray: An image.
"""
f = Image.open(path)
try:
if color:
img = f.convert('RGB')
else:
img = f.convert('P')
img = np.asarray(img, dtype=dtype)
finally:
if hasattr(f, 'close'):
f.close()
if img.ndim == 2:
# reshape (H, W) -> (1, H, W)
return img[np.newaxis]
else:
# transpose (H, W, C) -> (C, H, W)
return img.transpose((2, 0, 1))
def resize_bbox(bbox, in_size, out_size):
"""Resize bounding boxes according to image resize.
The bounding boxes are expected to be packed into a two dimensional
tensor of shape :math:`(R, 4)`, where :math:`R` is the number of
bounding boxes in the image. The second axis represents attributes of
the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`,
where the four attributes are coordinates of the top left and the
bottom right vertices.
Args:
bbox (~numpy.ndarray): An array whose shape is :math:`(R, 4)`.
:math:`R` is the number of bounding boxes.
in_size (tuple): A tuple of length 2. The height and the width
of the image before resized.
out_size (tuple): A tuple of length 2. The height and the width
of the image after resized.
Returns:
~numpy.ndarray:
Bounding boxes rescaled according to the given image shapes.
"""
bbox = bbox.copy()
y_scale = float(out_size[0]) / in_size[0]
x_scale = float(out_size[1]) / in_size[1]
bbox[:, 0] = y_scale * bbox[:, 0]
bbox[:, 2] = y_scale * bbox[:, 2]
bbox[:, 1] = x_scale * bbox[:, 1]
bbox[:, 3] = x_scale * bbox[:, 3]
return bbox
def flip_bbox(bbox, size, y_flip=False, x_flip=False):
"""Flip bounding boxes accordingly.
The bounding boxes are expected to be packed into a two dimensional
tensor of shape :math:`(R, 4)`, where :math:`R` is the number of
bounding boxes in the image. The second axis represents attributes of
the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`,
where the four attributes are coordinates of the top left and the
bottom right vertices.
Args:
bbox (~numpy.ndarray): An array whose shape is :math:`(R, 4)`.
:math:`R` is the number of bounding boxes.
size (tuple): A tuple of length 2. The height and the width
of the image before resized.
y_flip (bool): Flip bounding box according to a vertical flip of
an image.
x_flip (bool): Flip bounding box according to a horizontal flip of
an image.
Returns:
~numpy.ndarray:
Bounding boxes flipped according to the given flips.
"""
H, W = size
bbox = bbox.copy()
if y_flip:
y_max = H - bbox[:, 0]
y_min = H - bbox[:, 2]
bbox[:, 0] = y_min
bbox[:, 2] = y_max
if x_flip:
x_max = W - bbox[:, 1]
x_min = W - bbox[:, 3]
bbox[:, 1] = x_min
bbox[:, 3] = x_max
return bbox
def crop_bbox(
bbox, y_slice=None, x_slice=None,
allow_outside_center=True, return_param=False):
"""Translate bounding boxes to fit within the cropped area of an image.
This method is mainly used together with image cropping.
This method translates the coordinates of bounding boxes like
:func:`data.util.translate_bbox`. In addition,
this function truncates the bounding boxes to fit within the cropped area.
If a bounding box does not overlap with the cropped area,
this bounding box will be removed.
The bounding boxes are expected to be packed into a two dimensional
tensor of shape :math:`(R, 4)`, where :math:`R` is the number of
bounding boxes in the image. The second axis represents attributes of
the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`,
where the four attributes are coordinates of the top left and the
bottom right vertices.
Args:
bbox (~numpy.ndarray): Bounding boxes to be transformed. The shape is
:math:`(R, 4)`. :math:`R` is the number of bounding boxes.
y_slice (slice): The slice of y axis.
x_slice (slice): The slice of x axis.
allow_outside_center (bool): If this argument is :obj:`False`,
bounding boxes whose centers are outside of the cropped area
are removed. The default value is :obj:`True`.
return_param (bool): If :obj:`True`, this function returns
indices of kept bounding boxes.
Returns:
~numpy.ndarray or (~numpy.ndarray, dict):
If :obj:`return_param = False`, returns an array :obj:`bbox`.
If :obj:`return_param = True`,
returns a tuple whose elements are :obj:`bbox, param`.
:obj:`param` is a dictionary of intermediate parameters whose
contents are listed below with key, value-type and the description
of the value.
* **index** (*numpy.ndarray*): An array holding indices of used \
bounding boxes.
"""
t, b = _slice_to_bounds(y_slice)
l, r = _slice_to_bounds(x_slice)
crop_bb = np.array((t, l, b, r))
if allow_outside_center:
mask = np.ones(bbox.shape[0], dtype=bool)
else:
center = (bbox[:, :2] + bbox[:, 2:]) / 2.0
mask = np.logical_and(crop_bb[:2] <= center, center < crop_bb[2:]) \
.all(axis=1)
bbox = bbox.copy()
bbox[:, :2] = np.maximum(bbox[:, :2], crop_bb[:2])
bbox[:, 2:] = np.minimum(bbox[:, 2:], crop_bb[2:])
bbox[:, :2] -= crop_bb[:2]
bbox[:, 2:] -= crop_bb[:2]
mask = np.logical_and(mask, (bbox[:, :2] < bbox[:, 2:]).all(axis=1))
bbox = bbox[mask]
if return_param:
return bbox, {'index': np.flatnonzero(mask)}
else:
return bbox
def _slice_to_bounds(slice_):
if slice_ is None:
return 0, np.inf
if slice_.start is None:
l = 0
else:
l = slice_.start
if slice_.stop is None:
u = np.inf
else:
u = slice_.stop
return l, u
def translate_bbox(bbox, y_offset=0, x_offset=0):
"""Translate bounding boxes.
This method is mainly used together with image transforms, such as padding
and cropping, which translates the left top point of the image from
coordinate :math:`(0, 0)` to coordinate
:math:`(y, x) = (y_{offset}, x_{offset})`.
The bounding boxes are expected to be packed into a two dimensional
tensor of shape :math:`(R, 4)`, where :math:`R` is the number of
bounding boxes in the image. The second axis represents attributes of
the bounding box. They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`,
where the four attributes are coordinates of the top left and the
bottom right vertices.
Args:
bbox (~numpy.ndarray): Bounding boxes to be transformed. The shape is
:math:`(R, 4)`. :math:`R` is the number of bounding boxes.
y_offset (int or float): The offset along y axis.
x_offset (int or float): The offset along x axis.
Returns:
~numpy.ndarray:
Bounding boxes translated according to the given offsets.
"""
out_bbox = bbox.copy()
out_bbox[:, :2] += (y_offset, x_offset)
out_bbox[:, 2:] += (y_offset, x_offset)
return out_bbox
def random_flip(img, y_random=False, x_random=False,
return_param=False, copy=False):
"""Randomly flip an image in vertical or horizontal direction.
Args:
img (~numpy.ndarray): An array that gets flipped. This is in
CHW format.
y_random (bool): Randomly flip in vertical direction.
x_random (bool): Randomly flip in horizontal direction.
return_param (bool): Returns information of flip.
copy (bool): If False, a view of :obj:`img` will be returned.
Returns:
~numpy.ndarray or (~numpy.ndarray, dict):
If :obj:`return_param = False`,
returns an array :obj:`out_img` that is the result of flipping.
If :obj:`return_param = True`,
returns a tuple whose elements are :obj:`out_img, param`.
:obj:`param` is a dictionary of intermediate parameters whose
contents are listed below with key, value-type and the description
of the value.
* **y_flip** (*bool*): Whether the image was flipped in the\
vertical direction or not.
* **x_flip** (*bool*): Whether the image was flipped in the\
horizontal direction or not.
"""
y_flip, x_flip = False, False
if y_random:
y_flip = random.choice([True, False])
if x_random:
x_flip = random.choice([True, False])
if y_flip:
img = img[:, ::-1, :]
if x_flip:
img = img[:, :, ::-1]
if copy:
img = img.copy()
if return_param:
return img, {'y_flip': y_flip, 'x_flip': x_flip}
else:
return img