/
im2col.py
163 lines (124 loc) · 5.36 KB
/
im2col.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import numpy
from chainer import function_node
from chainer.utils.conv import col2im_cpu
from chainer.utils.conv import col2im_gpu
from chainer.utils.conv import im2col_cpu
from chainer.utils.conv import im2col_gpu
from chainer.utils import type_check
def _pair(x):
if hasattr(x, '__getitem__'):
return x
return x, x
def _col2im(x, *args, **kwargs):
    """Call ``col2im_cpu`` for NumPy arrays and ``col2im_gpu`` otherwise.

    All positional and keyword arguments after ``x`` are forwarded
    untouched to the selected implementation.
    """
    impl = col2im_cpu if isinstance(x, numpy.ndarray) else col2im_gpu
    return impl(x, *args, **kwargs)
def _im2col(x, *args, **kwargs):
    """Call ``im2col_cpu`` for NumPy arrays and ``im2col_gpu`` otherwise.

    All positional and keyword arguments after ``x`` are forwarded
    untouched to the selected implementation.
    """
    impl = im2col_cpu if isinstance(x, numpy.ndarray) else im2col_gpu
    return impl(x, *args, **kwargs)
class Im2Col(function_node.FunctionNode):
    """Im2Col function."""

    def __init__(self, ksize, stride, pad, cover_all, dilate):
        """Store the patch-extraction parameters.

        ``ksize``, ``stride``, ``pad`` and ``dilate`` each go through
        ``_pair`` and are unpacked into a (vertical, horizontal) pair of
        attributes; ``cover_all`` is stored as given.
        """
        self.kh, self.kw = _pair(ksize)    # kernel height / width
        self.sy, self.sx = _pair(stride)   # stride along y / x
        self.ph, self.pw = _pair(pad)      # padding along height / width
        self.dy, self.dx = _pair(dilate)   # dilation along y / x
        self.cover_all = cover_all

    def check_type_forward(self, in_types):
        """Require exactly one floating-point, 4-dimensional input."""
        type_check.expect(in_types.size() == 1)

        in_type = in_types[0]
        type_check.expect(
            in_type.dtype.kind == 'f',
            in_type.ndim == 4,
        )

    def forward(self, inputs):
        """Extract patches and merge the kernel axes into the channel axis.

        Returns:
            A one-element tuple holding the array reshaped to
            ``(n, c * kh * kw, out_h, out_w)``.
        """
        (image,) = inputs
        cols = _im2col(
            image,
            self.kh, self.kw,
            self.sy, self.sx,
            self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)

        # The helper result is 6-dimensional; collapse the channel and
        # kernel axes into one while keeping batch and output positions.
        batch, channels, kh, kw, out_h, out_w = cols.shape
        merged = cols.reshape(batch, channels * kh * kw, out_h, out_w)
        return merged,

    def backward(self, indexes, grad_outputs):
        """Apply ``Im2ColGrad`` (same parameters) to ``grad_outputs``.

        The shape of this node's input is passed along so the gradient
        node knows the spatial size to restore.
        """
        in_shape = self.inputs[0].shape
        grad_node = Im2ColGrad(
            (self.kh, self.kw),
            (self.sy, self.sx),
            (self.ph, self.pw),
            self.cover_all,
            (self.dy, self.dx),
            in_shape)
        return grad_node.apply(grad_outputs)
class Im2ColGrad(function_node.FunctionNode):
    """Im2Col gradient function."""

    def __init__(self, ksize, stride, pad, cover_all, dilate, in_shape):
        """Store the patch parameters and the shape of the original input.

        ``ksize``, ``stride``, ``pad`` and ``dilate`` each go through
        ``_pair`` and are unpacked into a (vertical, horizontal) pair of
        attributes; ``cover_all`` and ``in_shape`` are stored as given.
        """
        self.kh, self.kw = _pair(ksize)    # kernel height / width
        self.sy, self.sx = _pair(stride)   # stride along y / x
        self.ph, self.pw = _pair(pad)      # padding along height / width
        self.dy, self.dx = _pair(dilate)   # dilation along y / x
        self.cover_all = cover_all
        self.in_shape = in_shape

    def check_type_forward(self, in_types):
        """Require exactly one floating-point, 4-dimensional input."""
        type_check.expect(in_types.size() == 1)

        in_type = in_types[0]
        type_check.expect(
            in_type.dtype.kind == 'f',
            in_type.ndim == 4,
        )

    def forward(self, inputs):
        """Fold the incoming gradient back to the original spatial size.

        Returns:
            A one-element tuple holding the result of ``_col2im``.
        """
        _, channels, height, width = self.in_shape
        (gy,) = inputs
        batch, _, out_h, out_w = gy.shape

        # Split the merged channel axis back into (c, kh, kw) before folding.
        cols = gy.reshape(
            batch, channels, self.kh, self.kw, out_h, out_w)

        # ``cover_all`` is not forwarded here; the target spatial size is
        # passed explicitly as (height, width).
        gx = _col2im(
            cols,
            self.sy, self.sx,
            self.ph, self.pw,
            height, width,
            self.dy, self.dx)
        return gx,

    def backward(self, indexes, grad_outputs):
        """Apply ``Im2Col`` with the same parameters to ``grad_outputs``."""
        node = Im2Col(
            (self.kh, self.kw),
            (self.sy, self.sx),
            (self.ph, self.pw),
            self.cover_all,
            (self.dy, self.dx))
        return node.apply(grad_outputs)
def im2col(x, ksize, stride=1, pad=0, cover_all=False, dilate=1):
    """Rearrange filter-sized image patches into the channel dimension.

    Each patch covered by the filter is flattened and stored along the
    channel axis of the output.

    Patch positions start at ``-pad`` on every spatial axis and advance in
    steps of ``stride``. The right-most (or bottom-most) patches do not run
    over the padded spatial size.

    Notation used below:

    - :math:`n`: the batch size.
    - :math:`c`: the number of input channels.
    - :math:`h`, :math:`w`: the height and width of the input image.
    - :math:`k_H`, :math:`k_W`: the height and width of the filters.
    - :math:`s_Y`, :math:`s_X`: the strides of the filter.
    - :math:`p_H`, :math:`p_W`: the spatial padding sizes.
    - :math:`d_Y`, :math:`d_X`: the dilation factors of filter application.

    With ``cover_all = False`` the output size :math:`(h_O, w_O)` is

    .. math::

       h_O &= (h + 2p_H - k_H - (k_H - 1) * (d_Y - 1)) / s_Y + 1,\\\\
       w_O &= (w + 2p_W - k_W - (k_W - 1) * (d_X - 1)) / s_X + 1.

    With ``cover_all = True`` it is

    .. math::

       h_O &= (h + 2p_H - k_H - (k_H - 1) * (d_Y - 1) + s_Y - 1) / s_Y + 1,\\\\
       w_O &= (w + 2p_W - k_W - (k_W - 1) * (d_X - 1) + s_X - 1) / s_X + 1.

    Args:
        x (~chainer.Variable): Input variable of shape :math:`(n, c, h, w)`.
        ksize (int or pair of ints): Size of filters (a.k.a. kernels).
            ``ksize=k`` and ``ksize=(k, k)`` are equivalent.
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        cover_all (bool): If ``True``, all spatial locations are rearranged
            into some output pixels. It may make the output size larger.
        dilate (int or pair of ints): Dilation factor of filter
            applications. ``dilate=d`` and ``dilate=(d, d)`` are equivalent.

    Returns:
        ~chainer.Variable:
        Output variable whose shape is
        :math:`(n, c \\cdot k_H \\cdot k_W, h_O, w_O)`

    """
    node = Im2Col(ksize, stride, pad, cover_all, dilate)
    outputs = node.apply((x,))
    # ``apply`` returns a sequence of outputs; only the first is returned.
    return outputs[0]