# depthwise_convolution_2d.py
import numpy
from chainer import cuda
from chainer import function
from chainer.utils import conv
from chainer.utils import type_check
def _pair(x):
if hasattr(x, '__getitem__'):
return x
return x, x
def _matmul(a, b, xp):
if xp is numpy:
# numpy 1.9 does not support matmul.
# So we use numpy.einsum instead of numpy.matmul.
return xp.einsum('ijk,ikl->ijl', a, b)
else:
return xp.matmul(a, b)
class DepthwiseConvolution2D(function.Function):
    """Two-dimensional depthwise convolution (old-style chainer Function).

    Each of the ``D`` filters (``D = W.shape[0]``, the channel multiplier)
    is applied to every input channel independently, so an input with ``C``
    channels produces ``C * D`` output channels.
    """

    def __init__(self, stride=1, pad=0):
        # stride / pad accept an int or a pair; _pair normalizes both to
        # (vertical, horizontal).
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)

    def check_type_forward(self, in_types):
        """Validate dtypes and shapes of the inputs ``(x, W[, b])``."""
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type = in_types[0]
        w_type = in_types[1]
        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 4,
            # W is (D, C, kh, kw); its channel axis must match x's.
            x_type.shape[1] == w_type.shape[1],
        )
        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                # One bias entry per output channel: D * C in total.
                b_type.shape[0] == w_type.shape[0] * w_type.shape[1],
            )

    def forward(self, inputs):
        """Compute ``y = depthwise_conv(x, W) (+ b)`` on CPU or GPU.

        Stores the im2col patches in ``self.col`` for reuse by backward.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        xp = cuda.get_array_module(*x)
        if xp is numpy:
            self.col = conv.im2col_cpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw)
        else:
            self.col = conv.im2col_gpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw)
        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]  # W is (D, C, KY, KX)
        # Group patches by channel so each channel's patches multiply
        # against that channel's own filters in one batched matmul.
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, B*IY*IX, KY*KX), (C, KY*KX, D)-> (C, B*IY*IX, D)
        y = _matmul(c_, w_, xp).astype(x.dtype, copy=False)
        # (C, B*IY*IX, D) -> (B, C*D, IY, IX)
        y = y.reshape((C, B, IY * IX, D)).transpose(1, 0, 3, 2) \
            .reshape((B, C * D, IY, IX))
        if b is not None:
            y += b[None, :, None, None]
        return y,

    def backward(self, inputs, grad_outputs):
        """Backpropagate ``gy`` to ``(gx, gW[, gb])``.

        Replays forward's transpose/reshape layout using the cached
        ``self.col`` patches.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]
        xp = cuda.get_array_module(*x)
        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]
        # (B, C*D, IY, IX) -> (C, D, B*IY*IX)
        gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
            .reshape((C, D, B * IY * IX))
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
        gW_ = _matmul(gy_, c_, xp)
        gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
        gW = gW.astype(W.dtype, copy=False)
        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
        gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
        gcol = gcol.astype(x.dtype, copy=False)
        # Move the batch axis back to the front: col2im expects
        # (B, C, KY, KX, IY, IX).
        gcol = xp.rollaxis(gcol, 3)
        if xp is numpy:
            gx = conv.col2im_cpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)
        else:
            gx = conv.col2im_gpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)
        if b is None:
            return gx, gW
        else:
            # Bias gradient: sum gy over batch and spatial positions,
            # leaving one value per output channel.
            gy = xp.rollaxis(gy, 1, 4)
            gb = gy.sum(axis=(0, 1, 2))
            return gx, gW, gb
def depthwise_convolution_2d(x, W, b=None, stride=1, pad=0):
    """Two-dimensional depthwise convolution function.

    Applies a depthwise convolution to the input image ``x`` using the
    filter weight ``W`` and, optionally, the bias vector ``b``.

    Notation for dimensionalities:

    - :math:`n` is the batch size.
    - :math:`c_I` is the number of the input.
    - :math:`c_M` is the channel multiplier.
    - :math:`h` and :math:`w` are the height and width of the input image.
    - :math:`h_O` and :math:`w_O` are the height and width of the output
      image.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters.

    Args:
        x (chainer.Variable or :class:`numpy.ndarray` or cupy.ndarray):
            Input variable of shape :math:`(n, c_I, h, w)`.
        W (~chainer.Variable): Weight variable of shape
            :math:`(c_M, c_I, k_H, k_W)`.
        b (~chainer.Variable):
            Bias variable of length :math:`c_M * c_I` (optional).
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.

    Returns:
        ~chainer.Variable:
            Output variable of shape :math:`(n, c_I * c_M, h_O, w_O)`.

    As with ``Convolution2D``, filters are correlated with patches of size
    :math:`(k_H, k_W)` in ``x``; unlike ``Convolution2D``, the per-channel
    results are concatenated rather than summed over input channels, which
    is why the output has :math:`c_I * c_M` channels (:math:`c_M` is the
    channel multiplier). :math:`(h_O, w_O)` follows the same equation as
    ``Convolution2D``. When ``b`` is given it is broadcast over all spatial
    locations of the convolution output.

    See: `L. Sifre. Rigid-motion scattering for image classification\
    <http://www.di.ens.fr/data/publications/papers/phd_sifre.pdf>`_

    .. seealso:: :class:`~chainer.links.DepthwiseConvolution2D`

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (2, 3, 4, 7))
        >>> W = np.random.uniform(0, 1, (2, 3, 3, 3))
        >>> b = np.random.uniform(0, 1, (6,))
        >>> y = F.depthwise_convolution_2d(x, W, b)
        >>> y.shape
        (2, 6, 2, 5)

    """
    # Pack the inputs once and dispatch a single call; the bias is simply
    # omitted from the argument tuple when not provided.
    args = (x, W) if b is None else (x, W, b)
    return DepthwiseConvolution2D(stride, pad)(*args)