/
local_convolution_2d.py
189 lines (160 loc) · 6.95 KB
/
local_convolution_2d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
from six import moves
import chainer
from chainer.backends import cuda
from chainer import function_node
from chainer.utils import type_check
from chainer import variable
def _pair(x):
if hasattr(x, '__getitem__'):
return x
return x, x
class LocalConvolution2DFunction(function_node.FunctionNode):
    """Function node for a 2D locally-connected (unshared-weight) convolution.

    Every output position ``(i, j)`` owns a separate filter bank
    ``W[:, i, j]`` which is correlated with the input patch whose top-left
    corner is ``(i * sy, j * sx)``. No padding is applied.

    Inputs are ``x`` of shape ``(n, c_I, h, w)``, ``W`` of shape
    ``(c_O, h_O, w_O, c_I, k_H, k_W)`` and, optionally, ``b`` of shape
    ``(c_O, h_O, w_O)``. The output has shape ``(n, c_O, h_O, w_O)``.

    Args:
        stride (int or pair of ints): Vertical and horizontal stride
            ``(sy, sx)``. A single int is used for both directions.

    """

    def __init__(self, stride=1):
        self.sy, self.sx = _pair(stride)

    def _patch_slices(self, i, j, kh, kw):
        """Return the (row, column) slices of the patch for output (i, j)."""
        top = i * self.sy
        left = j * self.sx
        return slice(top, top + kh), slice(left, left + kw)

    def check_type_forward(self, in_types):
        """Validate number, dtype kind, rank and shapes of the inputs."""
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)

        x_type, w_type = in_types[:2]
        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 6,
            x_type.shape[1] == w_type.shape[3],
        )
        # The last patch along each axis must lie fully inside the input.
        # Without this check a too-small input yields truncated slices in
        # ``forward``; the following reshape then either fails with an
        # unrelated-looking error or silently folds the batch axis away.
        type_check.expect(
            (w_type.shape[1] - 1) * self.sy + w_type.shape[4]
            <= x_type.shape[2],
            (w_type.shape[2] - 1) * self.sx + w_type.shape[5]
            <= x_type.shape[3],
        )

        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 3,
                b_type.shape == w_type.shape[:3]
            )

    def forward(self, inputs):
        """Compute the locally-connected convolution.

        Args:
            inputs (tuple of arrays): ``(x, W)`` or ``(x, W, b)``.

        Returns:
            tuple: One-element tuple holding the output array of shape
            ``(n, c_O, h_O, w_O)`` and dtype ``x.dtype``.

        """
        # Channels-first is Chainer's tensor format
        # W is 6-dimensional
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        output_row, output_col = W.shape[1], W.shape[2]
        kh, kw = W.shape[4], W.shape[5]
        feature_dim = W.shape[3] * kh * kw
        xp = cuda.get_array_module(*inputs)
        output = xp.empty((x.shape[0], W.shape[0], output_row, output_col,),
                          dtype=x.dtype)
        for i in moves.range(output_row):
            for j in moves.range(output_col):
                slice_row, slice_col = self._patch_slices(i, j, kh, kw)
                # (n, c_I * k_H * k_W): one flattened patch per sample
                x_flatten = xp.reshape(x[..., slice_row, slice_col],
                                       (-1, feature_dim))
                # (c_O, c_I * k_H * k_W): filters owned by position (i, j)
                W_flatten = xp.reshape(W[:, i, j, ...],
                                       (-1, feature_dim))
                output[..., i, j] = xp.dot(x_flatten, W_flatten.T)
        if b is not None:
            # Broadcast the per-position bias over the batch axis.
            output += b[None, :, :, :]

        self.retain_inputs((0, 1))  # only retain x and W
        return output,

    def backward(self, indices, grad_outputs):
        """Compute gradients with respect to the requested inputs.

        Args:
            indices (tuple of int): Indices of the inputs (0: ``x``,
                1: ``W``, 2: ``b``) for which gradients are requested.
            grad_outputs (tuple of ~chainer.Variable): One-element tuple
                holding the gradient with respect to the output.

        Returns:
            list of ~chainer.Variable: Gradients for the requested inputs,
            in increasing order of input index.

        """
        # NOTE(review): gx and gW are computed on the raw arrays, so
        # differentiating through them again is presumably unsupported --
        # confirm before relying on double backprop.
        xvar, Wvar = self.get_retained_inputs()
        x = xvar.data
        W = Wvar.data
        gyvar, = grad_outputs
        gy = gyvar.data
        xp = cuda.get_array_module(x, W)
        output_row, output_col = W.shape[1], W.shape[2]
        kh, kw = W.shape[4], W.shape[5]

        ret = []
        if 0 in indices:
            # Patches may overlap, so contributions are accumulated.
            gx = xp.zeros_like(x)
            for i in moves.range(output_row):
                for j in moves.range(output_col):
                    slice_row, slice_col = self._patch_slices(i, j, kh, kw)
                    # ochans * ichans * krows * kcols
                    W_slice = W[:, i, j, ...]
                    # nsamps * ochans
                    gy_slice = gy[..., i, j]
                    # -> nsamps * ichans * krows * kcols
                    gx[:, :, slice_row, slice_col] += xp.tensordot(
                        gy_slice, W_slice, axes=[(1,), (0,)]
                    )
            ret.append(chainer.functions.cast(variable.as_variable(gx),
                                              x.dtype))
        if 1 in indices:
            # Every (i, j) entry is assigned below, so no zero-fill is needed.
            gW = xp.empty_like(W)
            for i in moves.range(output_row):
                for j in moves.range(output_col):
                    slice_row, slice_col = self._patch_slices(i, j, kh, kw)
                    # nsamps * inchans * krows * kcols
                    x_slice = x[:, :, slice_row, slice_col]
                    # nsamps * outchans
                    gy_slice = gy[:, :, i, j]
                    # -> outchans * inchans * krows * kcols
                    gW[:, i, j, :, :, :] = xp.tensordot(
                        gy_slice, x_slice, axes=[(0,), (0,)]
                    )
            ret.append(chainer.functions.cast(variable.as_variable(gW),
                                              W.dtype))
        if 2 in indices:
            # The bias is broadcast over the batch axis in forward, so its
            # gradient is the sum of gy over that axis.
            gb = chainer.functions.sum(gyvar, axis=0)
            ret.append(gb)

        return ret
def local_convolution_2d(x, W, b=None, stride=1):
    """Two-dimensional local convolution function.

    Locally-connected function for 2D inputs. Works similarly to
    convolution_2d, except that weights are unshared, that is, a different set
    of filters is applied at each different patch of the input.
    It takes two or three variables: the input image ``x``, the filter weight
    ``W``, and optionally, the bias ``b``.

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`c_I` is the number of input channels.
    - :math:`c_O` is the number of output channels.
    - :math:`h` and :math:`w` are the height and width of the input image,
      respectively.
    - :math:`h_O` and :math:`w_O` are the height and width of the output image,
      respectively.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters,
      respectively.

    Args:
        x (chainer.Variable or :class:`numpy.ndarray` or cupy.ndarray):
            Input variable of shape :math:`(n, c_I, h, w)`.
        W (~chainer.Variable): Weight variable of shape
            :math:`(c_O, h_O, w_O, c_I, k_H, k_W)`.
        b (~chainer.Variable):
            Bias variable of shape :math:`(c_O, h_O, w_O)` (optional).
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.

    Returns:
        ~chainer.Variable:
            Output variable. Its shape is :math:`(n, c_O, h_O, w_O)`.

    Like ``Convolution2D``, ``LocalConvolution2D`` function computes
    correlations between filters and patches of size :math:`(k_H, k_W)` in
    ``x``.
    But unlike ``Convolution2D``, ``LocalConvolution2D`` has a separate filter
    for each patch of the input.

    The output size :math:`(h_O, w_O)` is taken from the shape of ``W``. No
    padding is applied, so the input must be large enough to contain every
    patch: :math:`(h_O - 1) s_Y + k_H \\le h` and
    :math:`(w_O - 1) s_X + k_W \\le w`, where :math:`(s_Y, s_X)` is the
    stride.

    If the bias is given, it is added element-wise to the output of the
    convolution (broadcast over the batch axis), so every output channel and
    spatial position has its own bias value.

    .. seealso:: :class:`~chainer.links.LocalConvolution2D`

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (2, 3, 7, 7))
        >>> W = np.random.uniform(0, 1, (2, 5, 5, 3, 3, 3))
        >>> b = np.random.uniform(0, 1, (2, 5, 5))
        >>> y = F.local_convolution_2d(x, W, b)
        >>> y.shape
        (2, 2, 5, 5)

    """
    fnode = LocalConvolution2DFunction(stride)
    # The bias is passed as a third input only when it is supplied.
    args = (x, W) if b is None else (x, W, b)
    y, = fnode.apply(args)
    return y