/
prelu.py
109 lines (83 loc) · 3.26 KB
/
prelu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy
import six
from chainer import cuda
from chainer import function
from chainer.utils import type_check
def _fwd_kern():
    """Create the elementwise CUDA kernel for the PReLU forward pass.

    The kernel computes ``y = cond >= 0 ? x : x * W`` per element; the
    backward pass reuses it with ``gy`` as ``x`` and the input as ``cond``.
    """
    in_params = 'T x, T cond, T W'
    out_params = 'T y'
    operation = 'y = cond >= 0 ? x : (T)(x * W)'
    return cuda.elementwise(in_params, out_params, operation, 'prelu')
class PReLUFunction(function.Function):
    """Elementwise parametric ReLU: ``y = x`` where ``x >= 0``, else ``W * x``.

    ``W`` is matched against the dimensions of ``x`` immediately following
    the batch (first) dimension and broadcast over any trailing dimensions.
    """
    def check_type_forward(self, in_types):
        # Exactly two inputs: the data array x and the slope parameter W.
        type_check.expect(in_types.size() == 2)
        x_type, W_type = in_types
        type_check.expect(
            # Floating-point input; W must share x's dtype.
            x_type.dtype.kind == 'f',
            W_type.dtype == x_type.dtype,
            # x needs at least one leading (batch) dimension before the
            # dimensions matched against W.
            x_type.ndim >= W_type.ndim + 1,
            # W's shape must equal x's shape right after the batch axis.
            x_type.shape[1:1 + type_check.eval(W_type.ndim)] == W_type.shape
        )
    def forward_cpu(self, inputs):
        x, W = inputs
        y = x.copy()
        # The masked array shares y's buffer (copy=False) and hides the
        # entries >= 0, so the in-place multiply below scales only the
        # negative entries of y by W.
        masked = numpy.ma.masked_greater_equal(y, 0, copy=False)
        shape = _get_extended_shape(W, y)
        masked *= W.reshape(shape)
        return y,
    def forward_gpu(self, inputs):
        x, W = inputs
        shape = _get_extended_shape(W, x)
        # Kernel computes y = x if x >= 0 else x * W; x is passed both as
        # the value and as the sign condition.
        y = _fwd_kern()(x, x, W.reshape(shape))
        return y,
    def backward_cpu(self, inputs, grad_outputs):
        x, W = inputs
        gy = grad_outputs[0]
        mask = x >= 0
        # Reduce the weight gradient over the batch axis and every axis
        # trailing W's shape.
        axes = (0,) + tuple(six.moves.range(1 + W.ndim, gy.ndim))
        gW = numpy.where(mask, 0, x * gy).sum(axis=axes)
        if numpy.isscalar(gW):
            # Summing over all axes can yield a scalar (e.g. 0-dim W);
            # normalize gW back to an ndarray.
            gW = numpy.array(gW)
        gx = gy.copy()
        # As in forward_cpu: scale only the entries where x < 0 by W,
        # in place, through a masked view of gx.
        masked = numpy.ma.array(gx, mask=mask)
        shape = _get_extended_shape(W, gx)
        masked *= W.reshape(shape)
        return gx, gW
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs
        gy = grad_outputs[0]
        # masked holds x * gy where x < 0 and 0 elsewhere; summing it
        # gives the weight gradient.
        masked = cuda.elementwise(
            'T x, T gy', 'T masked',
            'masked = x >= 0 ? (T)0 : (T)(x * gy)',
            'prelu_masked')(x, gy)
        axes = (0,) + tuple(six.moves.range(1 + W.ndim, gy.ndim))
        gW = masked.sum(axis=axes)
        gx = masked  # reuse buffer
        shape = _get_extended_shape(W, gx)
        # Overwrite gx with gy where x >= 0 and gy * W elsewhere (the
        # fourth kernel argument is the output array).
        _fwd_kern()(gy, x, W.reshape(shape), gx)
        return gx, gW
def prelu(x, W):
    """Parametric ReLU function.

    Given an input ``x`` and a weight array ``W``, computes the output as
    :math:`PReLU(x) = \\max(x, W*x)`, where :math:`*` is an elementwise
    multiplication for each sample in the batch.

    When PReLU is combined with two-dimensional convolution, the elements
    of the parameter :math:`W` are typically shared across all pixels of
    the same filter. To support such usage, the parameter array may match
    only the leading dimensions of the input (excluding the batch
    dimension): if :math:`W` has the shape :math:`(2, 3, 4)`, then
    :math:`x` must have the shape :math:`(B, 2, 3, 4, S1, ..., SN)`, where
    :math:`B` is the batch size and the number of trailing :math:`S`
    dimensions is an arbitrary non-negative integer.

    Args:
        x (~chainer.Variable): Input variable.
            Its first dimension is assumed to be the minibatch dimension.
        W (~chainer.Variable): Weight variable.

    Returns:
        ~chainer.Variable: Output variable

    .. seealso:: :class:`~chainer.links.PReLU`

    """
    func = PReLUFunction()
    return func(x, W)
def _get_extended_shape(W, x):
return (1,) + W.shape + (1,) * (x.ndim - W.ndim - 1)