# elu.py
import numpy

from chainer.backends import cuda
from chainer import function_node
from chainer.utils import type_check


class ELU(function_node.FunctionNode):

    """Exponential Linear Unit."""

    def __init__(self, alpha=1.0):
        self.alpha = float(alpha)

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 1)
        x_type, = in_types
        type_check.expect(x_type.dtype.kind == 'f')

    def forward_cpu(self, x):
        self.retain_inputs((0,))
        y = x[0].copy()
        neg_indices = x[0] < 0
        y[neg_indices] = self.alpha * (numpy.exp(y[neg_indices]) - 1)
        return y,
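
    # The GPU path fuses the sign branch into a single elementwise CUDA
    # kernel compiled by cuda.elementwise, so only one pass over the data
    # is needed (no separate mask step as in the CPU path above).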
    def forward_gpu(self, x):
        self.retain_inputs((0,))
        y = cuda.elementwise(
            'T x, T alpha', 'T y',
            'y = x >= 0 ? x : (T)(alpha * (exp(x) - 1))',
            'elu_fwd')(
                x[0], self.alpha)
        return y,

    def backward(self, indexes, grad_outputs):
        x, = self.get_retained_inputs()
        gy, = grad_outputs
        return ELUGrad(self.alpha).apply((x, gy))
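

# The gradient is implemented as its own FunctionNode rather than a plain
# array computation, so the gradient itself is differentiable; this is what
# enables double backpropagation through ELU.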
class ELUGrad(function_node.FunctionNode):

    """Exponential Linear Unit gradient function."""

    def __init__(self, alpha):
        self.alpha = alpha

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 2)
        type_check.expect(in_types[0].dtype.kind == 'f')
        type_check.expect(in_types[1].dtype.kind == 'f')

    def forward_cpu(self, inputs):
        x, gy = inputs
        gx = gy.copy()
        neg_indices = x < 0
        gx[neg_indices] *= self.alpha * numpy.exp(x[neg_indices])
        self.retain_inputs((0, 1))
        self.retain_outputs((0,))
        return gx,
    def forward_gpu(self, inputs):
        x, gy = inputs
        gx = cuda.elementwise(
            'T x, T gy, T alpha', 'T gx',
            'gx = x >= 0 ? gy : (T)(gy * alpha * exp(x))',
            'elu_bwd')(
                x, gy, self.alpha)
        self.retain_inputs((0, 1))
        self.retain_outputs((0,))
        return gx,
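
    # For x < 0 the retained output gx equals gy * alpha * exp(x), so its
    # derivative w.r.t. x is gx itself (and 0 for x >= 0); its derivative
    # w.r.t. gy is the ELU derivative, which is recovered below as gx / gy.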
    def backward(self, indexes, grad_outputs):
        x, gy = self.get_retained_inputs()
        gx, = self.get_retained_outputs()
        ggx, = grad_outputs
        ggxgx = ggx * gx

        ret = []
        if 0 in indexes:
            ret.append(ggxgx * (x.data < 0))
        if 1 in indexes:
            ret.append(ggxgx / gy)
        return ret


def elu(x, alpha=1.0):
    """Exponential Linear Unit function.

    For a parameter :math:`\\alpha`, it is expressed as

    .. math::
        f(x) = \\left \\{ \\begin{array}{ll}
        x & {\\rm if}~ x \\ge 0 \\\\
        \\alpha (\\exp(x) - 1) & {\\rm if}~ x < 0,
        \\end{array} \\right.

    See: https://arxiv.org/abs/1511.07289

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable. A :math:`(s_1, s_2, ..., s_N)`-shaped float array.
        alpha (float): Parameter :math:`\\alpha`. Default is 1.0.

    Returns:
        ~chainer.Variable: Output variable. A
        :math:`(s_1, s_2, ..., s_N)`-shaped float array.

    .. admonition:: Example

        >>> x = np.array([[-1, 0], [2, -3]], 'f')
        >>> x
        array([[-1.,  0.],
               [ 2., -3.]], dtype=float32)
        >>> y = F.elu(x, alpha=1.)
        >>> y.data
        array([[-0.63212055,  0.        ],
               [ 2.        , -0.95021296]], dtype=float32)

    """
    return ELU(alpha=alpha).apply((x,))[0]
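

# A minimal numerical gradient check for this module; a sketch, not part of
# the original file. It assumes chainer and numpy are available and uses
# chainer.gradient_check.check_backward, which compares the analytic backward
# pass against finite differences.
if __name__ == '__main__':
    import numpy as np

    from chainer import gradient_check

    x_data = np.random.uniform(-1, 1, (3, 2)).astype(np.float32)
    gy_data = np.random.uniform(-1, 1, (3, 2)).astype(np.float32)

    # Raises an AssertionError if ELU.backward disagrees with the numerical
    # gradient within the given tolerances.
    gradient_check.check_backward(
        lambda v: elu(v, alpha=1.0), x_data, gy_data, atol=1e-4, rtol=1e-4)
    print('gradient check passed')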