import numpy

import chainer.functions
from chainer import cuda
from chainer import function_node
from chainer import utils
from chainer.utils import type_check


class Softplus(function_node.FunctionNode):

    """Softplus function."""

    def __init__(self, beta=1.0):
self.beta = float(beta)
self.beta_inv = float(1.0 / beta)

    def check_type_forward(self, in_types):
type_check.expect(in_types.size() == 1)
x_type, = in_types
type_check.expect(x_type.dtype.kind == 'f')

    def forward_cpu(self, inputs):
self.retain_inputs((0,))
x = inputs[0]
# y = log(1 + exp(beta * x)) / beta
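        # Evaluated as max(bx, 0) + log1p(exp(-|bx|)), an algebraically
        # equivalent form in which exp() cannot overflow for large |beta * x|.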
bx = self.beta * x
y = (numpy.fmax(bx, 0) +
numpy.log1p(numpy.exp(-numpy.fabs(bx)))) * self.beta_inv
return utils.force_array(y, x.dtype),

    def forward_gpu(self, inputs):
self.retain_inputs((0,))
x = inputs[0]
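        # Same numerically stable formulation as the CPU path, fused into a
        # single elementwise CUDA kernel.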
y = cuda.elementwise(
'T x, T beta, T beta_inv', 'T y',
'''
T bx = beta * x;
y = (max(bx, (T)0) + log1p(exp(-fabs(bx)))) * beta_inv;
''',
'softplus_fwd'
)(x, self.beta, self.beta_inv)
return y,

    def backward(self, indexes, grad_outputs):
x = self.get_retained_inputs()[0]
gy, = grad_outputs
return SoftplusGrad((self.beta,)).apply((x, gy))
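

# The derivative of softplus is the logistic sigmoid:
#
#     d/dx [log(1 + exp(beta * x)) / beta] = 1 / (1 + exp(-beta * x))
#
# SoftplusGrad evaluates this expression, and its own ``backward`` below
# differentiates it once more so that double backpropagation works.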
class SoftplusGrad(function_node.FunctionNode):

    """Softplus gradient function."""

    def __init__(self, inputs):
super(SoftplusGrad, self).__init__()
self.beta = inputs[0]

    def check_type_forward(self, in_types):
type_check.expect(in_types.size() == 2)
x_type, gy_type = in_types
type_check.expect(x_type.dtype.kind == 'f')
type_check.expect(gy_type.dtype.kind == 'f')

    def forward_cpu(self, inputs):
self.retain_inputs((0, 1))
x, gy = inputs
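        # 1 - 1 / (1 + exp(beta * x)) is the logistic sigmoid of beta * x.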
gx = (1 - 1 / (1 + numpy.exp(self.beta * x))) * gy
return utils.force_array(gx, x.dtype),

    def forward_gpu(self, inputs):
self.retain_inputs((0, 1))
x, gy = inputs
gx = cuda.elementwise(
'T x, T gy, T beta', 'T gx',
'gx = (1 - 1 / (1 + exp(beta * x))) * gy',
'softplus_bwd')(x, gy, self.beta)
return gx,

    def backward(self, indexes, grad_outputs):
x, gy = self.get_retained_inputs()
g, = grad_outputs
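        # d/dx sigmoid(beta * x) = beta * e / (1 + e)^2, with e = exp(beta * x)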
e = chainer.functions.exp(self.beta * x)
ggx = g * gy * self.beta * e / (1 + e) ** 2
ggy = SoftplusGrad((self.beta,)).apply((x, g))[0]
return ggx, ggy


def softplus(x, beta=1.0):
    """Element-wise softplus function.

    The softplus function is a smooth approximation of ReLU.

    .. math:: f(x)=\\frac{1}{\\beta}\\log(1 + \\exp(\\beta x)),

    where :math:`\\beta` is a parameter. The function approaches ReLU
    as :math:`\\beta` increases.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable. A :math:`(s_1, s_2, ..., s_N)`-shaped float array.
        beta (float): Parameter :math:`\\beta`.

    Returns:
        ~chainer.Variable: Output variable. A
        :math:`(s_1, s_2, ..., s_N)`-shaped float array.

    .. admonition:: Example

        >>> x = np.arange(-2, 3, 2).astype('f')
        >>> x
        array([-2.,  0.,  2.], dtype=float32)
        >>> F.softplus(x, beta=1.0).data
        array([ 0.126928  ,  0.69314718,  2.12692809], dtype=float32)
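
        For other values of ``beta``, the output can be checked against the
        defining formula:

        >>> bool(np.allclose(F.softplus(x, beta=2.0).data,
        ...                  np.log1p(np.exp(2.0 * x)) / 2.0))
        True
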
"""
y, = Softplus(beta=beta).apply((x,))
return y
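

# Minimal usage sketch: runs the CPU forward and backward paths and checks
# them against the closed-form definition and the sigmoid identity
# d/dx softplus(x; beta) = 1 / (1 + exp(-beta * x)). Illustrative only, not
# part of the module's public API.
if __name__ == '__main__':
    beta = 2.0
    x = chainer.Variable(numpy.linspace(-3, 3, 7, dtype=numpy.float32))

    # Forward check against log1p(exp(beta * x)) / beta.
    y = softplus(x, beta=beta)
    numpy.testing.assert_allclose(
        y.data, numpy.log1p(numpy.exp(beta * x.data)) / beta, rtol=1e-5)

    # Backward check: the gradient of a sum of outputs is sigmoid(beta * x).
    y.grad = numpy.ones_like(x.data)
    y.backward()
    numpy.testing.assert_allclose(
        x.grad, 1.0 / (1.0 + numpy.exp(-beta * x.data)), rtol=1e-5)
    print('softplus forward/backward checks passed')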