tanh.py
import numpy

import chainer
from chainer.backends import cuda
from chainer import function_node
from chainer import utils
from chainer.utils import type_check

if cuda.cudnn_enabled:
    cudnn = cuda.cudnn
    libcudnn = cuda.cuda.cudnn
    _mode = libcudnn.CUDNN_ACTIVATION_TANH
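
# When cuDNN is available, Tanh uses cuDNN's fused tanh activation kernels on
# c-contiguous GPU arrays, and falls back to CuPy elementwise code otherwise.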

class Tanh(function_node.FunctionNode):

    """Hyperbolic tangent function."""

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 1)
        type_check.expect(in_types[0].dtype.kind == 'f')

    def forward_cpu(self, x):
        y = utils.force_array(numpy.tanh(x[0]))
        self.retain_outputs((0,))
        self._use_cudnn = False
        return y,

    def forward_gpu(self, x):
        if chainer.should_use_cudnn('==always') and x[0].flags.c_contiguous:
            y = cudnn.activation_forward(x[0], _mode)
            # cuDNN's backward kernel needs the raw input, so retain it.
            self.retain_inputs((0,))
            self._use_cudnn = True
        else:
            y = cuda.cupy.empty_like(x[0])
            cuda.cupy.tanh(x[0], out=y)
            self._use_cudnn = False
        self.retain_outputs((0,))
        return y,

    def backward(self, indexes, grad_outputs):
        if self._use_cudnn:
            x = self.get_retained_inputs()[0].data
        else:
            x = None
        y = self.get_retained_outputs()[0]
        gy = grad_outputs[0]
        return TanhGrad(x).apply((y, gy))
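
# The derivative of tanh can be expressed with the output alone:
# dy/dx = 1 - tanh(x)**2 = 1 - y**2. TanhGrad therefore only needs y and gy
# on the generic path; the raw input x is carried solely for cuDNN.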


class TanhGrad(function_node.FunctionNode):

    def __init__(self, x):
        super(TanhGrad, self).__init__()
        # The original input `x` is only required for cuDNN.
        # If it is None, this class does not use cuDNN.
        # Note that x must be c-contiguous and it is checked
        # in Tanh.forward_gpu.
        self.x = x

    def forward_cpu(self, inputs):
        self.retain_inputs((0, 1))
        y, gy = inputs
        # Build the constant 1 as a scalar of y's dtype so the result
        # keeps the input dtype.
        one = y.dtype.type(1)
        return utils.force_array(gy * (one - y * y)),

    def forward_gpu(self, inputs):
        self.retain_inputs((0, 1))
        y, gy = inputs
        if (chainer.should_use_cudnn('==always') and
                self.x is not None and gy.flags.c_contiguous):
            gx = cudnn.activation_backward(self.x, y, gy, _mode)
        else:
            gx = cuda.elementwise(
                'T y, T gy', 'T gx',
                'gx = gy * (1 - y * y)',
                'tanh_bwd')(y, gy)
        return gx,

    def backward(self, indexes, grad_outputs):
        y, gy = self.get_retained_inputs()
        g = grad_outputs[0]
        # Differentiating gx = gy * (1 - y * y) w.r.t. the inputs (y, gy):
        #   d(gx)/dy  = -2 * y * gy  ->  grad_y = -2 * gy * (y * g)
        #   d(gx)/dgy = 1 - y * y    ->  ggy    = g - y * (y * g)
        y_mul_g = y * g
        grad_y = -2 * gy * y_mul_g
        ggy = g - y * y_mul_g
        return grad_y, ggy


def tanh(x):
    """Elementwise hyperbolic tangent function.

    .. math:: f(x)=\\tanh(x).

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable. A :math:`(s_1, s_2, ..., s_N)`-shaped float array.

    Returns:
        ~chainer.Variable: Output variable. A
        :math:`(s_1, s_2, ..., s_N)`-shaped float array.

    .. admonition:: Example

        >>> x = np.arange(-1, 4, 2).astype('f')
        >>> x
        array([-1.,  1.,  3.], dtype=float32)
        >>> F.tanh(x).data
        array([-0.7615942,  0.7615942,  0.9950548], dtype=float32)

    """
    return Tanh().apply((x,))[0]
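

# A minimal usage sketch, not part of the original module: it assumes the
# standard chainer.Variable API and checks the gradient computed through
# Tanh/TanhGrad against the analytic form 1 - tanh(x)**2.
if __name__ == '__main__':
    x = chainer.Variable(numpy.linspace(-2, 2, 5, dtype=numpy.float32))
    y = tanh(x)
    # Seed the backward pass with a gradient of ones, keeping the graph
    # so that TanhGrad itself stays differentiable.
    y.grad = numpy.ones_like(y.array)
    y.backward(enable_double_backprop=True)
    numpy.testing.assert_allclose(
        x.grad, 1 - numpy.tanh(x.array) ** 2, rtol=1e-5, atol=1e-6)
    print('gradient check passed:', x.grad)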