softmax.py
import chainer
from chainer import backend
from chainer.backends import cuda
from chainer import function_node
import chainer.functions
from chainer.utils import type_check
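
# cuDNN's "accurate" softmax algorithm subtracts the per-slice maximum before
# exponentiating, which matches the numerically stable CPU path implemented
# below.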
if cuda.cudnn_enabled:
    cudnn = cuda.cudnn
    _algorithm = cuda.libcudnn.CUDNN_SOFTMAX_ACCURATE


class Softmax(function_node.FunctionNode):

    """Softmax activation function."""

    def __init__(self, axis=1):
        self.axis = axis

    def check_type_forward(self, in_types):
        type_check._argname(in_types, ('x',))
        x_type, = in_types
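        # Negative axes are accepted, so the valid range is [-ndim, ndim).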
        type_check.expect(
            x_type.dtype.kind == 'f',
            -x_type.ndim <= self.axis < x_type.ndim,
        )

    def forward(self, x):
        xp = backend.get_array_module(*x)
        if xp is cuda.cupy and chainer.should_use_cudnn('>=auto'):
            y = cudnn.softmax_forward(x[0], self.axis, _algorithm)
        else:
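            # Numerically stable softmax: subtract the per-slice maximum so
            # exp() cannot overflow, then normalize in place.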
            y = x[0] - x[0].max(axis=self.axis, keepdims=True)
            xp.exp(y, out=y)
            y /= y.sum(axis=self.axis, keepdims=True)
        self.retain_outputs((0,))
        return y,

    def backward(self, indexes, grad_outputs):
        y = self.get_retained_outputs()[0]
        gy, = grad_outputs
        return _SoftmaxGrad(self.axis).apply((y, gy))
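

# The gradient is implemented as its own FunctionNode so that it can be
# differentiated again (double backward).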
class _SoftmaxGrad(function_node.FunctionNode):

    def __init__(self, axis):
        self.axis = axis

    def forward(self, inputs):
        self.retain_inputs((0, 1))
        y, gy = inputs
        xp = backend.get_array_module(*y)
        if xp is cuda.cupy and chainer.should_use_cudnn('>=auto'):
            gx = cudnn.softmax_backward(y, gy, self.axis, _algorithm)
        else:
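            # Vector-Jacobian product of softmax:
            #     gx = y * gy - y * sum(y * gy, axis)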
            gx = y * gy
            sumdx = gx.sum(axis=self.axis, keepdims=True)
            gx -= y * sumdx
        return gx,

    def backward(self, indexes, grad_outputs):
        y, gy = self.get_retained_inputs()
        ggx, = grad_outputs
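        # Differentiating gx = y * (gy - <y, gy>) with respect to y and gy,
        # where <a, b> denotes sum(a * b) over `axis`; the common term
        # ga = ggx - <ggx, y> appears in both gradients.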
        gs = chainer.functions.sum(ggx * y, axis=self.axis, keepdims=True)
        ga = ggx - chainer.functions.broadcast_to(gs, gy.shape)
        ret = []
        if 0 in indexes:
            s = chainer.functions.broadcast_to(chainer.functions.sum(
                y * gy, axis=self.axis, keepdims=True), gy.shape)
            gy2 = ga * gy - ggx * s
            ret.append(gy2)
        if 1 in indexes:
            ggy = ga * y
            ret.append(ggy)
        return tuple(ret)


def softmax(x, axis=1):
    """Softmax function.

    This function computes the softmax along an axis. Let
    :math:`c = (c_1, c_2, \\dots, c_D)` be the slice of ``x`` along
    the axis. For each slice :math:`c`, it computes the function :math:`f(c)`
    defined as :math:`f(c)={\\exp(c) \\over \\sum_{d} \\exp(c_d)}`.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`):
            Input variable.
            An :math:`n`-dimensional (:math:`n \\geq 2`) float array.
        axis (int): The axis along which the softmax is computed.

    Returns:
        ~chainer.Variable: Output variable.
        An :math:`n`-dimensional (:math:`n \\geq 2`) float array with the
        same shape as ``x``.

    .. admonition:: Example

        >>> x = np.array([[0, 1, 2], [0, 2, 4]], np.float32)
        >>> x
        array([[0., 1., 2.],
               [0., 2., 4.]], dtype=float32)
        >>> y = F.softmax(x, axis=1)
        >>> y.array
        array([[0.09003057, 0.24472848, 0.66524094],
               [0.01587624, 0.11731043, 0.86681336]], dtype=float32)
        >>> F.sum(y, axis=1).array
        array([1., 1.], dtype=float32)

    """
    return Softmax(axis=axis).apply((x,))[0]