/
sigmoid_cross_entropy.py
170 lines (135 loc) · 5.77 KB
/
sigmoid_cross_entropy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import numpy
import chainer
from chainer.backends import cuda
from chainer import function_node
from chainer.functions.activation import sigmoid
from chainer import utils
from chainer.utils import type_check
class SigmoidCrossEntropy(function_node.FunctionNode):

    """Sigmoid activation followed by a sigmoid cross entropy loss.

    Elements of the target whose value equals ``ignore_label`` are masked
    out: their loss is multiplied by zero and, when ``normalize`` is true,
    they are not counted in the divisor of the mean.

    Args:
        normalize (bool): Only consulted when ``reduce`` is ``'mean'``.
            If true, the summed loss is divided by the number of
            non-ignored elements; otherwise it is divided by ``len(x)``.
            In both cases the divisor is at least ``1``.
        reduce (str): ``'mean'`` to return the normalized sum of the loss,
            or ``'no'`` to return the element-wise loss.

    Raises:
        ValueError: If ``reduce`` is neither ``'mean'`` nor ``'no'``.

    """

    # Target value marking the elements that are excluded from the loss.
    ignore_label = -1

    def __init__(self, normalize=True, reduce='mean'):
        self.normalize = normalize

        if reduce not in ('mean', 'no'):
            message = (
                "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
                'given' % reduce)
            raise ValueError(message)

        self.reduce = reduce
        # Divisor used by the 'mean' reduction; filled in by ``forward``.
        self.count = None

    def check_type_forward(self, in_types):
        """Require a float32 input and an integer target of equal shape."""
        type_check.expect(in_types.size() == 2)

        logit_type, label_type = in_types
        type_check.expect(
            logit_type.dtype == numpy.float32,
            label_type.dtype.kind == 'i',
            logit_type.shape == label_type.shape,
        )

    def forward(self, inputs):
        """Compute the loss from the raw arrays ``(x, t)``.

        Returns a one-element tuple holding either the normalized sum of
        the loss (``reduce == 'mean'``) or the element-wise loss cast back
        to the dtype of ``x`` (``reduce == 'no'``).
        """
        self.retain_inputs((0, 1))
        xp = cuda.get_array_module(*inputs)
        x, t = inputs

        # The mask is kept on the instance because ``backward`` hands it to
        # the gradient node.
        valid = (t != self.ignore_label)
        self.ignore_mask = valid

        # Numerically stable form of the cross entropy:
        #   max(x, 0) - t * x + log(1 + exp(-|x|))
        # written so that ``exp`` only ever sees a non-positive argument.
        linear_term = x * (t - (x >= 0))
        log_term = xp.log1p(xp.exp(-xp.abs(x)))
        loss = -(valid * (linear_term - log_term))

        if self.reduce == 'mean':
            if self.normalize:
                # Number of non-ignored elements, clamped to at least 1.
                self.count = xp.maximum(1, self.ignore_mask.sum())
            else:
                # Length of the first axis of ``x``, clamped to at least 1.
                self.count = max(1, len(x))
            mean_loss = xp.divide(xp.sum(loss), self.count, dtype=x.dtype)
            return utils.force_array(mean_loss),

        # reduce == 'no': element-wise loss, no normalization.
        return utils.force_array(loss.astype(x.dtype)),

    def backward(self, inputs, grad_outputs):
        """Return the gradient for ``x``; ``t`` receives no gradient.

        The first argument is not used by this method. The actual
        computation is delegated to ``SigmoidCrossEntropyGrad``.
        """
        x, t = self.get_retained_inputs()
        (gy,) = grad_outputs

        grad_node = SigmoidCrossEntropyGrad(
            self.reduce, self.count, self.ignore_mask, t.data)
        (gx,) = grad_node.apply((x, gy))
        return gx, None
class SigmoidCrossEntropyGrad(function_node.FunctionNode):

    """Sigmoid cross entropy gradient function.

    Computes ``gy * ignore_mask * (sigmoid(x) - t)``, divided by ``count``
    when ``reduce`` is ``'mean'``. It is a function node in its own right,
    with its own ``backward``.

    Args:
        reduce (str): Reduction mode; only ``'mean'`` triggers the division
            by ``count``.
        count: Divisor applied when ``reduce`` is ``'mean'``.
        ignore_mask: Array multiplied into the gradient element-wise.
        t: Target array subtracted from the sigmoid output.

    """

    def __init__(self, reduce, count, ignore_mask, t):
        self.t = t
        self.ignore_mask = ignore_mask
        self.count = count
        self.reduce = reduce

    def forward(self, inputs):
        """Compute the gradient with respect to ``x`` from ``(x, gy)``."""
        self.retain_inputs((0, 1))
        xp = cuda.get_array_module(*inputs)
        x, gy = inputs

        (prob,) = sigmoid.Sigmoid().forward((x,))

        # Term shared by both reduction modes.
        masked_grad = gy * self.ignore_mask * (prob - self.t)

        if self.reduce == 'mean':
            gx = xp.divide(masked_grad, self.count, dtype=prob.dtype)
        else:
            gx = masked_grad.astype(prob.dtype)
        return gx,

    def backward(self, indexes, grad_outputs):
        """Return the gradients with respect to ``x`` and ``gy``.

        Built from differentiable ``chainer.functions`` operations on the
        retained input variables.
        """
        x, gy = self.get_retained_inputs()
        (ggx,) = grad_outputs

        prob = chainer.functions.sigmoid(x)
        # Derivative of the sigmoid expressed through its own output.
        dprob = prob * (1 - prob)

        # Gradient flowing back to ``x``.
        grad_x = dprob * chainer.functions.broadcast_to(gy, dprob.shape)
        grad_x *= self.ignore_mask * ggx

        # Gradient flowing back to ``gy``.
        grad_gy = prob - self.t.astype(prob.dtype)
        grad_gy *= self.ignore_mask * ggx

        if self.reduce == 'mean':
            # Apply the same divisor as ``forward``; the gradient for
            # ``gy`` is additionally summed before the division.
            grad_x /= self.count
            grad_gy = chainer.functions.sum(grad_gy) / self.count

        return grad_x, grad_gy
def sigmoid_cross_entropy(x, t, normalize=True, reduce='mean'):
    """Computes cross entropy loss for pre-sigmoid activations.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Pre-sigmoid activations. The (i, j)-th
            element is the unnormalized log probability of the j-th unit
            at the i-th example.
        t (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Signed integer ground truth labels, ``0``
            or ``1``, with the same shape as ``x``.
            If ``t[i, j] == -1``, the corresponding ``x[i, j]`` is ignored.
            The loss is zero if all ground truth labels are ``-1``.
        normalize (bool): Selects the normalization constant used when
            ``reduce`` is ``'mean'``. If true, the summed loss is divided
            by the number of elements that are not ignored. Otherwise it
            is divided by ``len(x)``. Either divisor is at least ``1``.
        reduce (str): Determines whether the loss is reduced.
            If it is ``'mean'``, the cross entropy is summed and
            normalized according to the ``normalize`` option.
            If it is ``'no'``, the cross entropy is computed for each
            element and is not normalized (the ``normalize`` option is
            ignored). In this case, the loss value of an ignored element,
            which has ``-1`` as its target value, is set to ``0``.

    Returns:
        Variable: A variable object holding an array of the cross entropy.
        If ``reduce`` is ``'mean'``, it is a scalar array.
        If ``reduce`` is ``'no'``, the shape is same as ``x``.

    .. note::

        This function is differentiable only by ``x``.

    .. admonition:: Example

        >>> x = np.array([[-2.0, 3.0, 0.5],
        ...               [5.0, 2.0, -0.5]]).astype(np.float32)
        >>> x
        array([[-2. ,  3. ,  0.5],
               [ 5. ,  2. , -0.5]], dtype=float32)
        >>> t = np.array([[0, 1, 0], [1, 1, -1]]).astype(np.int32)
        >>> t
        array([[ 0,  1,  0],
               [ 1,  1, -1]], dtype=int32)
        >>> F.sigmoid_cross_entropy(x, t)
        variable(0.25664714)
        >>> F.sigmoid_cross_entropy(x, t, normalize=False)
        variable(0.64161783)
        >>> y = F.sigmoid_cross_entropy(x, t, reduce='no')
        >>> y.shape
        (2, 3)
        >>> y.data
        array([[ 0.126928  ,  0.04858735,  0.974077  ],
               [ 0.00671535,  0.126928  , -0.        ]], dtype=float32)

    """
    loss_node = SigmoidCrossEntropy(normalize, reduce)
    outputs = loss_node.apply((x, t))
    # The node yields a single output; hand back just that variable.
    return outputs[0]