import numpy

from chainer import cuda
from chainer import function
from chainer.utils import type_check


def _hinge_fwd_kernel():
    return cuda.elementwise(
        'S t', 'raw T bottom_diff',
        'int ind[] = {i, t}; bottom_diff[ind] *= -1',
        'hinge_fwd')
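
# The elementwise kernel above negates, in place, the score at each row's
# target index (bottom_diff[i, t[i]] *= -1). The CPU path below performs the
# same update with NumPy fancy indexing:
#
#     bottom_diff[numpy.arange(num), t] *= -1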


class Hinge(function.Function):

    """Hinge loss."""

    def __init__(self, norm='L1', reduce='mean'):
        if norm in ['L1', 'L2']:
            self.norm = norm
        else:
            raise NotImplementedError("norm should be either 'L1' or 'L2'")
        if reduce in ['mean', 'no']:
            self.reduce = reduce
        else:
            raise ValueError(
                "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
                'given' % reduce)

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 2)
        x_type, t_type = in_types

        type_check.expect(
            x_type.dtype == numpy.float32,
            t_type.dtype.kind == 'i',
            x_type.ndim == 2,
            t_type.ndim == 1,
            x_type.shape[0] == t_type.shape[0],
        )
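
    # In plain terms: x must be a float32 score matrix of shape (N, K) and
    # t an integer label vector of shape (N,), sharing the batch dimension N.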

    def forward_cpu(self, inputs):
        x, t = inputs
        num = len(x)
        self.bottom_diff = numpy.copy(x)
        self.bottom_diff[numpy.arange(num), t] *= -1
        self.bottom_diff = numpy.maximum(0, 1 + self.bottom_diff)

        if self.norm == 'L1':
            loss = self.bottom_diff
        elif self.norm == 'L2':
            loss = self.bottom_diff ** 2
        else:
            raise NotImplementedError()

        if self.reduce == 'mean':
            loss = loss.sum() / num

        return numpy.array(loss, dtype=x.dtype),
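
    # Worked example (mirrors the doctest in ``hinge`` below): for
    #     x = [[-2, 3, 0.5], [5, 2, -0.5]] and t = [1, 0],
    # the target scores 3 and 5 are negated, so the margins become
    #     max(0, 1 + [[-2, -3, 0.5], [-5, 2, -0.5]]) = [[0, 0, 1.5], [0, 3, 0.5]]
    # and the mean L1 loss is (1.5 + 3 + 0.5) / 2 = 2.5.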

    def forward_gpu(self, inputs):
        x, t = inputs
        num = x.dtype.type(len(x))
        self.bottom_diff = cuda.cupy.maximum(
            0, 1 + _hinge_fwd_kernel()(t, x.copy()))

        if self.norm == 'L1':
            loss = self.bottom_diff
        elif self.norm == 'L2':
            loss = self.bottom_diff ** 2
        else:
            raise NotImplementedError()

        if self.reduce == 'mean':
            loss = loss.sum() / num

        return loss,
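
    # Backward pass: negating the stored margins at the target indices once
    # more reattaches the sign factor -delta, so sign(bottom_diff) is the
    # subgradient of each L1 margin term and 2 * bottom_diff the gradient of
    # each L2 term, both scaled by the upstream gradient gloss.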

    def backward_cpu(self, inputs, grad_outputs):
        t, gloss = inputs[1], grad_outputs[0]
        if self.reduce == 'mean':
            gloss = gloss / len(t)  # avoid mutating grad_outputs in place
        self.bottom_diff[numpy.arange(len(t)), t] *= -1

        if self.norm == 'L1':
            gx = gloss * numpy.sign(self.bottom_diff)
        elif self.norm == 'L2':
            gx = 2 * gloss * self.bottom_diff
        else:
            raise NotImplementedError()

        return gx, None

    def backward_gpu(self, inputs, grad_outputs):
        xp = cuda.get_array_module(*inputs)
        t, gloss = inputs[1], grad_outputs[0]
        if self.reduce == 'mean':
            gloss = gloss / len(t)  # avoid mutating grad_outputs in place
        self.bottom_diff = _hinge_fwd_kernel()(t, self.bottom_diff)

        if self.norm == 'L1':
            gx = gloss * xp.sign(self.bottom_diff)
        elif self.norm == 'L2':
            gx = 2 * gloss * self.bottom_diff
        else:
            raise NotImplementedError()

        return gx, None


def hinge(x, t, norm='L1', reduce='mean'):
    """Computes the hinge loss for a one-of-many classification task.

    .. math::
        L = \\frac{1}{N} \\sum_{n=1}^N \\sum_{k=1}^K \\left[
        \\max(0, 1 - \\delta\\{t_n = k\\} x_{nk}) \\right]^p

    where :math:`N` denotes the batch size and :math:`K` is the number of
    classes of interest,

    .. math::
        \\delta \\{ {\\rm condition} \\} = \\left \\{ \\begin{array}{cc}
        1 & {\\rm if~condition\\ is\\ true} \\\\
        -1 & {\\rm otherwise,}
        \\end{array} \\right.

    and

    .. math::
        p = \\left \\{ \\begin{array}{cc}
        1 & {\\rm if~norm} = {\\rm L1} \\\\
        2 & {\\rm if~norm} = {\\rm L2.}
        \\end{array} \\right.

    Let the hinge loss function :math:`l(x, \\delta)` be
    :math:`\\left[\\max(0, 1 - \\delta x) \\right]^p`. When :math:`x` and
    :math:`\\delta` have the same sign (meaning :math:`x` predicts the
    proper score for classification) and :math:`|x| \\geq 1`, the hinge
    loss :math:`l(x, \\delta) = 0`, but when they have opposite signs,
    :math:`l(x, \\delta)` increases linearly with :math:`|x|`
    (quadratically when ``norm`` is ``'L2'``).

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'mean'``, it takes the mean of loss values.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray` of :class:`numpy.float32`):
            Input variable. The shape of ``x`` should be
            (:math:`N`, :math:`K`).
        t (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray` of signed integer):
            The :math:`N`-dimensional label vector with values
            :math:`t_n \\in \\{0, 1, 2, \\dots, K-1\\}`.
            The shape of ``t`` should be (:math:`N`,).
        norm (str): Specifies the norm type. Either ``'L1'`` or ``'L2'`` is
            acceptable.
        reduce (str): Reduction option. Its value must be either
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding a scalar array of the
            hinge loss :math:`L`.
            If ``reduce`` is ``'no'``, the output variable holds an array
            whose shape is the same as that of ``x``.
            If it is ``'mean'``, the output variable holds a scalar value.

    .. admonition:: Example

        In this case, the batch size ``N`` is 2 and the number of classes
        ``K`` is 3.

        >>> x = np.array([[-2.0, 3.0, 0.5],
        ...               [5.0, 2.0, -0.5]]).astype('f')
        >>> x
        array([[-2. ,  3. ,  0.5],
               [ 5. ,  2. , -0.5]], dtype=float32)
        >>> t = np.array([1, 0]).astype('i')
        >>> t
        array([1, 0], dtype=int32)
        >>> F.hinge(x, t)
        variable(2.5)
        >>> F.hinge(x, t, reduce='no')
        variable([[ 0. ,  0. ,  1.5],
                  [ 0. ,  3. ,  0.5]])
        >>> F.hinge(x, t, norm='L2')
        variable(5.75)

    """
    return Hinge(norm, reduce)(x, t)
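

# A minimal usage sketch (not part of the original module), mirroring the
# doctest above; it assumes chainer and numpy are importable.
if __name__ == '__main__':
    x = numpy.array([[-2.0, 3.0, 0.5],
                     [5.0, 2.0, -0.5]], dtype=numpy.float32)
    t = numpy.array([1, 0], dtype=numpy.int32)
    print(hinge(x, t).data)               # 2.5   (mean L1 loss)
    print(hinge(x, t, reduce='no').data)  # elementwise losses, shape (2, 3)
    print(hinge(x, t, norm='L2').data)    # 5.75  (mean L2 loss)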