/
triplet.py
152 lines (121 loc) · 5.35 KB
/
triplet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import numpy
import chainer
from chainer.backends import cuda
from chainer import function_node
from chainer.utils import type_check
class Triplet(function_node.FunctionNode):
"""Triplet loss function."""
def __init__(self, margin, reduce='mean'):
if margin <= 0:
raise ValueError('margin should be positive value.')
self.margin = margin
if reduce not in ('mean', 'no'):
raise ValueError(
"only 'mean' and 'no' are valid for 'reduce', but '%s' is "
'given' % reduce)
self.reduce = reduce
def check_type_forward(self, in_types):
type_check.expect(in_types.size() == 3)
type_check.expect(
in_types[0].dtype == numpy.float32,
in_types[1].dtype == numpy.float32,
in_types[2].dtype == numpy.float32,
in_types[0].shape == in_types[1].shape,
in_types[0].shape == in_types[2].shape,
in_types[0].shape[0] > 0
)
def forward(self, inputs):
xp = cuda.get_array_module(*inputs)
anchor, positive, negative = inputs
dist = xp.sum(
(anchor - positive) ** 2 - (anchor - negative) ** 2,
axis=1) + self.margin
self.dist_hinge = xp.maximum(dist, 0)
if self.reduce == 'mean':
N = anchor.shape[0]
loss = xp.sum(self.dist_hinge) / N
else:
loss = self.dist_hinge
self.retain_inputs((0, 1, 2))
return xp.array(loss, dtype=numpy.float32),
def backward(self, indexes, grad_outputs):
anchor, positive, negative = self.get_retained_inputs()
N = anchor.shape[0]
x_dim = anchor.shape[1]
xp = cuda.get_array_module(anchor)
tmp = xp.repeat(self.dist_hinge[:, None], x_dim, axis=1)
mask = xp.array(tmp > 0, dtype=numpy.float32)
gy, = grad_outputs
if self.reduce == 'mean':
g = gy / N
else:
g = gy[:, None]
tmp = 2 * chainer.functions.broadcast_to(g, mask.shape) * mask
ret = []
if 0 in indexes:
ret.append(tmp * (negative - positive))
if 1 in indexes:
ret.append(tmp * (positive - anchor))
if 2 in indexes:
ret.append(tmp * (anchor - negative))
return ret
def triplet(anchor, positive, negative, margin=0.2, reduce='mean'):
"""Computes triplet loss.
It takes a triplet of variables as inputs, :math:`a`, :math:`p` and
:math:`n`: anchor, positive example and negative example respectively.
The triplet defines a relative similarity between samples.
Let :math:`N` and :math:`K` denote mini-batch size and the dimension of
input variables, respectively. The shape of all input variables should be
:math:`(N, K)`.
.. math::
L(a, p, n) = \\frac{1}{N} \\left( \\sum_{i=1}^N \\max \\{d(a_i, p_i)
- d(a_i, n_i) + {\\rm margin}, 0\\} \\right)
where :math:`d(x_i, y_i) = \\| {\\bf x}_i - {\\bf y}_i \\|_2^2`.
The output is a variable whose value depends on the value of
the option ``reduce``. If it is ``'no'``, it holds the elementwise
loss values. If it is ``'mean'``, this function takes a mean of
loss values.
Args:
anchor (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
:class:`cupy.ndarray`): The anchor example variable. The shape
should be :math:`(N, K)`, where :math:`N` denotes the minibatch
size, and :math:`K` denotes the dimension of the anchor.
positive (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
:class:`cupy.ndarray`): The positive example variable. The shape
should be the same as anchor.
negative (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
:class:`cupy.ndarray`): The negative example variable. The shape
should be the same as anchor.
margin (float): A parameter for triplet loss. It should be a positive
value.
reduce (str): Reduction option. Its value must be either
``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.
Returns:
~chainer.Variable:
A variable holding a scalar that is the loss value
calculated by the above equation.
If ``reduce`` is ``'no'``, the output variable holds array
whose shape is same as one of (hence both of) input variables.
If it is ``'mean'``, the output variable holds a scalar value.
.. note::
This cost can be used to train triplet networks. See `Learning \
Fine-grained Image Similarity with Deep Ranking \
<https://arxiv.org/abs/1404.4661>`_ for details.
.. admonition:: Example
>>> anchor = np.array([[-2.0, 3.0, 0.5], [5.0, 2.0, -0.5]]).\
astype(np.float32)
>>> pos = np.array([[-2.1, 2.8, 0.5], [4.9, 2.0, -0.4]]).\
astype(np.float32)
>>> neg = np.array([[-2.1, 2.7, 0.7], [4.9, 2.0, -0.7]]).\
astype(np.float32)
>>> F.triplet(anchor, pos, neg)
variable(0.14000003)
>>> y = F.triplet(anchor, pos, neg, reduce='no')
>>> y.shape
(2,)
>>> y.data
array([0.11000005, 0.17 ], dtype=float32)
>>> F.triplet(anchor, pos, neg, margin=0.5) # harder penalty
variable(0.44000003)
"""
return Triplet(margin, reduce).apply((anchor, positive, negative))[0]