-
Notifications
You must be signed in to change notification settings - Fork 6.8k
[MXNET-1030] Cosine Embedding Loss #12750
Changes from 32 commits
b3b5de0
eb9b9b4
7fdd85d
013a604
aac12ad
1c97924
9766983
f05eb7b
c02e111
c10f1ef
95dd2a7
01607b4
c8bca0b
5194cd8
c01f8cb
4b3fe81
78bd725
3b3e117
2df6953
5c642cb
4be5104
410a708
1f48429
b618b61
ed762e5
89aafbc
4a3167b
d80baac
b36e097
c195ed0
308666b
16c3ecd
2dfeaa2
0bd4b24
ca030a7
ede1588
67572c5
55d4b1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,7 @@ | |
'SigmoidBinaryCrossEntropyLoss', 'SigmoidBCELoss', | ||
'SoftmaxCrossEntropyLoss', 'SoftmaxCELoss', | ||
'KLDivLoss', 'CTCLoss', 'HuberLoss', 'HingeLoss', | ||
'SquaredHingeLoss', 'LogisticLoss', 'TripletLoss', 'PoissonNLLLoss'] | ||
'SquaredHingeLoss', 'LogisticLoss', 'TripletLoss', 'PoissonNLLLoss', 'CosineEmbeddingLoss'] | ||
|
||
import numpy as np | ||
from .. import ndarray | ||
|
@@ -767,3 +767,71 @@ def hybrid_forward(self, F, pred, target, sample_weight=None, epsilon=1e-08): | |
loss += stirling_factor | ||
loss = _apply_weighting(F, loss, self._weight, sample_weight) | ||
return F.mean(loss) | ||
|
||
|
||
class CosineEmbeddingLoss(Loss):
    r"""For a target label 1 or -1, vectors input1 and input2, the function computes the cosine distance
    between the vectors. This can be interpreted as how similar/dissimilar two input vectors are.

    .. math::

        L = \sum_i \begin{cases} 1 - {cos\_sim({input1}_i, {input2}_i)} & \text{ if } {label}_i = 1\\
        {cos\_sim({input1}_i, {input2}_i)} & \text{ if } {label}_i = -1 \end{cases}\\
        cos\_sim(input1, input2) = \frac{{input1}_i.{input2}_i}{||{input1}_i||.||{input2}_i||}

    `input1`, `input2` can have arbitrary shape as long as they have the same number of elements.

    Parameters
    ----------
    weight : float or None
        Global scalar weight for loss.
    batch_axis : int, default 0
        The axis that represents mini-batch.
    margin : float
        Margin of separation between correct and incorrect pair.


    Inputs:
        - **input1**: a tensor with arbitrary shape
        - **input2**: another tensor with same shape as input1 to which input1 is
          compared for similarity and loss calculation
        - **label**: A 1-D tensor indicating for each pair input1 and input2, target label is 1 or -1
        - **sample_weight**: element-wise weighting tensor. Must be broadcastable
          to the same shape as input1. For example, if input1 has shape (64, 10)
          and you want to weigh each sample in the batch separately,
          sample_weight should have shape (64, 1).

    Outputs:
        - **loss**: The loss tensor with shape (batch_size, 1).
    """
    def __init__(self, weight=None, batch_axis=0, margin=0, **kwargs):
        super(CosineEmbeddingLoss, self).__init__(weight, batch_axis, **kwargs)
        self._margin = margin

    def hybrid_forward(self, F, input1, input2, label, sample_weight=None):
        # Force input1 to input2's shape so the element-wise product below is valid.
        input1 = _reshape_like(F, input1, input2)
        # Column vector so it broadcasts against the (batch, 1) cosine similarities.
        label = label.reshape((-1, 1))
        cos_sim = self._cosine_similarity(F, input1, input2)
        # Boolean masks selecting the two label cases; exactly one is non-zero per row.
        y_1 = label == 1
        y_minus_1 = label == -1
        # label == 1: penalize dissimilarity, i.e. 1 - cos_sim.
        cos_sim_a = (1 - cos_sim) * y_1

        # A literal zero tensor is built differently in imperative (ndarray) vs
        # symbolic mode, hence the F check.
        if F is ndarray:
            z_array = F.array([0])
        else:
            z_array = F.zeros((1, 1))
        # label == -1: hinge on similarity, i.e. max(0, cos_sim - margin).
        cos_sim_b = F.broadcast_maximum(z_array, y_minus_1 * (cos_sim - self._margin), axis=1)
        loss = cos_sim_a + cos_sim_b
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return loss

    def _cosine_similarity(self, F, x, y, axis=-1):
        # Calculates the cosine similarity between 2 vectors, returned as a
        # (batch, 1) column: dot(x, y) / max(||x|| * ||y||, eps).
        x_norm = F.norm(x, axis=axis).reshape(-1, 1)
        y_norm = F.norm(y, axis=axis).reshape(-1, 1)
        x_dot_y = F.sum(x*y, axis=axis).reshape(-1, 1)
        # eps guards against division by zero for zero-norm inputs; constructed
        # per-mode like z_array above.
        if F is ndarray:
            eps_arr = F.array([1e-12])
        else:
            eps_arr = F.full((1, 1), 1e-12)
        return (x_dot_y / F.broadcast_maximum(x_norm * y_norm, eps_arr))
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -349,6 +349,23 @@ def test_triplet_loss(): | |
assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 | ||
|
||
@with_seed()
def test_cosine_loss():
    """CosineEmbeddingLoss must match a direct ndarray computation of the
    cosine-embedding loss, in both imperative and hybridized mode."""
    # Generating samples; sign() yields a mix of +1 and -1 labels.
    input1 = mx.nd.random.randn(3, 2)
    input2 = mx.nd.random.randn(3, 2)
    label = mx.nd.sign(mx.nd.random.randn(input1.shape[0]))
    # Calculating loss from cosine embedding loss function in Gluon
    Loss = gluon.loss.CosineEmbeddingLoss()
    loss = Loss(input1, input2, label)

    # Calculating the loss the Numpy way: cos_sim = <x1, x2> / (||x1|| * ||x2||),
    # then 1 - cos_sim for label == 1 and max(0, cos_sim) for label == -1
    # (default margin is 0).
    numerator = mx.nd.sum(input1 * input2, keepdims=True, axis=1)
    denominator = mx.nd.sqrt(mx.nd.sum(input1**2, axis=1, keepdims=True)) \
        * mx.nd.sqrt(mx.nd.sum(input2**2, axis=1, keepdims=True))
    numpy_loss = mx.nd.where(label == 1, 1 - numerator / denominator,
                             mx.nd.broadcast_maximum(mx.nd.array([0]), numerator / denominator, axis=1))
    assert_almost_equal(loss.asnumpy(), numpy_loss.asnumpy(), rtol=1e-3, atol=1e-5)

    # Hybridized (symbolic) execution must agree with the imperative result.
    Loss.hybridize()
    loss_hybrid = Loss(input1, input2, label)
    assert_almost_equal(loss_hybrid.asnumpy(), numpy_loss.asnumpy(), rtol=1e-3, atol=1e-5)
|
||
def test_poisson_nllloss(): | ||
pred = mx.nd.random.normal(shape=(3, 4)) | ||
min_pred = mx.nd.min(pred) | ||
|
@@ -404,6 +421,7 @@ def test_poisson_nllloss_mod(): | |
optimizer='adam') | ||
assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick: additional empty line not needed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. It is not required. Following the other test modules. They have a 2 lines gap after the last test function and main function. |
||
if __name__ == '__main__': | ||
import nose | ||
nose.runmodule() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nitpick: typo input2