In [1]:
import torch
import torch.nn.functional as F
from torch import nn, FloatTensor, IntTensor

In [137]:
class CircleLoss(nn.Module):
    """
    Circle loss computed from a tensor of positive scores and a tensor of negative scores.

    NOTE(review): the arithmetic in ``forward`` matches the Circle loss formula when
    ``positive`` and ``negative`` hold similarity scores (e.g. cosine similarities),
    not raw embeddings -- confirm against the caller.

    :param m: Relaxation factor controlling the radius of the decision boundary. The paper uses 0.25 for face recognition and 0.4 for fine-grained image retrieval (images of birds, cars, and online products).
    :param gamma: Scale factor that determines the largest scale of each similarity score. The paper uses 256 for face recognition and 80 for fine-grained image retrieval.

    Formula reference:
    https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#circleloss
    """
    def __init__(self, m=0.4, gamma=80):
        super().__init__()
        self.m = m
        self.gamma = gamma

    def forward(self, positive: FloatTensor, negative: FloatTensor):
        """
        Return the scalar loss: the mean over rows of the per-row Circle loss.

        :param positive: Positive scores; reduced along dim 1, so at least 2-D.
        :param negative: Negative scores; reduced along dim 1, so at least 2-D.
        :return: Mean of the per-row losses as a 0-dim tensor.
        """
        margin, scale = self.m, self.gamma

        # Per-score weights. They are computed from detached inputs, so gradients
        # flow only through the (score - margin) factors below, not the weights.
        weight_pos = F.relu(1 + margin - positive.detach())
        weight_neg = F.relu(negative.detach() + margin)

        logits_pos = -weight_pos * scale * (positive - 1 + margin)
        logits_neg = weight_neg * scale * (negative - margin)

        # logsumexp is the numerically stable form of log(sum(exp(x))) along dim 1.
        lse_pos = torch.logsumexp(logits_pos, dim=1)
        lse_neg = torch.logsumexp(logits_neg, dim=1)

        # softplus(z) = log(1 + exp(z)); per the original author's note it could be
        # swapped for another differentiable activation.
        per_row = F.softplus(lse_pos + lse_neg)

        return per_row.mean()

CircleLoss может принимать слишком большие значения в зависимости от m и gamma, поэтому лучше выбирать их поменьше; также лосс, вероятно, стоит масштабировать.

In [154]:
# Seed the global RNG so the random scores, and the loss computed from them,
# are reproducible on a fresh kernel. Any loss value recorded before this seed
# was added will differ after a re-run.
torch.manual_seed(0)
# Standard-normal samples are unbounded, so these are stress inputs rather than
# values restricted to a cosine-similarity range.
pos = torch.randn([10, 256])
neg = torch.randn([10, 256])

In [160]:
# Evaluate the loss on the random scores, using the m/gamma values the class
# docstring quotes for fine-grained image retrieval.
criterion = CircleLoss(m=0.4, gamma=80)
criterion(pos, neg)

tensor(1734.1755)

In [118]:
# Two separate all-ones float32 score tensors (10 rows, 256 scores per row).
pos, neg = (torch.ones(10, 256, dtype=torch.float32) for _ in range(2))

In [119]:
# Evaluate the loss on the current pos/neg tensors with the same m/gamma as the
# random-input check above.
criterion = CircleLoss(m=0.4, gamma=80)
criterion(pos, neg)

tensor(65.4904)

In [112]:
# softplus(0) = ln(2): the per-row value CircleLoss.forward produces when the
# two logsumexp terms sum to zero.
zero_logit = torch.tensor(0.0)
F.softplus(zero_logit)

tensor(0.6931)