import numpy

from chainer import cuda
from chainer import function
from chainer.utils import type_check


def _hinge_fwd_kernel():
    return cuda.elementwise(
        'S t', 'raw T bottom_diff',
        'int ind[] = {i, t}; bottom_diff[ind] *= -1',
        'hinge_fwd')
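
# The elementwise kernel above negates, in place, the score at each row's
# target index (bottom_diff[i, t[i]] *= -1). The CPU path below performs the
# same update with NumPy fancy indexing:
#
#     bottom_diff[numpy.arange(num), t] *= -1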


class Hinge(function.Function):

    """Hinge loss."""

    def __init__(self, norm='L1', reduce='mean'):
        if norm in ['L1', 'L2']:
            self.norm = norm
        else:
            raise NotImplementedError("norm should be either 'L1' or 'L2'")
        if reduce in ['mean', 'no']:
            self.reduce = reduce
        else:
            raise ValueError(
                "only 'mean' and 'no' are valid for 'reduce', but '%s' is "
                'given' % reduce)

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 2)
        x_type, t_type = in_types

        type_check.expect(
            x_type.dtype == numpy.float32,
            t_type.dtype.kind == 'i',
            x_type.ndim == 2,
            t_type.ndim == 1,
            x_type.shape[0] == t_type.shape[0],
        )
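
    # In plain terms: x must be a float32 score matrix of shape (N, K) and
    # t an integer label vector of shape (N,), sharing the batch dimension N.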

    def forward_cpu(self, inputs):
        x, t = inputs
        num = len(x)
        self.bottom_diff = numpy.copy(x)
        self.bottom_diff[numpy.arange(num), t] *= -1
        self.bottom_diff = numpy.maximum(0, 1 + self.bottom_diff)

        if self.norm == 'L1':
            loss = self.bottom_diff
        elif self.norm == 'L2':
            loss = self.bottom_diff ** 2
        else:
            raise NotImplementedError()

        if self.reduce == 'mean':
            loss = loss.sum() / num

        return numpy.array(loss, dtype=x.dtype),
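
    # Worked example (mirrors the doctest in ``hinge`` below): for
    #     x = [[-2, 3, 0.5], [5, 2, -0.5]] and t = [1, 0],
    # the target scores 3 and 5 are negated, so the margins become
    #     max(0, 1 + [[-2, -3, 0.5], [-5, 2, -0.5]]) = [[0, 0, 1.5], [0, 3, 0.5]]
    # and the mean L1 loss is (1.5 + 3 + 0.5) / 2 = 2.5.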

    def forward_gpu(self, inputs):
        x, t = inputs
        num = x.dtype.type(len(x))
        self.bottom_diff = cuda.cupy.maximum(
            0, 1 + _hinge_fwd_kernel()(t, x.copy()))

        if self.norm == 'L1':
            loss = self.bottom_diff
        elif self.norm == 'L2':
            loss = self.bottom_diff ** 2
        else:
            raise NotImplementedError()

        if self.reduce == 'mean':
            loss = loss.sum() / num

        return loss,
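
    # Backward pass: negating the stored margins at the target indices once
    # more reattaches the sign factor -delta, so sign(bottom_diff) is the
    # subgradient of each L1 margin term and 2 * bottom_diff the gradient of
    # each L2 term, both scaled by the upstream gradient gloss.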

    def backward_cpu(self, inputs, grad_outputs):
        t, gloss = inputs[1], grad_outputs[0]
        if self.reduce == 'mean':
            gloss = gloss / len(t)  # avoid mutating grad_outputs in place
        self.bottom_diff[numpy.arange(len(t)), t] *= -1

        if self.norm == 'L1':
            gx = gloss * numpy.sign(self.bottom_diff)
        elif self.norm == 'L2':
            gx = 2 * gloss * self.bottom_diff
        else:
            raise NotImplementedError()

        return gx, None

    def backward_gpu(self, inputs, grad_outputs):
        xp = cuda.get_array_module(*inputs)
        t, gloss = inputs[1], grad_outputs[0]
        if self.reduce == 'mean':
            gloss = gloss / len(t)  # avoid mutating grad_outputs in place
        self.bottom_diff = _hinge_fwd_kernel()(t, self.bottom_diff)

        if self.norm == 'L1':
            gx = gloss * xp.sign(self.bottom_diff)
        elif self.norm == 'L2':
            gx = 2 * gloss * self.bottom_diff
        else:
            raise NotImplementedError()

        return gx, None


def hinge(x, t, norm='L1', reduce='mean'):
    """Computes the hinge loss for a one-of-many classification task.

    .. math::
        L = \\frac{1}{N} \\sum_{n=1}^N \\sum_{k=1}^K \\left[
        \\max(0, 1 - \\delta\\{t_n = k\\} x_{nk}) \\right]^p

    where :math:`N` denotes the batch size and :math:`K` is the number of
    classes of interest,

    .. math::
        \\delta \\{ {\\rm condition} \\} = \\left \\{ \\begin{array}{cc}
        1 & {\\rm if~condition\\ is\\ true} \\\\
        -1 & {\\rm otherwise,}
        \\end{array} \\right.

    and

    .. math::
        p = \\left \\{ \\begin{array}{cc}
        1 & {\\rm if~norm} = {\\rm L1} \\\\
        2 & {\\rm if~norm} = {\\rm L2.}
        \\end{array} \\right.

    Let the hinge loss function :math:`l(x, \\delta)` be
    :math:`\\left[\\max(0, 1 - \\delta x) \\right]^p`. When :math:`x` and
    :math:`\\delta` have the same sign (meaning :math:`x` predicts the
    proper score for classification) and :math:`|x| \\geq 1`, the hinge
    loss :math:`l(x, \\delta) = 0`, but when they have opposite signs,
    :math:`l(x, \\delta)` increases linearly with :math:`|x|`
    (quadratically when ``norm`` is ``'L2'``).

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'mean'``, it takes the mean of loss values.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray` of :class:`numpy.float32`):
            Input variable. The shape of ``x`` should be
            (:math:`N`, :math:`K`).
        t (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray` of signed integer):
            The :math:`N`-dimensional label vector with values
            :math:`t_n \\in \\{0, 1, 2, \\dots, K-1\\}`.
            The shape of ``t`` should be (:math:`N`,).
        norm (str): Specifies the norm type. Either ``'L1'`` or ``'L2'`` is
            acceptable.
        reduce (str): Reduction option. Its value must be either
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding a scalar array of the
            hinge loss :math:`L`.
            If ``reduce`` is ``'no'``, the output variable holds an array
            whose shape is the same as that of ``x``.
            If it is ``'mean'``, the output variable holds a scalar value.

    .. admonition:: Example

        In this case, the batch size ``N`` is 2 and the number of classes
        ``K`` is 3.

        >>> x = np.array([[-2.0, 3.0, 0.5],
        ...               [5.0, 2.0, -0.5]]).astype('f')
        >>> x
        array([[-2. ,  3. ,  0.5],
               [ 5. ,  2. , -0.5]], dtype=float32)
        >>> t = np.array([1, 0]).astype('i')
        >>> t
        array([1, 0], dtype=int32)
        >>> F.hinge(x, t)
        variable(2.5)
        >>> F.hinge(x, t, reduce='no')
        variable([[ 0. ,  0. ,  1.5],
                  [ 0. ,  3. ,  0.5]])
        >>> F.hinge(x, t, norm='L2')
        variable(5.75)

    """
    return Hinge(norm, reduce)(x, t)
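

# A minimal usage sketch (not part of the original module), mirroring the
# doctest above; it assumes chainer and numpy are importable.
if __name__ == '__main__':
    x = numpy.array([[-2.0, 3.0, 0.5],
                     [5.0, 2.0, -0.5]], dtype=numpy.float32)
    t = numpy.array([1, 0], dtype=numpy.int32)
    print(hinge(x, t).data)               # 2.5   (mean L1 loss)
    print(hinge(x, t, reduce='no').data)  # elementwise losses, shape (2, 3)
    print(hinge(x, t, norm='L2').data)    # 5.75  (mean L2 loss)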