# Cross Entropy Loss

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

Cross Entropy

$$\text{Cross Entropy} = - \sum_{x} p(x) \cdot \log q(x)$$

## `nn.CrossEntropyLoss`

In [2]:
# Three samples of raw scores over five classes, and one target class index per sample.
# No RNG seed is set, so `inputs` differs on every fresh run.
inputs = torch.randn((3, 5))
labels = torch.tensor([1, 0, 1], dtype=torch.int64)

In [3]:
# Display the raw scores next to their target class indices.
inputs, labels

(tensor([[-0.6722,  0.0933, -0.7828, -0.5380, -0.3814],
         [-1.0644, -0.0649,  0.6899, -0.5077, -0.4502],
         [-1.7857, -0.0663,  0.4620,  0.8843, -0.4773]]), tensor([1, 0, 1]))

In [4]:
# reduction='none' keeps one loss value per sample instead of collapsing to a scalar.
nn.CrossEntropyLoss(reduction='none')(input=inputs, target=labels)

tensor([1.1104, 2.5718, 1.8125])

In [5]:
# The default reduction is 'mean': the per-sample losses are averaged into one scalar.
nn.CrossEntropyLoss(reduction='mean')(input=inputs, target=labels)

tensor(1.8316)

解释

In [6]:
# Normalise every row (dim=1, the class dimension) into a probability distribution.
F.softmax(inputs, dim=1)

tensor([[0.1532, 0.3294, 0.1372, 0.1752, 0.2049],
        [0.0764, 0.2076, 0.4415, 0.1333, 0.1412],
        [0.0293, 0.1632, 0.2769, 0.4224, 0.1082]])

In [7]:
# Sample 0 has target class 1, so its loss is -log of the softmax probability at [0, 1].
-torch.log(F.softmax(inputs, dim=1))[0, 1]

tensor(1.1104)

In [8]:
# log_softmax followed by nll_loss reproduces the cross-entropy loss;
# [0] selects the loss of the first sample.
F.nll_loss(input=F.log_softmax(inputs, dim=1), target=labels, reduction='none')[0]

tensor(1.1104)

In [9]:
# Same computation for all samples: one negative log-likelihood value per row.
F.nll_loss(input=F.log_softmax(inputs, dim=1), target=labels, reduction='none')

tensor([1.1104, 2.5718, 1.8125])

In [10]:
# Without an explicit reduction, nll_loss averages the per-sample values.
F.nll_loss(input=F.log_softmax(inputs, dim=1), target=labels)

tensor(1.8316)

* `softmax` 转换成概率分布
* `log` 概率分布转化成信息量
* `negative log likelihood` 损失

> 自信息$I(x) = -\log p(x)$

## `nn.BCELoss`

$$Loss = -\left[ y_n \cdot \log (x_n) + (1 - y_n) \cdot \log(1-x_n) \right]$$

In [11]:
# Three raw scores and three random binary targets (0. or 1.) stored as floats.
# No RNG seed is set, so both tensors differ on every fresh run.
inputs = torch.randn((3,))
labels = torch.empty_like(inputs).random_(2)

In [12]:
# Display the raw scores next to their binary targets.
inputs, labels

(tensor([0.2211, 0.8756, 2.4896]), tensor([0., 0., 0.]))

In [13]:
# The raw scores are squashed by a sigmoid before being handed to BCELoss;
# reduction='none' keeps one loss value per element.
nn.BCELoss(reduction='none')(inputs.sigmoid(), labels)

tensor([0.8098, 1.2238, 2.5693])

In [14]:
# The default reduction is 'mean': the element-wise losses are averaged into one scalar.
nn.BCELoss(reduction='mean')(inputs.sigmoid(), labels)

tensor(1.5343)

解释

In [15]:
# Map each raw score to a value in (0, 1).
inputs.sigmoid()

tensor([0.5551, 0.7059, 0.9234])

In [16]:
# The y = 1 term of the loss: -log(x), with x the sigmoid output.
-inputs.sigmoid().log()

tensor([0.5887, 0.3483, 0.0797])

In [17]:
# The y = 0 term of the loss: -log(1 - x), with x the sigmoid output.
-torch.log(1 - torch.sigmoid(inputs))

tensor([0.8098, 1.2238, 2.5693])

In [18]:
# Functional form of BCELoss, again with one loss value per element.
F.binary_cross_entropy(input=inputs.sigmoid(), target=labels, reduction='none')

tensor([0.8098, 1.2238, 2.5693])

## `nn.BCEWithLogitsLoss`

In [19]:
# Fresh raw scores and random binary float targets for the BCEWithLogitsLoss demo.
# No RNG seed is set, so both tensors differ on every fresh run.
inputs = torch.randn((3,))
labels = torch.empty_like(inputs).random_(2)

In [20]:
# Display the new raw scores next to their binary targets.
inputs, labels

(tensor([ 0.5454,  0.9473, -0.5845]), tensor([1., 0., 0.]))

In [21]:
# BCEWithLogitsLoss is given the raw scores directly (no explicit sigmoid call);
# reduction='none' keeps one loss value per element.
nn.BCEWithLogitsLoss(reduction='none')(input=inputs, target=labels)

tensor([0.4572, 1.2750, 0.4430])

In [22]:
# The default reduction is 'mean': the element-wise losses are averaged into one scalar.
nn.BCEWithLogitsLoss(reduction='mean')(input=inputs, target=labels)

tensor(0.7251)

In [23]:
# Reproduce reduction='mean' by hand: sum every element-wise loss, then divide by
# the number of elements. numel() (rather than shape[0]) keeps the result equal to
# the 'mean' reduction when the targets have more than one dimension.
nn.BCEWithLogitsLoss(reduction='sum')(inputs, labels) / labels.numel()

tensor(0.7251)