## CrossEntropyLoss, NLLLoss and LogSoftmax

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Fix the RNG seed so the random scores below are reproducible.
torch.manual_seed(42)
batch_size = 5
n_classes = 3
# Standard-normal scores: one row per sample, one column per class.
x = torch.randn((batch_size, n_classes))
display(x)

tensor([[ 0.3367,  0.1288,  0.2345],
        [ 0.2303, -1.1229, -0.1863],
        [ 2.2082, -0.6380,  0.4617],
        [ 0.2674,  0.5349,  0.8094],
        [ 1.1103, -1.6898, -0.9890]])

In [3]:
# Re-seed so the sampled class labels are reproducible as well.
torch.manual_seed(46)
# One integer label in [0, n_classes) per sample.
target = torch.randint(
    low=0,
    high=n_classes,
    size=(batch_size,),
    dtype=torch.long,
)
display(target)

tensor([0, 1, 2, 0, 2])

### `softmax` + `nlog` (negative log-likelihood)

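This version most closely mirrors the mathematical definition, but it is not numerically stable: `exp` overflows for large scores, and the separate `log` returns `-inf` when a probability underflows to zero.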

In [4]:
def softmax(x):
    """Exponentiate `x` and normalize along the last dimension.

    This is a literal transcription of the formula: no maximum is subtracted
    before `exp`, so large inputs can overflow.
    """
    exps = x.exp()
    # Re-insert the reduced axis so the division broadcasts over each row.
    totals = exps.sum(-1).unsqueeze(-1)
    return exps / totals

def nlog(output, target):
    """Mean of `-log(output[i, target[i]])` over the rows `i`.

    `output` is indexed with one column per row, taken from `target`; the
    selected entries are passed through `log`, averaged and negated.
    """
    rows = range(target.shape[0])
    picked = output[rows, target]
    return -(picked.log().mean())

In [5]:
# Turn the scores into per-row probabilities with the hand-written softmax.
pred = softmax(x)
display(pred)
print()  # blank line between the two displayed tensors
# Mean negative log of the probability picked out by each row's target index.
loss = nlog(pred, target)
display(loss)

tensor([[0.3683, 0.2992, 0.3325],
        [0.5215, 0.1348, 0.3438],
        [0.8114, 0.0471, 0.1415],
        [0.2484, 0.3246, 0.4271],
        [0.8451, 0.0514, 0.1036]])




tensor(1.7238)

### `logsoftmax` + `nlik` (negative log-likelihood)

Computing `log(softmax(x))` in two steps is slower and numerically unstable, so compute the log-softmax directly as $x_i - \log\sum_j e^{x_j}$ instead.

In [6]:
def logsoftmax(x):
    """Log-softmax of `x` along its last dimension.

    Evaluates `x - log(sum(exp(x)))`. The log-sum-exp term is delegated to
    `Tensor.logsumexp`, which is numerically stabilized, so large positive
    scores no longer overflow in `exp` (giving `-inf` results) and large
    negative scores no longer hit `log(0)` (giving `inf` results).

    Args:
        x: tensor of scores; the reduction runs over the last dimension.

    Returns:
        Tensor of log-probabilities, broadcast back against `x`.
    """
    # Re-insert the reduced axis so the subtraction broadcasts over each row.
    return x - x.logsumexp(dim=-1).unsqueeze(-1)

def nlik(output, target):
    """Negated mean of `output[i, target[i]]` over the rows `i`.

    No `log` is taken here: the selected entries are averaged as they are
    and the sign of the mean is flipped.
    """
    rows = range(target.shape[0])
    picked = output[rows, target]
    return -(picked.mean())

# Log-probabilities from the hand-written logsoftmax.
pred = logsoftmax(x)
display(pred)
print()  # blank line between the two outputs
# Negated mean of the entry picked out by each row's target index.
loss = nlik(pred, target)
# Bare last expression: shown as the cell's result.
loss

tensor([[-0.9988, -1.2067, -1.1011],
        [-0.6511, -2.0043, -1.0678],
        [-0.2090, -3.0552, -1.9555],
        [-1.3928, -1.1253, -0.8508],
        [-0.1684, -2.9684, -2.2676]])




tensor(1.7238)

### `CrossEntropyLoss`, `NLLLoss` and `LogSoftmax`

In [7]:
# Module form: LogSoftmax over the last dimension of the scores.
pred = nn.LogSoftmax(dim=-1)(x)
display(pred)
print()  # blank line between the two displayed tensors
# NLLLoss is fed the log-softmax output together with the target indices.
loss = nn.NLLLoss()(pred,target)
display(loss)

tensor([[-0.9988, -1.2067, -1.1011],
        [-0.6511, -2.0043, -1.0678],
        [-0.2090, -3.0552, -1.9555],
        [-1.3928, -1.1253, -0.8508],
        [-0.1684, -2.9684, -2.2676]])




tensor(1.7238)

In [8]:
# CrossEntropyLoss takes the raw scores directly; per the PyTorch docs it
# combines log-softmax and negative log-likelihood in a single step.
loss = nn.CrossEntropyLoss()(x, target)
display(loss)
print()  # blank line between outputs
# Compute the log-probabilities from `x` itself, so this cell does not depend
# on whatever `pred` an earlier cell happened to leave behind.
pred = nn.LogSoftmax(dim=-1)(x)
display(pred)
print()  # blank line between outputs
# Feeding log-probabilities to CrossEntropyLoss gives the same loss: each row
# of `pred` already has log-sum-exp equal to zero, so the log-softmax applied
# inside the loss leaves it unchanged (log-softmax is idempotent).
loss = nn.CrossEntropyLoss()(pred, target)
display(loss)

tensor(1.7238)




tensor([[-0.9988, -1.2067, -1.1011],
        [-0.6511, -2.0043, -1.0678],
        [-0.2090, -3.0552, -1.9555],
        [-1.3928, -1.1253, -0.8508],
        [-0.1684, -2.9684, -2.2676]])




tensor(1.7238)

### `F.log_softmax` + `F.nll_loss`

In [9]:
# Functional form: log-softmax over the last dimension of the scores.
pred = F.log_softmax(x, dim=-1)
# `target.view(-1)` passes the labels as a flat 1-D tensor.
loss = F.nll_loss(pred, target.view(-1))
# Bare last expression: shown as the cell's result.
loss

tensor(1.7238)

### `F.cross_entropy`

In [10]:
# Functional cross-entropy applied to the raw scores; labels are passed as a flat 1-D tensor.
F.cross_entropy(x, target.view(-1))

tensor(1.7238)