# Softmax and Loss Function
https://pytorch.org/tutorials/beginner/nlp/deep_learning_tutorial.html?highlight=loss%20function

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Synthetic data: n raw class scores drawn from a standard normal distribution.
# No seed is set, so the values change on every run.
n = 5
x = torch.randn(size=(n,))
print(x)

tensor([-0.2097,  0.2226,  0.0553, -2.0127,  0.1066])


In [3]:
# val: the largest entry of x
# c:   the index of that entry, treated as the "true" class in this demo
# keepdim=True keeps both results as 1-element tensors instead of 0-dim scalars.
val, c = x.max(dim=0, keepdim=True)
print(val.item(), c.item())

0.22260449826717377 1


In [4]:
# Softmax maps the raw scores to positive values that sum to 1.
phi = x.softmax(dim=0)
print(phi.numpy())

[0.18583983 0.2863342  0.24222979 0.03062579 0.25497052]


In [5]:
# The softmax outputs add up to 1 (up to floating-point rounding).
print(phi.sum().item())

1.0000001192092896


In [6]:
# Log-probabilities in one call: mathematically the same as log(softmax(x)),
# but faster and numerically more stable than applying the two steps separately.
log_phi = x.log_softmax(dim=0)
print(log_phi.numpy())

[-1.6828701 -1.2505957 -1.4178685 -3.4859128 -1.3666074]


In [7]:
# Unlike the probabilities, the log-probabilities do not sum to 1.
print(torch.sum(log_phi).item())

-9.20385456085205


In [8]:
# Negative log-likelihood loss; it expects log-probabilities as input.
# "mean" is the default reduction, spelled out here for clarity.
loss_fn = nn.NLLLoss(reduction="mean")

In [9]:
# Indexing with None adds a leading batch dimension, giving a (1, n) input
# that pairs with the 1-element target c.
print(loss_fn(log_phi[None, :], c).item())

1.2505956888198853


In [10]:
# L(y_hat, y) = -log(y_hat[y]): the negative log-probability of the true class.
# This is the same value that NLLLoss returns for target c.
print((-log_phi[c]).item())

1.2505956888198853


In [11]:
# loss = -x[c] + log(sum(exp(x)))
# torch.logsumexp evaluates log(sum(exp(x))) in a numerically stable way, so
# large scores cannot overflow exp() and turn the loss into inf.
loss = -x[c] + torch.logsumexp(x, dim=0)
print(loss.item())

1.2505958080291748


In [12]:
# Pick, uniformly at random, one class index other than the true class c.
# c is a 1-element tensor, so compare against its plain Python value.
indices_not_c = [i for i in range(n) if i != c.item()]
print(indices_not_c)
not_c = np.random.choice(indices_not_c)
print(not_c)

[0, 2, 3, 4]
2


In [13]:
# NLLLoss takes class-index targets as a long (int64) tensor. not_c is a NumPy
# integer whose width can depend on the platform, so the dtype is set
# explicitly instead of being inferred.
class_ = torch.tensor([not_c], dtype=torch.long)
# unsqueeze(0) adds the batch dimension that pairs with the 1-element target.
print(loss_fn(log_phi.unsqueeze(0), class_).item())

1.4178684949874878


In [14]:
# Negative log-probability of class not_c: the same value that NLLLoss
# returns for that target.
print((-log_phi[not_c]).item())

1.4178684949874878


In [15]:
# loss = -x[not_c] + log(sum(exp(x)))
# torch.logsumexp evaluates log(sum(exp(x))) in a numerically stable way, so
# large scores cannot overflow exp() and turn the loss into inf.
loss = -x[not_c] + torch.logsumexp(x, dim=0)
print(loss.item())

1.4178686141967773
