In [1]:
import torch
import torch.nn.functional as F

## Softmax

In [2]:
# Built-in softmax applied to a single 1-D vector of scores.
data = torch.tensor([1.0, 2.0, 1.0], dtype=torch.float32)
F.softmax(data, dim=-1)

tensor([0.2119, 0.5761, 0.2119])

In [3]:
# Built-in softmax on a batch of two score vectors; dim=-1 normalises
# each row on its own.
data = torch.tensor(
    [[1.0, 2.0, 1.0], [2.0, 3.0, 4.0]],
    dtype=torch.float32,
)
F.softmax(data, dim=-1)

tensor([[0.2119, 0.5761, 0.2119],
        [0.0900, 0.2447, 0.6652]])

In [4]:
# let's implement a softmax function
def calc_softmax(data, dim=-1):
    """Compute the softmax of ``data`` along ``dim``.

    Args:
        data: Tensor of scores (logits).
        dim: Dimension along which the outputs are normalised to sum to 1.
            Defaults to the last dimension.

    Returns:
        A tensor with the same shape as ``data`` holding
        ``exp(data) / sum(exp(data))`` taken along ``dim``.
    """
    if data.numel() == 0:
        # Nothing to stabilise, and a max over an empty dimension would raise.
        shifted = data
    else:
        # Subtracting the per-slice maximum does not change the result (the
        # common factor exp(-max) cancels in the ratio) but keeps exp() from
        # overflowing to inf, which would otherwise yield inf / inf = nan.
        shifted = data - torch.max(data, dim=dim, keepdim=True).values
    e_data = torch.exp(shifted)
    return e_data / torch.sum(e_data, dim=dim, keepdim=True)

# Same 1-D vector as before, this time through the hand-written softmax.
data = torch.tensor([1.0, 2.0, 1.0], dtype=torch.float32)
calc_softmax(data, dim=-1)

tensor([0.2119, 0.5761, 0.2119])

In [5]:
# Same two-row batch as before, this time through the hand-written softmax.
data = torch.tensor(
    [[1.0, 2.0, 1.0], [2.0, 3.0, 4.0]],
    dtype=torch.float32,
)
calc_softmax(data, dim=-1)

tensor([[0.2119, 0.5761, 0.2119],
        [0.0900, 0.2447, 0.6652]])

## Cross Entropy Loss

In [6]:
# Integer class labels for three samples, plus raw scores that are turned
# into per-row probabilities by the hand-written softmax.
Y = torch.tensor([1, 0, 2])
yhat = calc_softmax(
    torch.tensor(
        [
            [0.1, 0.8, 0.7],
            [0.4, 0.5, 0.5],
            [0.9, 0.1, 0.5],
        ],
        dtype=torch.float32,
    )
)
yhat

tensor([[0.2068, 0.4164, 0.3768],
        [0.3115, 0.3443, 0.3443],
        [0.4718, 0.2120, 0.3162]])

In [7]:
# One-hot encode the integer labels over the 3 classes.
y_one_hot = F.one_hot(Y, num_classes=3)
y_one_hot

tensor([[0, 1, 0],
        [1, 0, 0],
        [0, 0, 1]])

In [8]:
# provided reference implementation: takes integer class labels
def cross_entropy(y_hat, y):
    """Mean negative log-probability assigned to the true classes.

    Args:
        y_hat: Predicted probabilities, indexed as ``y_hat[sample, class]``.
        y: Integer class index for each sample.

    Returns:
        Scalar tensor with the negated mean of ``log(y_hat[i, y[i]])``.
    """
    sample_idx = range(len(y_hat))
    # Pairing each row index with its label picks one probability per sample.
    true_class_probs = y_hat[sample_idx, y]
    return -true_class_probs.log().mean()

In [9]:
# Loss of the softmax predictions against the integer labels.
cross_entropy(y_hat=yhat, y=Y)

tensor(1.0646)

**Cross Entropy Implementation in Simpler Terms**

The step-by-step version below is based on the log-likelihood section on [D2L.ai](https://d2l.ai/chapter_linear-classification/softmax-regression.html#log-likelihood).

![](https://i.imgur.com/ivCsEX6.png)

In [10]:
# Redisplay the predicted probabilities as the starting point of the walkthrough.
yhat

tensor([[0.2068, 0.4164, 0.3768],
        [0.3115, 0.3443, 0.3443],
        [0.4718, 0.2120, 0.3162]])

In [11]:
# Step 1: element-wise natural log of every predicted probability.
yhat.log()

tensor([[-1.5761, -0.8761, -0.9761],
        [-1.1664, -1.0664, -1.0664],
        [-0.7513, -1.5513, -1.1513]])

In [12]:
# Step 2: the one-hot mask zeroes the log-probabilities of the wrong classes.
y_one_hot * torch.log(yhat)

tensor([[-0.0000, -0.8761, -0.0000],
        [-1.1664, -0.0000, -0.0000],
        [-0.0000, -0.0000, -1.1513]])

In [13]:
# Step 3: summing over the class axis leaves one log-likelihood per sample.
torch.sum(torch.log(yhat) * y_one_hot, dim=-1)

tensor([-0.8761, -1.1664, -1.1513])

In [14]:
# Step 4: average over the samples and flip the sign to get the loss.
-torch.mean(torch.sum(torch.log(yhat) * y_one_hot, dim=-1))

tensor(1.0646)

In [15]:
def calc_cross_entropy(yhat, targets):
    """Cross-entropy between predicted probabilities and target weights.

    Args:
        yhat: Predicted probabilities, with classes along the last dimension.
        targets: Per-class target weights broadcastable against ``yhat``
            (for example a one-hot encoding of the labels).

    Returns:
        Scalar tensor: ``-sum(targets * log(yhat))`` over the last dimension,
        averaged over the remaining entries. Classes whose target weight is 0
        contribute exactly 0, even when their predicted probability is 0.
    """
    # A class with zero target weight must not affect the loss. Computing
    # log(0) * 0 directly gives -inf * 0 = nan, so swap those probabilities
    # for 1 (log(1) = 0) before taking the log.
    ignored = targets == 0
    safe_probs = torch.where(ignored, torch.ones_like(yhat), yhat)
    return -(torch.log(safe_probs) * targets).sum(-1).mean()

In [16]:
# The one-hot formulation should reproduce the index-based loss above.
calc_cross_entropy(yhat=yhat, targets=y_one_hot)

tensor(1.0646)