In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# softmax function
def softmax(x, axis=0):
    """Compute the softmax of ``x`` along ``axis`` in a numerically stable way.

    The maximum along ``axis`` is subtracted before exponentiating. Softmax is
    invariant to such a shift, so the result is mathematically unchanged, but
    large scores (e.g. 1000.0) no longer overflow ``np.exp`` and turn the
    output into NaN.

    Args:
        x: Array-like of raw scores (logits).
        axis: Axis along which the values are normalised. Defaults to 0,
            which is the axis this function has always reduced over.

    Returns:
        np.ndarray of the same shape as ``x`` whose entries along ``axis``
        sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)  # computed once, reused for numerator and denominator
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [3]:
# Example raw scores (one per class) to run through the numpy softmax.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
# Bare last expression so the notebook displays the resulting array.
outputs

array([0.65900114, 0.24243297, 0.09856589])

In [4]:
# same with pytorch
# torch.as_tensor (rather than torch.tensor) keeps this cell safe to re-run:
# `x` is rebound to a tensor here, and on a second execution as_tensor simply
# hands that tensor back, whereas torch.tensor(x) would emit a copy-construct
# UserWarning. On the first run the tensor shares memory with the numpy array.
x = torch.as_tensor(x)
outputs = torch.softmax(x, dim=0)
# Bare last expression so the notebook displays the resulting tensor.
outputs

tensor([0.6590, 0.2424, 0.0986], dtype=torch.float64)

In [5]:
# cross-entropy loss
def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between a one-hot target and predicted probabilities.

    Computes ``-sum(actual * log(predicted))``. The result is summed over all
    entries and is NOT divided by the number of samples.

    Args:
        actual: Array-like one-hot (or soft) target distribution.
        predicted: Array-like of predicted probabilities, same shape as
            ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.
            Probabilities at or above ``eps`` are left untouched.

    Returns:
        The summed cross-entropy loss as a numpy float scalar.
    """
    actual = np.asarray(actual)
    # Floor the probabilities so log() never sees an exact 0; otherwise
    # 0 * log(0) evaluates to 0 * -inf = NaN and poisons the whole sum,
    # even for a perfect prediction such as [1.0, 0.0, 0.0].
    predicted = np.maximum(np.asarray(predicted), eps)
    loss = -np.sum(actual * np.log(predicted))
    return loss

In [6]:
# Y must be one-hot encoded (a 1 at the index of the correct class):
# -- correct class 0 = [1, 0, 0]
# -- correct class 1 = [0, 1, 0]
# -- correct class 2 = [0, 0, 1]
# Here the correct class is 0.
Y = np.array([1, 0, 0])

In [7]:
# Predicted class probabilities: "good" puts most mass on class 0,
# "bad" puts most mass on class 2.
y_pred_good = np.array([0.65900114, 0.24243297, 0.09856589])
y_pred_bad = np.array([0.1, 0.3, 0.6])

# Score both predictions against the same one-hot target Y.
l1, l2 = (cross_entropy(Y, probs) for probs in (y_pred_good, y_pred_bad))

print(f'Loss 1 numpy: {l1:.4f}')
print(f'Loss 2 numpy: {l2:.4f}')

Loss 1 numpy: 0.4170
Loss 2 numpy: 2.3026


In [8]:
# Some notes on pytorch implementation:
# -- 1. nn.CrossEntropyLoss already applies Softmax (as LogSoftmax) before the Negative Log-Likelihood loss
#       (so, we don't need a softmax layer at the end of our model)
# -- 2. Y is NOT one-hot encoded, it only contains the correct class label (either 0 or 1 or 2)
# -- 3. Y_pred contains raw scores (logits), not the probabilities. 

In [9]:
# Loss criterion built with default arguments; per the notes above it is fed
# raw logits and integer class labels.
loss = nn.CrossEntropyLoss()

In [10]:
# Target for a single sample: the correct class index 0 (a label, not one-hot).
# Note: this rebinds Y, which earlier held the one-hot numpy target.
Y = torch.tensor([0])

In [11]:
# dims = n_samples x n_classes (here 1 x 3); values are raw logits
# "good" has its largest score on class 0, "bad" on class 1
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

In [12]:
# Evaluate the criterion on each logits tensor against the same target Y.
l1, l2 = (loss(logits, Y) for logits in (Y_pred_good, Y_pred_bad))

print(f'Loss 1 pytorch: {l1.item():.4f}')
print(f'Loss 2 pytorch: {l2.item():.4f}')

Loss 1 pytorch: 0.4170
Loss 2 pytorch: 1.8406


In [13]:
# Predicted class = index of the largest logit along dim 1 (the class axis).
# torch.argmax returns only the indices, so nothing is assigned to `_`, which
# IPython/Jupyter reserves for the previous cell's output.
prediction1 = torch.argmax(Y_pred_good, dim=1)
prediction2 = torch.argmax(Y_pred_bad, dim=1)

print(prediction1)
print(prediction2)

tensor([0])
tensor([1])


In [14]:
# 3 samples: one class label per sample (correct classes 2, 0 and 1)
Y = torch.tensor([2, 0, 1])

In [15]:
# dims = n_samples x n_classes (here 3 x 3): one row of raw logits per sample
# "good": each row's largest logit sits at that sample's class in Y = [2, 0, 1]
# "bad":  each row's largest logit sits at a class other than the one in Y
Y_pred_good = torch.tensor([[0.1, 1.0, 2.0], [2.0, 1.0, 0.1], [1.0, 2.0, 0.1]])
Y_pred_bad = torch.tensor([[2.0, 0.3, 0.5], [0.3, 2.0, 0.5], [0.5, 0.3, 2.0]])

In [16]:
# Evaluate the criterion on each batch of logits against the 3-sample target Y.
l1, l2 = (loss(logits, Y) for logits in (Y_pred_good, Y_pred_bad))

print(f'Loss 1 pytorch: {l1.item():.4f}')
print(f'Loss 2 pytorch: {l2.item():.4f}')

Loss 1 pytorch: 0.4170
Loss 2 pytorch: 1.9739


In [17]:
# Predicted class per sample = index of the largest logit along dim 1.
# torch.argmax returns only the indices, so nothing is assigned to `_`, which
# IPython/Jupyter reserves for the previous cell's output.
prediction1 = torch.argmax(Y_pred_good, dim=1)
prediction2 = torch.argmax(Y_pred_bad, dim=1)

print(prediction1)
print(prediction2)

tensor([2, 0, 1])
tensor([0, 1, 2])
