In [6]:
# Softmax and cross-entropy loss are two of the most widely used functions in neural networks

import torch
import torch.nn as nn
import numpy as np

def softmax(x):
    """Compute the softmax of ``x`` along axis 0 in a numerically stable way.

    The maximum along axis 0 is subtracted before exponentiating. This does
    not change the mathematical result, but it keeps ``np.exp`` from
    overflowing to ``inf`` (which would turn the output into ``nan``) when
    the scores are large.

    Args:
        x: Array-like of scores. Normalisation is performed along axis 0.

    Returns:
        np.ndarray with the same shape as ``x``; the entries along axis 0 are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)
    # Largest exponent is now 0, so exp() can no longer overflow.
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)

# Raw scores for three classes, converted to probabilities by softmax.
x = np.array((2.0, 1.0, 0.1))
outputs = softmax(x)
print('Softmax Numpy: ', outputs)

Softmax Numpy:  [0.65900114 0.24243297 0.09856589]


In [7]:
# In pytorch

# Same scores as a tensor; dim=0 normalises along the tensor's only dimension.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print(outputs)  # the three values sum to 1

tensor([0.6590, 0.2424, 0.0986])


In [8]:
# Softmax is often used together with the cross-entropy loss

In [9]:
# Cross-entropy in numpy

def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between a target distribution and predicted probabilities.

    Computes ``-sum(actual * log(predicted))`` over all elements.

    Predicted probabilities are clipped from below at ``eps`` before the
    logarithm is taken. Without this, a predicted probability of exactly 0
    gives ``log(0) = -inf``, and ``0 * -inf`` evaluates to ``nan`` — so even
    a perfect prediction such as ``[1, 0, 0]`` would return ``nan``.

    Args:
        actual: Array-like target weights (e.g. a one-hot encoded label).
        predicted: Array-like predicted probabilities, broadcastable against
            ``actual``.
        eps: Smallest probability passed to ``log``. Defaults to 1e-15.

    Returns:
        The summed cross-entropy loss as a NumPy scalar.
    """
    actual = np.asarray(actual)
    # Lower-bound only: values above eps are passed through untouched.
    safe_predicted = np.maximum(np.asarray(predicted), eps)
    return -np.sum(actual * np.log(safe_predicted))

# The target must be one-hot encoded:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])

# Predicted class probabilities: one close to the target, one far from it.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

# Score both predictions against the same target.
l1, l2 = (cross_entropy(Y, probs) for probs in (Y_pred_good, Y_pred_bad))

print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [14]:
# In pytorch

# nn.CrossEntropyLoss is fed raw, unnormalised scores plus class indices.
loss = nn.CrossEntropyLoss()

# Targets hold only the correct class index per sample (not one-hot encoded).
Y = torch.tensor([2, 0, 1])

# Scores have shape nsamples x nclasses = 3 samples x 3 possible classes.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

loss1 = loss(Y_pred_good, Y)
loss2 = loss(Y_pred_bad, Y)

print(f'Loss1 torch: {loss1.item()}')
print(f'Loss2 torch: {loss2.item()}')  # higher loss (more wrong predictions)

# torch.max with a dim returns a (values, indices) pair. By convention '_'
# names the part we do not need (here the max values); any name would work.
# dim=1 reduces across the class scores of each sample (the second axis),
# so the indices are the predicted class per sample.
_, prediction1 = torch.max(Y_pred_good, dim=1)
_, prediction2 = torch.max(Y_pred_bad, dim=1)

print(prediction1)  # all three samples predicted correctly
print(prediction2)  # only the last sample predicted correctly

Loss1 torch: 0.3018244206905365
Loss2 torch: 1.6241613626480103
tensor([2, 0, 1])
tensor([0, 2, 1])
