### Softmax

Softmax maps a vector of raw scores (logits) to a probability distribution: every output lies between 0 and 1, and the outputs sum to 1. It is typically used as the output activation for multiclass classification problems.

In [5]:
import torch
import torch.nn as nn
import numpy as np

def softmax(x):
    """Return the softmax of `x`, normalized along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. Softmax is
    invariant to adding a constant to every score, so the result is the same,
    but the largest exponent becomes exp(0) = 1 and large scores no longer
    overflow to inf (which would turn the output into nan via inf / inf).

    Args:
        x: array-like of scores, normalized over its first axis.

    Returns:
        np.ndarray with the same shape as `x`; entries lie in [0, 1] and sum
        to 1 along axis 0.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty reduction.
        return np.exp(x)
    # Compute the exponentials once and reuse them for numerator and denominator.
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)

# NumPy: push three raw scores through the hand-written softmax.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

# PyTorch: same scores through the built-in; dim=0 normalizes over the only
# axis of this 1-D tensor.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print('softmax torch:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]
softmax torch: tensor([0.6590, 0.2424, 0.0986])


### Cross-Entropy

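Cross-entropy is a loss function that measures the mismatch between one-hot-encoded class labels `y` and the predicted probabilities for each class. The loss is low when the model assigns a high probability to the true class and grows as that probability approaches 0.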

Using numpy

In [7]:
def cross_entropy(actual, predicted, eps=1e-12):
    """Cross-entropy loss between one-hot labels and predicted probabilities.

    Computes -sum(actual * log(predicted)). The result is summed over all
    entries, not averaged.

    Predicted probabilities are floored at `eps` before the log is taken.
    Without the floor, a probability of exactly 0 gives log(0) = -inf, and the
    loss becomes nan (0 * -inf) for a non-target class or inf for the target.

    Args:
        actual: array-like of one-hot encoded labels.
        predicted: array-like of predicted probabilities, broadcastable
            against `actual`.
        eps: smallest probability passed to the log.

    Returns:
        The scalar loss.
    """
    actual = np.asarray(actual)
    safe_predicted = np.maximum(np.asarray(predicted), eps)
    return -np.sum(actual * np.log(safe_predicted))

# One-hot encoded ground truth: the true class is index 0.
y = np.array([1, 0, 0])

# Predicted probabilities for the 3 classes. "good" puts most of its mass on
# the true class; "bad" puts most of it on a wrong class.
y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

loss_good, loss_bad = cross_entropy(y, y_pred_good), cross_entropy(y, y_pred_bad)

print(f"Loss good: {loss_good:.3f}")
print(f"Loss bad: {loss_bad:.3f}")

Loss good: 0.357
Loss bad: 2.303


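Using PyTorch

Note that `nn.CrossEntropyLoss` applies softmax internally, so it takes raw scores (logits) rather than probabilities, and the target is a class index rather than a one-hot vector.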

In [15]:
import torch
import torch.nn as nn
import numpy as np

# The criterion is fed raw scores (logits) and integer class indices.
loss = nn.CrossEntropyLoss()

# Target for the single sample: class index 0.
y = torch.tensor([0])

# Shape (1, 3): one sample, three class scores.
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

loss_good, loss_bad = loss(y_pred_good, y), loss(y_pred_bad, y)

print(f"Loss good: {loss_good.item():.3f}")
print(f"Loss bad: {loss_bad.item():.3f}")

# A max over dim 1 returns (values, indices); the indices are the predicted
# class for each sample.
_, predictions_good = y_pred_good.max(dim=1)
_, predictions_bad = y_pred_bad.max(dim=1)

print(predictions_good, predictions_bad, sep="\n")

Loss good: 0.417
Loss bad: 1.841
tensor([0])
tensor([1])


Multiclass neural network example

In [16]:
import torch
import torch.nn as nn


class Network(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    The forward pass returns raw class scores (logits). No softmax is applied,
    so pair the model with a loss that expects logits, such as
    nn.CrossEntropyLoss.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of inputs to class logits.

        Args:
            x: tensor whose last dimension is `input_size`.

        Returns:
            Tensor whose last dimension is `num_classes`.
        """
        # Each layer has to be called on x. Assigning the module itself
        # (x = self.l1) would discard the input and return a layer object.
        x = self.l1(x)
        x = self.relu(x)
        x = self.l2(x)
        return x

# 28*28 = 784 input features -> 5 hidden units -> 3 class scores.
# NOTE(review): 28*28 is presumably a flattened 28x28 image; confirm against the data.
model = Network(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# Loss to apply to the model's raw output scores.
criterion = nn.CrossEntropyLoss()