# Softmax
---
Softmax is a function that turns a vector of `K` real values into a vector of `K` real values that sum to 1. The inputs can be positive, negative, zero, or greater than 1; the softmax function transforms them into values between 0 and 1 so that they can be interpreted as probabilities: $\mathrm{softmax}(x)_i = e^{x_i} / \sum_{j=1}^{K} e^{x_j}$. Because the outputs sum to 1, the softmax function can be used in a classifier only when the classes are mutually exclusive. Many multi-layer neural networks end in a penultimate layer which outputs real-valued scores that are not conveniently scaled and which may be difficult to work with; applying softmax as the final layer converts those scores into a normalized probability distribution.

In [7]:
import torch
import torch.nn as nn
import numpy as np

In [5]:
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Each entry is ``exp(x_i) / sum_j exp(x_j)``, where the sum runs over
    axis 0, so every slice along that axis is non-negative and sums to 1.

    Args:
        x: Array-like of real-valued scores. For a 1-D input the whole
            vector is normalized; for higher-rank input each column
            (slice along axis 0) is normalized independently.

    Returns:
        A NumPy array with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on a zero-length axis.
        return np.exp(x)

    # Softmax is invariant to adding a constant to every score, so shift by
    # the maximum: the largest exponent becomes exp(0) == 1 and np.exp can
    # no longer overflow to inf (which would produce inf / inf == nan).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

# The same three scores, once as a PyTorch tensor and once as a NumPy array,
# so the hand-written softmax can be compared against the library version.
tensor = torch.tensor([2.0, 1.0, 0.1])
x = np.array([2.0, 1.0, 0.1])

# The tensor is 1-D, so dim=0 normalizes across all of its entries.
output = tensor.softmax(dim=0)

print(f"Vector: {x}")
print(f"Softmax Output: {softmax(x)}")
print(f"Softmax Pytorch output: {output}")

Vector: [2.  1.  0.1]
Softmax Output: [0.65900114 0.24243297 0.09856589]
Softmax Pytorch output: tensor([0.6590, 0.2424, 0.0986])


In [9]:
def cross_entropy(actual, predicted, eps=1e-12):
    """Return the cross-entropy ``-sum(actual * log(predicted))``.

    Args:
        actual: Array-like target distribution (e.g. a one-hot vector).
        predicted: Array-like of predicted probabilities, broadcastable
            against ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.
            Without it a predicted probability of exactly 0 gives
            ``log(0) == -inf``, and ``0 * -inf`` turns the whole sum into
            ``nan``.

    Returns:
        The summed loss as a NumPy float scalar.
    """
    actual = np.asarray(actual)
    # Floor the probabilities so the log stays finite; values already above
    # eps are left untouched, so ordinary inputs give the same result.
    safe_predicted = np.maximum(np.asarray(predicted), eps)
    loss = -np.sum(actual * np.log(safe_predicted))
    return loss

# --- NumPy version: one-hot target compared against probability vectors ---
Y = np.array([1, 0, 0])
Y_pred_good, Y_pred_bad = np.array([0.7, 0.2, 0.1]), np.array([0.1, 0.3, 0.6])

l1, l2 = cross_entropy(Y, Y_pred_good), cross_entropy(Y, Y_pred_bad)
print(f"Loss 1: {l1:.4f}")
print(f"Loss 2: {l2:.4f}")

# --- PyTorch version ---
# Here the target is the index of the correct class (class 0) and the
# predictions are rows of raw scores, one row per sample.
loss = nn.CrossEntropyLoss()
Y = torch.tensor([0])
Y_pred_good, Y_pred_bad = (
    torch.tensor([[2.0, 1.0, 0.1]]),
    torch.tensor([[0.5, 2.0, 0.3]]),
)

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
print("---Using pytorch---")
print(f"Loss 1: {l1.item()}")
print(f"Loss 2: {l2.item()}")

Loss 1: 0.3567
Loss 2: 2.3026
---Using pytorch---
Loss 1: 0.4170299470424652
Loss 2: 1.840616226196289
