In [1]:
# Softmax
# S(Zi) = exp(Zi)/(exp(Z1) + exp(Z2) + ... + exp(ZN))
# Example:
# [2.0, 1.0, 0.1] -> [0.7, 0.2, 0.1]

In [2]:
import torch 
import torch.nn as nn 
import numpy as np 

In [3]:
def softmax(x):
    """Compute the softmax of ``x`` along axis 0.

    S(z_i) = exp(z_i) / sum_j exp(z_j)

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged (the factor cancels in the ratio) but
    keeps ``np.exp`` from overflowing to ``inf`` -- and the ratio from turning
    into ``nan`` -- when the inputs are large.

    Args:
        x: array-like of scores. For a 1-D input the whole vector is
            normalized; for an N-D input each slice along axis 0 is
            normalized independently.

    Returns:
        np.ndarray with the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty reduction.
        return np.exp(x)
    # Shift so the largest exponent is exp(0) == 1.
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)  # computed once and reused for the normalizer
    return exps / np.sum(exps, axis=0)

In [4]:
# Example scores from the introduction, as a NumPy array.
x = np.array([2.0, 1.0, 0.1])
# Last expression of the cell: the resulting probabilities are displayed as output.
softmax(x)

array([0.65900114, 0.24243297, 0.09856589])

In [5]:
# Same example scores, now as a torch tensor (rebinds the notebook-level name `x`).
x = torch.tensor([2.0, 1.0, 0.1])
# Built-in softmax along dim 0; displayed as the cell output.
torch.softmax(x, dim=0)

tensor([0.6590, 0.2424, 0.0986])

In [6]:
# Cross Entropy Loss for classification model
# D(yHat, y) = -1/N*sum( y[i] * log(yHat[i]) )

In [7]:
def cross_entropy(y, y_pred, eps=1e-15):
    """Cross-entropy between a target distribution and predicted probabilities.

    Computes ``-sum(y * log(y_pred)) / y_pred.shape[0]``.

    Note that the normalizer is the length of the first axis of ``y_pred``.
    For a single 1-D probability vector that is the number of classes, not
    the number of samples.

    Args:
        y: array-like target, e.g. a one-hot vector such as ``[1, 0, 0]``.
        y_pred: array-like of predicted probabilities, same shape as ``y``.
        eps: lower bound applied to ``y_pred`` before taking the logarithm.
            Without it a predicted probability of exactly 0 gives
            ``log(0) = -inf`` and ``0 * -inf = nan``, poisoning the sum.

    Returns:
        The loss as a NumPy float scalar.
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred, dtype=float)
    # Floor the probabilities so the logarithm always stays finite.
    safe_pred = np.maximum(y_pred, eps)
    loss = -np.sum(y * np.log(safe_pred))
    return loss / float(y_pred.shape[0])

In [8]:
# One-hot target: the true class is index 0.
y = np.array([1, 0, 0])
# Two candidate probability vectors: one favouring the true class, one not.
y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

# Score both candidates against the same target (good first, then bad).
loss1, loss2 = [cross_entropy(y, probs) for probs in (y_pred_good, y_pred_bad)]
print(loss1, loss2)

0.11889164797957748 0.7675283643313485


In [9]:
# In PyTorch, nn.CrossEntropyLoss
# takes y_true to be a single class label (say 0 or 1)
# and y_pred to be the raw, non-softmaxed logits.

# In other words, if you plan to use CrossEntropyLoss(),
# do NOT add a softmax() layer at the end!

Loss = nn.CrossEntropyLoss()

In [12]:
# One sample whose true class index is 0.
Y = torch.tensor([0])
# Raw logits (no softmax), laid out as nSamples x nClasses = 1 x 3.
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

# Evaluate the criterion on the good logits first, then on the bad ones.
loss1, loss2 = Loss(Y_pred_good, Y), Loss(Y_pred_bad, Y)
print(loss1.item(), loss2.item())

0.4170299470424652 1.840616226196289


In [13]:
# Predicted class = index of the largest logit along the class dimension (dim 1).
# max(dim=...) returns (values, indices); only the indices are of interest here.
_, pred1 = Y_pred_good.max(dim=1)
_, pred2 = Y_pred_bad.max(dim=1)
print(pred1, pred2, sep="\n")

tensor([0])
tensor([1])


In [15]:
# Batch of three samples with true class indices 2, 0 and 1.
Y = torch.tensor([2, 0, 1])
# Raw logits, laid out as nSamples x nClasses = 3 x 3.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 1.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

# Loss over the whole batch, good logits first.
loss1, loss2 = Loss(Y_pred_good, Y), Loss(Y_pred_bad, Y)
print(loss1.item(), loss2.item(), sep="\n")

# Per-sample predicted class: index of the largest logit in each row.
_, pred1 = Y_pred_good.max(dim=1)
_, pred2 = Y_pred_bad.max(dim=1)
print(pred1, pred2, sep="\n")

0.4653770923614502
1.6241613626480103
tensor([2, 0, 1])
tensor([0, 2, 1])


In [16]:
# Multiclass problem

In [17]:
class NeuralNet2(nn.Module):
    """Two-layer fully connected network for multiclass classification.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes).

    The network returns raw logits. No softmax is applied because
    nn.CrossEntropyLoss expects un-normalized scores.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output classes (logits per sample).
        """
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the class logits for ``x`` (last dimension = input_size)."""
        out = self.linear1(x)
        out = self.relu(out)
        # Feed the hidden activation (not the raw input) into the output layer.
        out = self.linear2(out)
        # Notice no softmax at the end!
        return out

In [18]:
# Multiclass network: 28*28 input features, 5 hidden units, 3 output classes.
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)

# The model outputs raw logits; the loss below handles the softmax.
criterion = nn.CrossEntropyLoss() # This applies softmax itself!

In [24]:
# Slight modification for binary classifier
class NeuralNet1(nn.Module):
    """Two-layer fully connected network for binary classification.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> 1) -> sigmoid.

    Unlike the multiclass variant, the sigmoid is applied inside the model,
    so the output is a value in [0, 1] per sample.
    """

    def __init__(self, input_size, hidden_size):
        """Create the layers.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
        """
        super(NeuralNet1, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid output for ``x``; the last dimension has size 1."""
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # Squash the single logit into [0, 1].
        out = torch.sigmoid(out)
        return out

# Binary classifier: 28*28 input features, 5 hidden units, a single sigmoid output.
# Rebinds the notebook-level names `model` and `criterion`.
model = NeuralNet1(input_size=28*28, hidden_size=5)
# Binary cross-entropy, paired with the sigmoid applied at the end of forward().
criterion = nn.BCELoss()