Softmax and Cross Entropy Loss

- Softmax: $S(y_i) = \dfrac{e^{y_i}}{\sum_j e^{y_j}}$
    - output of a classification model: each class is given a probability of occurrence, and the probabilities sum to 1

    - softmax using numpy

In [5]:
import torch
import torch.nn as nn
import numpy as np

In [6]:
def softmax(x, axis=0):
    """Return the softmax of ``x`` along ``axis``.

    Args:
        x: array-like of scores (logits).
        axis: axis along which the values are normalised. Defaults to 0,
            which matches the original single-argument behaviour.

    Returns:
        NumPy array with the same shape as ``x`` whose entries along
        ``axis`` are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the maximum does not change the result mathematically, but it
    # keeps np.exp from overflowing to inf (inf / inf would produce nan).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [7]:
# Four raw scores; softmax turns them into probabilities that sum to 1.
x = np.array([2.0, 1.0, 0.1, 1.2])
output = softmax(x)
print("softmax numpy: ", output)

softmax numpy:  [0.50844605 0.18704685 0.07604757 0.22845953]


    - softmax using pytorch

In [8]:
# Same scores as the NumPy example, now as a 1-D torch tensor.
x = torch.tensor([2.0, 1.0, 0.1, 1.2])
# dim=0 normalises over the only axis of this tensor.
output = x.softmax(dim=0)
print("softmax torch: ", output)

softmax torch:  tensor([0.5084, 0.1870, 0.0760, 0.2285])


- Cross Entropy Loss: $L = -\dfrac{1}{N} \sum_i y_i \log(\hat{y}_i)$
    - measures the performance of a classification model whose output is a probability
    - the lower the loss, the better the model
    - loss = divergence of the predicted value from the actual value

    - cross entropy using numpy

In [9]:
def cross_entropy(actual, predicted, eps=1e-12):
    """Return the summed cross-entropy between targets and predictions.

    Computes ``-sum(actual * log(predicted))``. The result is a sum over all
    entries; it is not divided by the number of samples.

    Args:
        actual: array-like of target values (e.g. a one-hot vector).
        predicted: array-like of predicted probabilities, same shape as
            ``actual``.
        eps: lower bound applied to ``predicted`` before taking the log.

    Returns:
        The loss as a NumPy floating-point scalar.
    """
    actual = np.asarray(actual)
    # log(0) is -inf and 0 * -inf is nan, so an exact zero probability for a
    # non-target class would turn the whole sum into nan. Clip it away.
    predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    loss = -np.sum(actual * np.log(predicted))
    return loss

In [12]:
# One-hot encoded target: this sample belongs to class 0.
# The other classes would be encoded as:
#   class 1 -> np.array([0, 1, 0])
#   class 2 -> np.array([0, 0, 1])
Y = np.array([1, 0, 0])

In [13]:
# Each prediction is a probability distribution over the three classes, so its
# entries sum to 1. With a one-hot target only the entry for the true class
# (index 0) contributes to the loss.
Y_pred_good = np.array([0.8, 0.1, 0.1])
Y_pred_bad = np.array([0.2, 0.7, 0.1])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
print(f'loss 1 numpy: {l1:.4f} \nloss 2 numpy: {l2:.4f}')

loss 1 numpy: 0.2231 
loss 2 numpy: 1.6094


    - cross entropy using pytorch

In [14]:
# CrossEntropyLoss takes raw logits and class-index targets; "mean" (the
# default reduction) averages the per-sample losses.
loss = nn.CrossEntropyLoss(reduction="mean")


In [None]:
# Shapes: target is (n_samples,), predictions are (n_samples, n_classes),
# here with n_samples = 1 and n_classes = 3.
# The target holds a class index (not one-hot); predictions are raw scores.
Y = torch.tensor([0])
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.2, 2.0, 0.1]])

In [15]:
# Argument order is (predictions, target).
l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
print(f'Loss 1: {l1.item()}, Loss 2: {l2.item()}')

Loss 1: 0.4170299470424652, Loss 2: 2.0737359523773193


In [16]:
# torch.max along dim=1 returns (values, indices); the index of the largest
# score in each row is the predicted class, the values are discarded.
_, predictions1 = torch.max(Y_pred_good, dim=1)
_, predictions2 = torch.max(Y_pred_bad, dim=1)
print(f'prediction 1: {predictions1}, prediction 2: {predictions2}')

prediction 1: tensor([0]), prediction 2: tensor([1])


In [17]:
# n_samples = 3, n_classes = 3: one class index per sample in Y and one row of
# raw scores per sample in the prediction tensors.
Y = torch.tensor([2, 0, 1])
Y_pred_good = torch.tensor([
    [1.0, 0.1, 2.1],
    [2.0, 1.0, 0.1],
    [1.0, 2.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [0.2, 2.0, 0.1],
    [0.2, 2.0, 0.1],
    [1.2, 0.1, 0.1],
])

In [18]:
# Argument order is (predictions, target); each call returns a single scalar
# for the whole batch.
l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
print(f'Loss 1: {l1.item()}, Loss 2: {l2.item()}')

Loss 1: 0.40603378415107727, Loss 2: 1.9525808095932007


In [19]:
# Row-wise argmax: keep only the indices returned by torch.max along dim=1.
_, predictions1 = torch.max(Y_pred_good, dim=1)
_, predictions2 = torch.max(Y_pred_bad, dim=1)
print(f'prediction 1: {predictions1}, prediction 2: {predictions2}')

prediction 1: tensor([2, 0, 1]), prediction 2: tensor([1, 1, 0])


Multiclass Classification

In [20]:
class MulticlassNN(nn.Module):
    """Two-layer fully connected classifier that returns raw class scores.

    Architecture: Linear(input_size, hidden_size) -> ReLU ->
    Linear(hidden_size, num_classes).

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map inputs with ``input_size`` features to ``num_classes`` logits."""
        out = self.linear1(x)
        out = self.relu(out)
        # Feed the hidden activations (not the raw input x) into the output layer.
        out = self.linear2(out)
        # No softmax here: nn.CrossEntropyLoss expects raw logits and applies
        # log-softmax itself.
        return out

In [21]:
model = MulticlassNN(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# CrossEntropyLoss applies (log-)softmax to the model output itself.
criterion = nn.CrossEntropyLoss()

Binary-class classification

In [25]:
class BinclassNN(nn.Module):
    """Two-layer fully connected binary classifier that returns a probability.

    Architecture: Linear(input_size, hidden_size) -> ReLU ->
    Linear(hidden_size, 1) -> sigmoid.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map inputs with ``input_size`` features to one value in (0, 1)."""
        out = self.linear1(x)
        out = self.relu(out)
        # Feed the hidden activations (not the raw input x) into the output layer.
        out = self.linear2(out)
        # Sigmoid is applied here because nn.BCELoss expects probabilities,
        # not raw logits.
        y_pred = torch.sigmoid(out)
        return y_pred

In [26]:
model = BinclassNN(
    input_size=28 * 28,
    hidden_size=5,
)
# BCELoss expects probabilities, which the model's final sigmoid provides.
criterion = nn.BCELoss()