# Softmax and Cross Entropy


Softmax

In [1]:
import torch
import torch.nn as nn
import numpy as np

def softmax(x, axis=0):
  """Return the softmax of ``x`` along ``axis``.

  The per-slice maximum is subtracted before exponentiating. This leaves the
  result mathematically unchanged but keeps ``np.exp`` from overflowing to
  ``inf`` (which would turn the quotient into ``nan``) for large scores.

  Args:
    x: array-like of raw scores (logits).
    axis: axis along which the outputs sum to 1. Defaults to 0, which is
      what this function always used.

  Returns:
    np.ndarray with the same shape as ``x``.
  """
  x = np.asarray(x)
  if x.size == 0:
    # Nothing to normalise; np.max would raise on a zero-size array.
    return np.exp(x)
  # exp(x - max) is at most 1, so it can never overflow.
  exps = np.exp(x - np.max(x, axis=axis, keepdims=True))
  return exps / np.sum(exps, axis=axis, keepdims=True)

# Three raw scores (logits); softmax turns them into probabilities.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


In [2]:
# Same scores as the NumPy example, now as a 1-D torch tensor.
x = torch.tensor([2.0, 1.0, 0.1])
# dim=0 normalises along the first (and only) axis; the bare expression is
# the cell's displayed output.
torch.softmax(x, dim=0)


tensor([0.6590, 0.2424, 0.0986])

Cross Entropy


In [3]:
def cross_entropy(actual, predicted, eps=1e-15):
  """Cross-entropy between one-hot targets and predicted probabilities.

  Computes ``-sum(actual * log(predicted))`` over all elements.

  Args:
    actual: array-like of one-hot encoded targets.
    predicted: array-like of predicted probabilities, same shape as
      ``actual``.
    eps: lower bound applied to ``predicted`` before taking the log.
      Without it a probability of exactly 0 yields ``log(0) = -inf`` and
      ``0 * -inf = nan``, so the loss becomes ``nan`` or ``inf``.

  Returns:
    The summed (not averaged) loss as a NumPy scalar.
  """
  actual = np.asarray(actual)
  # Floor only from below: values in (eps, 1] give the same result as before.
  predicted = np.maximum(np.asarray(predicted), eps)
  loss = -np.sum(actual * np.log(predicted))
  return loss

# Targets must be one-hot encoded for this NumPy version:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])  # true class is 0

# Predictions are probabilities, not raw scores.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

l1, l2 = cross_entropy(Y, Y_pred_good), cross_entropy(Y, Y_pred_bad)

print(f'Loss1 numpy : {l1:.4f}')  # good prediction -> lower loss
print(f'Loss2 numpy : {l2:.4f}')  # bad prediction -> higher loss

Loss1 numpy : 0.3567
Loss2 numpy : 2.3026


In [4]:
# nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss (negative log likelihood
# loss), so the model must NOT apply softmax in its last layer.
loss = nn.CrossEntropyLoss()

# Y holds class indices (no one-hot encoding required).
# Y_pred holds raw scores (logits); the loss applies log-softmax itself.
Y = torch.tensor([0])

# Shape is nsamples x nclasses = 1 x 3; two equivalent ways to build it.
Y_pred_good = torch.tensor([2.0, 1.0, 0.1]).view(1, 3)
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.1]])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

# NOTE: the labels say "numpy", but these values come from PyTorch.
print(f'Loss1 numpy : {l1.item()}')  # good prediction -> lower loss
print(f'Loss2 numpy : {l2.item()}')

# max over dim=1 returns (values, indices); the indices are the predicted classes.
_, predictions1 = Y_pred_good.max(dim=1)
_, predictions2 = Y_pred_bad.max(dim=1)
print(predictions1)
print(predictions2)

Loss1 numpy : 0.4170299470424652
Loss2 numpy : 1.8167786598205566
tensor([0])
tensor([1])


In [5]:
# nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss (negative log likelihood
# loss), so the model must NOT apply softmax in its last layer.
loss = nn.CrossEntropyLoss()

# Y holds class indices (no one-hot encoding required).
# Y_pred holds raw scores (logits); the loss applies log-softmax itself.
Y = torch.tensor([2])

# Shape is nsamples x nclasses = 1 x 3. With target class 2, the tensor that
# was "good" for class 0 is now the bad one, so bad is defined first here.
Y_pred_bad = torch.tensor([2.0, 1.0, 0.1]).view(1, 3)
Y_pred_good = torch.tensor([[0.5, 2.0, 2]])

# l1 is still the loss of the good prediction, l2 of the bad one.
l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

# NOTE: the labels say "numpy", but these values come from PyTorch.
print(f'Loss1 numpy : {l1.item()}')  # good prediction -> lower loss
print(f'Loss2 numpy : {l2.item()}')

# max over dim=1 returns (values, indices); the indices are the predicted classes.
# Y_pred_good has two equal maxima (2.0 and 2), so its argmax is a tie.
_, predictions1 = Y_pred_good.max(dim=1)
_, predictions2 = Y_pred_bad.max(dim=1)
print(predictions1)
print(predictions2)


Loss1 numpy : 0.798916220664978
Loss2 numpy : 2.3170299530029297
tensor([2])
tensor([0])


# Multiple samples

In [6]:
# nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss (negative log likelihood
# loss), so the model must NOT apply softmax in its last layer.
loss = nn.CrossEntropyLoss()

# Y holds one class index per sample (no one-hot encoding required).
# Y_pred holds raw scores (logits); the loss applies log-softmax itself.
Y = torch.tensor([2, 0, 1])

# Shape is nsamples x nclasses = 3 x 3.
Y_pred_good = torch.tensor([
    [0.1, 0.2, 3.9],  # predict class 2
    [1.2, 0.1, 0.3],  # predict class 0
    [0.3, 2.2, 0.2],  # predict class 1
])

Y_pred_bad = torch.tensor([
    [0.9, 0.2, 0.1],
    [0.1, 0.3, 1.5],
    [1.2, 0.2, 0.5],
])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

# NOTE: the labels say "numpy", but these values come from PyTorch.
print(f'Loss1 numpy : {l1.item()}')  # good predictions -> lower loss
print(f'Loss2 numpy : {l2.item()}')

# max over dim=1 returns (values, indices); the indices are the predicted classes.
_, predictions1 = Y_pred_good.max(dim=1)
_, predictions2 = Y_pred_bad.max(dim=1)

print(predictions1)
print(predictions2)

Loss1 numpy : 0.28342217206954956
Loss2 numpy : 1.6418448686599731
tensor([2, 0, 1])
tensor([0, 2, 0])
