# Softmax and Cross-Entropy

In [1]:
import numpy as np
import torch as tr
import torch.nn as nn

In [2]:
def softmax(x):
    """Return the softmax of *x*, normalised along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This
    leaves the mathematical result unchanged (the common factor cancels in
    the ratio) but keeps ``np.exp`` from overflowing to ``inf`` for large
    inputs, which would otherwise turn the result into ``nan``.

    Args:
        x: Array-like of raw scores. For a 1-D input the whole vector is
            normalised; for higher-rank input each column along axis 0 is
            normalised independently.

    Returns:
        A NumPy array with the same shape as *x* whose entries along
        axis 0 sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max cannot reduce an empty array; there is nothing to normalise.
        return np.exp(x)
    # Largest exponent becomes exp(0) == 1, so no term can overflow.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

# Demo: turn three raw scores into probabilities with the NumPy softmax.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy :', outputs)

softmax numpy : [0.65900114 0.24243297 0.09856589]


In [3]:
# Same scores as the NumPy demo, normalised by PyTorch along dimension 0.
x = tr.tensor([2.0, 1.0, 0.1])
outputs = tr.softmax(x, 0)
print(outputs)

tensor([0.6590, 0.2424, 0.0986])


In [4]:
# manual implementation of the cross-entropy formula

def cross_Entropy(y_actual, y_pred):
    """Return the cross-entropy ``-sum(y_actual * log(y_pred))``.

    Predicted probabilities are floored at a tiny positive value before the
    logarithm is taken. Without this, a predicted probability of exactly 0
    yields ``log(0) == -inf``, and ``0 * -inf`` evaluates to ``nan`` even
    for classes whose true label is 0.

    Args:
        y_actual: Array-like of target weights (one-hot encoded labels in
            the usual case).
        y_pred: Array-like of predicted probabilities, same shape as
            *y_actual*.

    Returns:
        The summed cross-entropy as a NumPy float.
    """
    eps = 1e-15  # small enough not to change any realistic probability
    y_actual = np.asarray(y_actual)
    safe_pred = np.maximum(np.asarray(y_pred, dtype=float), eps)
    return -np.sum(y_actual * np.log(safe_pred))

# y_actual must be one-hot encoded
# True class is index 0.
y = np.array([1, 0, 0])

# One prediction that favours the true class and one that does not.
y_predicted_good = np.array([0.7, 0.1, 0.2])
y_predicted_bad = np.array([0.1, 0.3, 0.6])

# A lower loss means a better prediction.
loss_good, loss_bad = (
    cross_Entropy(y, candidate)
    for candidate in (y_predicted_good, y_predicted_bad)
)
print(loss_good, loss_bad, sep='\n')


0.35667494393873245
2.3025850929940455


In [5]:
# cross_entropy using pytorch

loss = nn.CrossEntropyLoss()

# Single-sample variant (n_samples x n_classes = 1 x 3), kept for reference:
#   y = tr.tensor([0])
#   y_predicted_good = tr.tensor([[0.7, 0.1, 0.2]])
#   y_predicted_bad = tr.tensor([[0.1, 0.3, 0.6]])

# Targets are class indices, one per sample.
y = tr.tensor([2, 0, 1])

# Raw scores, n_samples x n_classes = 3 x 3.
y_predicted_good = tr.tensor([
    [0.3, 0.1, 0.7],
    [0.6, 0.2, 0.2],
    [0.1, 0.8, 0.1],
])
y_predicted_bad = tr.tensor([
    [0.1, 0.5, 0.4],
    [0.1, 0.1, 0.8],
    [0.1, 0.3, 0.6],
])

loss_g, loss_b = loss(y_predicted_good, y), loss(y_predicted_bad, y)
print(loss_g.item(), loss_b.item())

# Predicted class = index of the highest score in each row.
predictiion_g = tr.max(y_predicted_good, dim=1).indices
predictiion_b = tr.max(y_predicted_bad, dim=1).indices
print(predictiion_g, predictiion_b)

0.7790890336036682 1.196307897567749
tensor([2, 0, 1]) tensor([1, 2, 2])


In [6]:
# nn.CrossEntropyLoss = nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)
# so there is no need for softmax in the last layer of the neural network
# Y has class labels (class indices), without one-hot encoding
# Y prediction has raw scores (logits), no softmax

class NeuralNet_crossEntropyLoss(nn.Module):
    """Linear -> ReLU -> Linear network that returns raw class scores."""

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the second linear layer for *x*."""
        hidden = self.relu(self.linear1(x))
        # No softmax is applied here: the raw scores are returned as-is.
        return self.linear2(hidden)


# 28*28 input features, 5 hidden units, 3 output classes.
model = NeuralNet_crossEntropyLoss(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# The loss applies (log-)softmax itself, so the model outputs raw scores.
criterion = nn.CrossEntropyLoss()



In [7]:
# nn.BCELoss = binary cross entropy loss
# so a sigmoid is needed in the last layer of the neural network

# Y has binary labels (0 or 1), given as floats with the same shape as the prediction
# Y prediction is a probability in (0, 1): sigmoid is already applied, so it is not raw logits

class NeuralNet_crossEntropyLoss(nn.Module):
    """Linear -> ReLU -> Linear -> sigmoid network with a single output."""

    def __init__(self, input_size, hidden_size):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # The output layer always has exactly one unit.
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output score for *x*."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # The sigmoid is applied here, in the last step of the network.
        return tr.sigmoid(score)


# 28*28 input features and 5 hidden units; the output size is fixed by the class.
model = NeuralNet_crossEntropyLoss(
    input_size=28 * 28,
    hidden_size=5,
)
criterion = nn.BCELoss()
