In [1]:
import numpy as np
import torch
import torch.nn as nn

## Softmax

- It returns probabilities for values. Also Sum of all probabilities equals to 1.

$\Large S(y_i)=\frac{e^{y_i}}{\sum{e^{y_j}}} $

In [2]:
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [3]:
x = np.array([2.0, 1.0, 0.1])
print(softmax(x))

[0.65900114 0.24243297 0.09856589]


In [4]:
# Using torch
x = torch.tensor([2.0, 1.0, 0.1])
print(torch.softmax(x, dim=0))

tensor([0.6590, 0.2424, 0.0986])


## Cross Entropy

$\large D(\hat{Y}, Y) = -\frac{1}{N} \cdot \sum{Y_i \cdot log(\hat{Y_i})} $

Here $Y_i$ is one-hot encoded and $\hat{Y_i}$ is softmax output.

$\large \begin{matrix}
  Y_i = [1, 0, 0] \\
  \hat{Y_i} = [0.7, 0.2, 0.1]
 \end{matrix} \rightarrow  D(\hat{Y}, Y) = \color{green}{0.35}$
 
$\large \begin{matrix}
  Y_i = [1, 0, 0] \\
  \hat{Y_i} = [0.1, 0.3, 0.6]
 \end{matrix} \rightarrow  D(\hat{Y}, Y) = \color{red}{2.30}$

In [5]:
def cross_entropy(actual, predicted):
    loss = -np.sum(actual * np.log(predicted))
    return loss # /float(predicted.shape[0])

In [6]:
# y must be one hot encoded
# if class 0: [1, 0, 0]
# if class 1: [1, 1, 0]
# if class 2: [1, 0, 1]
Y = np.array([1, 0, 0])

# y_pred has probabilities
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)

print(l1, l2)

0.35667494393873245 2.3025850929940455


In [11]:
# Using torch
# nn.CrossEntropyLoss applies nn.LogSoftmax + nn.LLLoss
# So no softmax layer also no one-hot encoding, provide class labels directly

loss = nn.CrossEntropyLoss()

# Class 0
Y = torch.tensor([0])
# nsamples x nclasses = 1x3, Also raw values, no softmax
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l1 = loss(Y_pred_bad, Y)

print(l1.item(), l2.item())


# To get prediction we take max value in output
_, prediction1 = torch.max(Y_pred_good, dim=1)
_, prediction2 = torch.max(Y_pred_bad, dim=1)

print(prediction1.item(), prediction2.item())

1.840616226196289 2.3025850929940455
0 1


In [13]:
# More Samples
loss = nn.CrossEntropyLoss()

# Class 0
Y = torch.tensor([2, 0, 1])
# nsamples x nclasses = 1x3, Also raw values, no softmax
Y_pred_good = torch.tensor([[0.1, 1.0, 2.1], [2.0, 1.0, 0.1], [0.2, 3.0, 0.1]])
Y_pred_bad = torch.tensor([[2.1, 1.0, 0.1], [0.1, 1.0, 2.1], [0.2, 3.0, 0.1]])

l1 = loss(Y_pred_good, Y)
l1 = loss(Y_pred_bad, Y)

print(l1.item(), l2.item())


# To get prediction we take max value in output
_, prediction1 = torch.max(Y_pred_good, dim=1)
_, prediction2 = torch.max(Y_pred_bad, dim=1)

print(prediction1, prediction2)

1.6258946657180786 2.3025850929940455
tensor([2, 0, 1]) tensor([0, 2, 1])


In [3]:
# Binary Classification
class NeuralNet1(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet1, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # Sigmoid before BCELoss
        y_pred = torch.sigmoid(out)
        return y_pred

In [None]:
model = NeuralNet1(input_size=28*28, hidden_size=5)
criterion = nn.BCELoss()

In [2]:
# Multiclass Neural Network
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        return out


In [None]:
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss() # applies softmax