In [1]:
import torch
import torch.nn as nn
import numpy as np

In [6]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Compute the categorical cross-entropy between two distributions.

    Evaluates ``-sum(actual * log(predicted))`` over all elements.

    Args:
        actual: Array-like target distribution (e.g. a one-hot vector).
        predicted: Array-like predicted probabilities with a shape that
            broadcasts against ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.
            Without it, a predicted probability of exactly 0 gives
            ``log(0) = -inf`` and ``0 * -inf = nan``, which poisons the sum
            even when the corresponding target entry is 0.

    Returns:
        The summed (not averaged) cross-entropy as a NumPy float.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    # Clip only from below: log() is well-defined for every value >= eps.
    safe_predicted = np.clip(predicted, eps, None)
    loss = -np.sum(actual * np.log(safe_predicted))
    return loss

In [7]:
# One-hot encoded target: the true class is index 0.
Y = np.array([1, 0, 0])

In [8]:
# Two candidate probability vectors for the one-hot target Y.
Y_pred_good = np.array([0.7, 0.2, 0.1])  # most mass on index 0
Y_pred_bad = np.array([0.1, 0.3, 0.6])   # most mass on index 2

# Evaluate the NumPy cross-entropy for both candidates in one pass.
l1, l2 = (cross_entropy(Y, probs) for probs in (Y_pred_good, Y_pred_bad))

print(f"Loss1 numpy: {l1:.4f}")
print(f"Loss2 numpy: {l2:.4f}")

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [11]:
# PyTorch implementation
#
# Caveats for nn.CrossEntropyLoss:
#   * It applies LogSoftmax followed by NLLLoss (negative log-likelihood
#     loss) internally, so the last layer of the network must NOT apply
#     softmax itself.
#   * Y holds class labels (indices), not one-hot vectors.
#   * Y_pred holds raw scores (logits), not softmax probabilities.
loss = nn.CrossEntropyLoss()

In [18]:
# True class index for each of the three samples.
Y = torch.tensor([2, 0, 1])

# Raw logits with shape (n_samples, n_classes) = (3, 3).
# "good": the largest logit in every row sits at the true class.
Y_pred_good = torch.tensor(
    [
        [0.1, 1.0, 2.1],
        [2.0, 1.0, 0.1],
        [1.5, 3.0, 0.1],
    ]
)
# "bad": the largest logit is at a wrong class for the first two rows.
Y_pred_bad = torch.tensor(
    [
        [2.5, 1.2, 0.3],
        [0.5, 0.2, 2.1],
        [0.5, 3.0, 0.3],
    ]
)

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

In [19]:
# .item() unwraps each scalar loss tensor into a plain Python float.
print(*(loss_value.item() for loss_value in (l1, l2)))

0.3488292694091797 1.5216106176376343


In [20]:
# The predicted class for each sample is the index of its largest logit
# along the class dimension (dim=1). torch.argmax returns those indices
# directly, so no throwaway `_` binding is needed; in IPython/Jupyter `_`
# normally refers to the most recent cell output, and rebinding it hides that.
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)

print(predictions1, predictions2)

tensor([2, 0, 1]) tensor([0, 2, 1])


In [21]:
# Testing loss function on neural network

class NeuralNet2(nn.Module):
    """Two-layer feed-forward network for multiclass classification.

    The network is Linear -> ReLU -> Linear and returns raw scores (logits).
    No softmax is applied at the end because it is meant to be trained with
    ``nn.CrossEntropyLoss``, which applies LogSoftmax itself.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names must match the ones used in forward(); the layers
        # were previously registered as `Linear1`/`Linear2`, which made every
        # forward pass fail with AttributeError.
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the logits for a batch ``x`` whose last dim is ``input_size``."""
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at end
        return out


In [22]:
# Build the classifier: 28*28 input features, a 5-unit hidden layer and
# 3 output classes.
model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# CrossEntropyLoss is applied to the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

In [23]:
### Activation Functions

# Sigmoid -> Typically used in the last layer of a binary classification problem
# Tanh -> Scaled and shifted sigmoid function. Good for hidden layers
# ReLU -> Most popular choice for hidden layers. If you don't know which to use, just use ReLU
# Leaky ReLU -> Improved version of ReLU that tries to solve the vanishing gradient problem for negative inputs (the "dying ReLU" problem)
# Softmax -> Good in the last layer of a multiclass classification problem (but omit it when training with nn.CrossEntropyLoss, which applies LogSoftmax itself)

In [25]:
import torch.nn.functional as F

In [26]:
class NeuralNet(nn.Module):
    """Binary classifier: Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.

    The forward pass returns one value per sample, squashed by the sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        # Other activation modules available in torch.nn:
        #   nn.Sigmoid, nn.Softmax, nn.Tanh, nn.LeakyReLU
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the input through hidden layer, ReLU, output layer and sigmoid."""
        hidden = self.relu(self.linear1(x))
        return self.sigmoid(self.linear2(hidden))