In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
def softmax(x):
  """Compute the softmax of ``x`` along axis 0.

  Each entry is ``exp(x_i) / sum_j exp(x_j)``, where the sum runs over axis 0.

  Args:
    x: Array-like of raw scores. For a 2-D input every column is
      normalized independently.

  Returns:
    A NumPy array with the same shape as ``x`` whose entries along axis 0
    sum to 1.
  """
  x = np.asarray(x)
  if x.size == 0:
    # Nothing to normalize; np.max would raise on an empty reduction.
    exps = np.exp(x)
  else:
    # Subtracting the maximum along axis 0 does not change the result
    # mathematically (the common factor cancels in the ratio) but keeps
    # exp() from overflowing to inf for large scores, which would
    # otherwise produce inf / inf = nan.
    exps = np.exp(x - np.max(x, axis=0))
  # exp() is evaluated once and reused for numerator and denominator.
  return exps / np.sum(exps, axis=0)

In [3]:
# Try the NumPy softmax on a small vector of raw scores and display the
# resulting values.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
outputs

array([0.65900114, 0.24243297, 0.09856589])

In [4]:
# PyTorch's built-in softmax, normalizing along dimension 0 of a 1-D tensor
# of raw scores.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
outputs

tensor([0.6590, 0.2424, 0.0986])

In [5]:
def cross_entropy(actual, predicted):
  """Return the summed cross-entropy between ``actual`` and ``predicted``.

  Computes ``-sum(actual * log(predicted))`` over all elements. The result
  is a plain sum; it is not divided by the number of entries.

  Args:
    actual: Array-like of target weights (e.g. a one-hot encoded label).
    predicted: Array-like of predicted probabilities, broadcastable
      against ``actual``.

  Returns:
    The loss as a NumPy scalar.
  """
  actual = np.asarray(actual)
  predicted = np.asarray(predicted)
  # Terms with zero target weight contribute nothing (0 * log p := 0).
  # Replacing their probability by 1 makes log() return exactly 0 there,
  # so a predicted probability of 0 on a non-target class no longer turns
  # the whole loss into nan via 0 * -inf.
  safe_predicted = np.where(actual == 0, 1.0, predicted)
  loss = -np.sum(actual * np.log(safe_predicted))
  return loss

In [6]:
# Y must be one-hot encoded: a 1 at the index of the true class (here
# index 0) and 0 everywhere else.
Y = np.array([1,0,0])

In [7]:
# Two candidate probability vectors scored against the one-hot target Y.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1, l2 = (cross_entropy(Y, pred) for pred in (Y_pred_good, Y_pred_bad))
l1, l2

(0.35667494393873245, 2.3025850929940455)

In [None]:
#Note
'''
nn.CrossEntropyLoss applies nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)
-> do not use softmax in last layer when using nn.CrossEntropyLoss

-> Y has class labels, not one-hot
-> Y_pred has raw scores (logits), no softmax

'''

'''
nn.BCELoss does not apply torch.sigmoid
-> need to use sigmoid in last layer when using nn.BCELoss

'''

In [14]:
# Cross-entropy on one sample: the target is a class index and the
# predictions are raw scores of shape nsamples x nclasses = 1 x 3.
loss = nn.CrossEntropyLoss()

Y = torch.tensor([0])
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
l1.item(), l2.item()

(0.4170299470424652, 1.840616226196289)

In [15]:
# torch.max along dim 1 yields (values, indices); the index of the largest
# score in each row is taken as the predicted class.
_, predictions1 = torch.max(Y_pred_good, dim=1)
_, predictions2 = torch.max(Y_pred_bad, dim=1)
predictions1, predictions2

(tensor([0]), tensor([1]))

In [16]:
loss = nn.CrossEntropyLoss()

# Batch of 3 samples: one class index per sample, and raw scores of shape
# nsamples x nclasses = 3 x 3.
Y = torch.tensor([2, 0, 1])
Y_pred_good = torch.tensor([
  [0.04, 1.0, 2.1],
  [2.0, 1.0, 0.1],
  [0.2, 2.0, 0.1],
])
Y_pred_bad = torch.tensor([
  [0.5, 2.0, 0.3],
  [0.1, 2.0, 0.1],
  [2.0, 1.0, 0.1],
])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

# Predicted class per sample = index of the largest score in each row.
_, predictions1 = torch.max(Y_pred_good, dim=1)
_, predictions2 = torch.max(Y_pred_bad, dim=1)

l1.item(), l2.item(), predictions1, predictions2

(0.35647499561309814, 1.8731155395507812, tensor([2, 0, 1]), tensor([1, 1, 0]))

In [None]:
#how does it work in a typical NN

#multiclass problem

class NeuralNet2(nn.Module):
  """Feed-forward network with one hidden layer that returns raw scores.

  The forward pass is linear1 -> ReLU -> linear2, with no softmax applied
  to the output.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    """Create the layers: input_size -> hidden_size -> num_classes."""
    super().__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the output of linear2 for input ``x`` (no softmax)."""
    hidden = self.relu(self.linear1(x))
    # The scores are returned as-is; no softmax at the end.
    return self.linear2(hidden)

# Multiclass setup: the model returns raw scores for 3 classes.
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss() # applies nn.LogSoftmax + nn.NLLLoss itself, so the model must not apply softmax

In [None]:
#how does it work in a typical NN

#binary classification problem

class NeuralNet1(nn.Module):
  """Feed-forward network with one hidden layer and a sigmoid output.

  The forward pass is linear1 -> ReLU -> linear2 -> sigmoid, so every
  output value lies between 0 and 1.
  """

  def __init__(self, input_size, hidden_size, num_classes=1):
    """Create the layers: input_size -> hidden_size -> num_classes.

    Args:
      input_size: Number of input features.
      hidden_size: Number of units in the hidden layer.
      num_classes: Number of output units. Defaults to 1, i.e. a single
        probability per sample for a binary problem.
    """
    # Zero-argument super() always refers to this class; the previous
    # explicit reference to NeuralNet2 raised at construction time.
    super().__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the sigmoid of the final linear layer's output for ``x``."""
    out = self.linear1(x)
    out = self.relu(out)
    out = self.linear2(out)
    # Sigmoid at the end: nn.BCELoss does not apply it itself.
    y_pred = torch.sigmoid(out)
    return y_pred

# Binary setup: a single output unit whose sigmoid is the predicted
# probability. num_classes is passed explicitly because the constructor
# call without it omitted a declared argument.
model = NeuralNet1(input_size=28*28, hidden_size=5, num_classes=1)
criterion = nn.BCELoss()  # does not apply sigmoid itself; the model's last layer does