In [1]:
import torch
import torch.nn as nn

## Inputs

In [31]:
# Logits: raw, unnormalised scores laid out as 2 samples x 3 classes.
z = torch.tensor([
    [1.0, 0.5, 2.0],
    [0.5, 3.0, 1.0],
])
# Target: index of the correct class for each sample.
y = torch.tensor([2, 1])

## Approach 1 - Softmax -> Log -> CE Loss

### Step 1 - Calculate Softmax

In [6]:
def softmax(z):
    """Softmax over the last axis of ``z``.

    Args:
        z: tensor of logits; the last axis indexes the classes. Any number of
            leading axes is accepted.

    Returns:
        Tensor with the same shape as ``z`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    # Shifting by the per-row maximum does not change the result
    # (the factor exp(-max) cancels between numerator and denominator),
    # but it keeps exp() from overflowing to inf for large logits.
    z_max, _ = torch.max(z, -1, keepdim=True)
    exp_z = (z - z_max).exp()
    # keepdim=True keeps the reduced axis so the division broadcasts
    # correctly for inputs of any rank, not just 2-D.
    return exp_z / exp_z.sum(-1, keepdim=True)

In [5]:
z = torch.tensor([[1,0.5,2],[0.5,3,1]])
# Pass dim explicitly: calling nn.functional.softmax without it is deprecated
# and emits a warning. dim=-1 normalises over the class axis of each row.
p, p_pytorch = softmax(z), nn.functional.softmax(z, dim=-1)
p, p_pytorch

  p, p_pytorch = softmax(z), nn.functional.softmax(z)


(tensor([[0.2312, 0.1402, 0.6285],
         [0.0674, 0.8214, 0.1112]]),
 tensor([[0.2312, 0.1402, 0.6285],
         [0.0674, 0.8214, 0.1112]]))

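PyTorch's softmax is numerically stable: it subtracts each row's maximum logit before exponentiating. This leaves the result unchanged but prevents overflow when logits are large and avoids a 0/0 division when all logits are highly negative.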

### Step 2 - Apply log

In [9]:
# Element-wise natural log of the softmax probabilities.
logp = p.log()
logp

tensor([[-1.4644, -1.9644, -0.4644],
        [-2.6967, -0.1967, -2.1967]])

### Step 3 - Dot Product

_Case 1 - Target is given in one-hot encoded form_

In [7]:
# One-hot targets built from the class indices 2 and 1:
# each row holds a single 1.0 in the column of the correct class.
y = nn.functional.one_hot(torch.tensor([2, 1]), num_classes=3).to(torch.float32)
y

tensor([[0., 0., 1.],
        [0., 1., 0.]])

In [15]:
def nll_loss(logp,y):
    """Per-sample negative log-likelihood for one-hot encoded targets.

    Args:
        logp: log-probabilities; the last axis indexes the classes.
        y: one-hot targets with the same shape as ``logp``.

    Returns:
        Tensor with one loss value per sample (the last axis is summed out).
        No reduction over samples is applied.
    """
    # A row-wise dot product is an element-wise multiply followed by a sum
    # over the class axis. Doing it as tensor ops (instead of looping and
    # rebuilding a tensor from Python scalars) keeps the result attached to
    # the autograd graph and works for any batch shape.
    return -(logp * y).sum(-1)

In [17]:
# One loss value per sample, computed against the one-hot targets.
loss = nll_loss(logp=logp, y=y)
loss

tensor([0.4644, 0.1967])

_Case 2 - Target is given as class indices_

In [22]:
# Targets as integer class indices, one per sample.
class_indices = [2, 1]
y = torch.tensor(class_indices)
y

tensor([2, 1])

In [18]:
def nll_loss(logp,y):
    """Per-sample negative log-likelihood for integer class targets.

    For each row ``i`` of ``logp`` the entry in column ``y[i]`` is selected
    and negated. No reduction over samples is applied.
    """
    row_idx = range(y.shape[0])
    # Pairing row i with column y[i] picks the log-probability of the
    # correct class for every sample.
    picked = logp[row_idx, y]
    return -picked

In [23]:
# One loss value per sample, computed against the class-index targets.
loss = nll_loss(logp=logp, y=y)
loss

tensor([0.4644, 0.1967])

### Step 4 - Calculate Mean

In [25]:
# Manual result: average of the per-sample losses.
ce_loss = loss.mean()
# Reference value from PyTorch's built-in NLL loss on the same inputs.
ce_loss_pytorch = nn.functional.nll_loss(logp, y)
ce_loss, ce_loss_pytorch

(tensor(0.3306), tensor(0.3306))

## Approach 2 - LogSoftmax -> CE Loss

In [None]:
# Same inputs as before, restated so this section reads on its own.
logits_rows = [[1, 0.5, 2], [0.5, 3, 1]]
target_classes = [2, 1]
z = torch.tensor(logits_rows)      # Logits
y = torch.tensor(target_classes)   # Target

### Step 1 - Calculate LogSoftmax

In [26]:
def log_softmax(z):
    """Log-softmax over the last axis of ``z``, computed without forming softmax.

    Uses ``log_softmax(z) = (z - m) - log(sum(exp(z - m)))`` with ``m`` the
    maximum along the last axis, so exp() never sees a positive argument.

    Args:
        z: tensor of logits; the last axis indexes the classes. Any number of
            leading axes is accepted.

    Returns:
        Tensor of log-probabilities with the same shape as ``z``.
    """
    z_max, _ = torch.max(z, -1, keepdim=True)
    shifted = z - z_max
    # keepdim=True keeps the reduced class axis in place, so the subtraction
    # broadcasts correctly for 1-D, 2-D and higher-rank inputs alike.
    log_norm = shifted.exp().sum(-1, keepdim=True).log()
    return shifted - log_norm

In [27]:
# Pass dim explicitly: calling nn.functional.log_softmax without it is
# deprecated and emits a warning. dim=-1 normalises over the class axis.
logp, logp_pytorch = log_softmax(z), nn.functional.log_softmax(z, dim=-1)
logp, logp_pytorch

  logp, logp_pytorch = log_softmax(z), nn.functional.log_softmax(z)


(tensor([[-1.4644, -1.9644, -0.4644],
         [-2.6967, -0.1967, -2.1967]]),
 tensor([[-1.4644, -1.9644, -0.4644],
         [-2.6967, -0.1967, -2.1967]]))

### Step 2 - Calculate CE Loss

In [28]:
def nll_loss(logp,y):
    """Mean negative log-likelihood for integer class targets.

    Selects ``logp[i, y[i]]`` for every sample ``i``, averages the selected
    log-probabilities and returns the negated average as a scalar tensor.
    """
    sample_rows = range(len(y))
    target_logp = logp[sample_rows, y]
    mean_logp = target_logp.mean()
    return -mean_logp

In [29]:
# Manual cross-entropy from the log-probabilities.
ce_loss = nll_loss(logp, y)
# Reference value from PyTorch's built-in NLL loss on the same inputs.
ce_loss_pytorch = nn.functional.nll_loss(logp, y)
ce_loss, ce_loss_pytorch

(tensor(0.3306), tensor(0.3306))

## Approach 3 - Calculate CE Loss Directly

In [None]:
# Logits: one row of class scores per sample.
z = torch.tensor([[1.0, 0.5, 2.0],
                  [0.5, 3.0, 1.0]])
# Target: correct class index per sample.
y = torch.tensor([2, 1])

In [30]:
# cross_entropy takes the raw logits and the class-index targets directly.
ce_loss = nn.functional.cross_entropy(input=z, target=y)
ce_loss

tensor(0.3306)