In [1]:
import torch
import torch.nn as nn

## Minimize difference between predicted and actual distribution
- The model should output a probability distribution over the C classes rather than a single hard label. This lets the model express uncertainty in its predictions.
- Model distribution can be represented as $P(y|x_i;\theta)$
- True class distribution can be represented as $P^*(y|x_i)$
- KL divergence measures how much one probability distribution differs from another, which makes it a natural starting point for a loss function
    - $D_{KL}(P||Q) = \sum_i{P(i)\log{\frac{P(i)}{Q(i)}}}$
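    - Expanding the log gives $\sum_i{P(i)\log{P(i)}} - \sum_i{P(i)\log{Q(i)}}$; the first term (from the numerator) is independent of $Q$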
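    - Taking $P$ to be the true distribution $P^*(y|x_i)$ and $Q$ to be the model distribution $P(y|x_i;\theta)$, minimizing $D_{KL}(P||Q)$ over $\theta$ is the same as minimizing $-\sum_i{P(i)\log{Q(i)}}$. This is the cross entropy loss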


### BCE loss

In [11]:
# Ground-truth labels: the first sample is positive (1), the second negative (0).
targets = torch.tensor([1.0, 0.0])

# BCELoss consumes probabilities in [0, 1]; it does not apply a sigmoid itself.
correct_outputs = torch.tensor([0.9, 0.1])    # high probability on the right label
incorrect_outputs = torch.tensor([0.3, 0.7])  # leans the wrong way on both samples

# Default reduction averages the per-sample losses into a scalar.
criterion = nn.BCELoss()
correct_loss = criterion(input=correct_outputs, target=targets)
incorrect_loss = criterion(input=incorrect_outputs, target=targets)

print(f"Incorrect loss: {incorrect_loss.item()}")
print(f"Correct loss: {correct_loss.item()}")



Incorrect loss: 1.2039728164672852
Correct loss: 0.10536053031682968


### BCE with logits
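- Combines a sigmoid and BCELoss in a single function, so it takes raw logits instead of probabilities. This is more numerically stable than applying a sigmoid followed by BCELoss.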

In [13]:
# Ground-truth labels: the first sample is positive (1), the second negative (0).
targets = torch.tensor([1.0, 0.0], dtype=torch.float32)

# Inputs are raw logits, not probabilities. The magnitudes are large enough
# that a standalone sigmoid would saturate, which is the situation the fused
# sigmoid + BCE loss is meant to handle stably.
correct_outputs = torch.tensor([50, -50], dtype=torch.float32)
incorrect_outputs = torch.tensor([-50, 50], dtype=torch.float32)

criterion = nn.BCEWithLogitsLoss()
correct_loss = criterion(input=correct_outputs, target=targets)
incorrect_loss = criterion(input=incorrect_outputs, target=targets)

print(f"Incorrect loss: {incorrect_loss.item()}")
print(f"Correct loss: {correct_loss.item()}")


Incorrect loss: 50.0
Correct loss: 9.643749466768692e-23


### Cross entropy loss

In [22]:

# Integer class index of the true class for each of the two samples.
target = torch.tensor([2, 3])

# CrossEntropyLoss takes unnormalised per-class scores (one row per sample,
# one column per class) and applies log-softmax internally.
incorrect_prediction_logits = torch.tensor(
    [
        [0.25, 0.25, 0.25, 0.25],  # flat scores: no class is preferred
        [1.0, 0.0, 0.0, 0.0],      # favours class 0, but the target is class 3
    ],
    dtype=torch.float32,
)
correct_prediction_logits = torch.tensor(
    [
        [0.0, 0.0, 10.0, 0.0],  # strongly favours class 2
        [0.0, 0.0, 0.0, 10.0],  # strongly favours class 3
    ],
    dtype=torch.float32,
)

# Show the expected shapes: (batch, classes) for logits, (batch,) for targets.
print(incorrect_prediction_logits.shape)
print(target.shape)

criterion = nn.CrossEntropyLoss()
correct_loss = criterion(input=correct_prediction_logits, target=target)
incorrect_loss = criterion(input=incorrect_prediction_logits, target=target)

print(f"Incorrect prediction loss: {incorrect_loss.item()}")
print(f"Correct prediction loss: {correct_loss.item()}")

torch.Size([2, 4])
torch.Size([2])
Incorrect prediction loss: 1.5649813413619995
Correct prediction loss: 0.00013624693383462727


### log softmax + nll = cross entropy loss

In [21]:

# Integer class index of the true class for each of the two samples.
target = torch.tensor([2, 3])

# Same logits as in the cross-entropy example, so the resulting losses can be
# compared directly with the CrossEntropyLoss values.
incorrect_prediction_logits = torch.tensor(
    [
        [0.25, 0.25, 0.25, 0.25],
        [1.0, 0.0, 0.0, 0.0],
    ],
    dtype=torch.float32,
)
correct_prediction_logits = torch.tensor(
    [
        [0.0, 0.0, 10.0, 0.0],
        [0.0, 0.0, 0.0, 10.0],
    ],
    dtype=torch.float32,
)

# Step 1: turn logits into log-probabilities, normalising across the class axis.
incorrect_log_probs = incorrect_prediction_logits.log_softmax(dim=1)
correct_log_probs = correct_prediction_logits.log_softmax(dim=1)

# Step 2: NLLLoss picks out the log-probability of the target class, negates
# it, and averages over the batch.
criterion = nn.NLLLoss()
correct_loss = criterion(input=correct_log_probs, target=target)
incorrect_loss = criterion(input=incorrect_log_probs, target=target)

print(f"Incorrect prediction loss: {incorrect_loss.item()}")
print(f"Correct prediction loss: {correct_loss.item()}")

Incorrect prediction loss: 1.5649813413619995
Correct prediction loss: 0.00013624693383462727
