## The only imports we need

In [9]:
import torch
import torch.nn as nn
import torch.autograd as autograd
from torch.autograd import Variable

## Declare Inputs and targets

Every loss function needs inputs and targets

In [10]:
input_regression = torch.Tensor([1, 2, 3, 4, 5])
target_regression = torch.Tensor([1, 2, 3, 4, 6])

input_classification = torch.Tensor([[1, 2, 3], [1, 2, 3], [1, 2, 3]]).transpose(1, 0)
target_classification = torch.LongTensor([1, 2, 3, 4, 5]) # torch.LongTensor(3).random_(5)

print('input_regression:', input_regression)
print('input_classification:', input_classification)
print('target_regression:', target_regression)
print('target_classification:', target_classification)
# todo print these in numpy or clearer
# todo show math in markdown
# work through each methodically and clearly

input_regression: 
 1
 2
 3
 4
 5
[torch.FloatTensor of size 5]

input_classification: 
 1  1  1
 2  2  2
 3  3  3
[torch.FloatTensor of size 3x3]

target_regression: 
 1
 2
 3
 4
 6
[torch.FloatTensor of size 5]

target_classification: 
 1
 2
 3
 4
 5
[torch.LongTensor of size 5]



## L1Loss AKA absolute loss

\begin{align}
\sum \left|y_i - h(x_i)\right|
\end{align}

In [37]:
# L1Loss AKA absolute loss
#loss = nn.L1Loss(reduce=False) # Shows loss for each single value
#loss = nn.L1Loss()
loss = nn.L1Loss(size_average=False) # Doesn't average by the number of elements
input = autograd.Variable(input_regression, requires_grad=True) # todo requires necessary?
target = autograd.Variable(target_regression)
print(input, target)
output = loss(input, target)
#output.backward()
print('L1Loss: {}'.format(output))

# todo explain and show output gradients. 
# We want the user to get a real intuition for how the final layer is wrong.

Variable containing:
 1
 2
 3
 4
 5
[torch.FloatTensor of size 5]
 Variable containing:
 1
 2
 3
 4
 6
[torch.FloatTensor of size 5]



TypeError: forward() got an unexpected keyword argument 'size_average'

In [12]:
# MSE Loss AKA AKA AKA
loss = nn.MSELoss()
input = autograd.Variable(input_regression, requires_grad=True)
target = autograd.Variable(target_regression)
output = loss(input, target)
output.backward()
print('MSELoss: {}'.format(output))

MSELoss: Variable containing:
 0.2000
[torch.FloatTensor of size 1]



In [13]:
# CrossEntropyLoss
loss = nn.CrossEntropyLoss()
#input = autograd.Variable(input_classification, requires_grad=True)
#target = autograd.Variable(target_classification)
input = autograd.Variable(torch.randn(3, 5), requires_grad=True)
target = autograd.Variable(torch.LongTensor(3).random_(5))
output = loss(input, target)
output.backward()
print('CrossEntropyLoss: {}'.format(output))

CrossEntropyLoss: Variable containing:
 1.9333
[torch.FloatTensor of size 1]



In [14]:
# NLLLoss
m = nn.LogSoftmax()
loss = nn.NLLLoss()
# input is of size N x C = 3 x 5
input = autograd.Variable(torch.randn(3, 5), requires_grad=True)
# each element in target has to have 0 <= value < C
target = autograd.Variable(torch.LongTensor([1, 0, 4]))
output = loss(m(input), target)
output.backward()
print('NLLLoss: {}'.format(output))

NLLLoss: Variable containing:
 2.7543
[torch.FloatTensor of size 1]





In [15]:
# PoissonNLLLoss # Negative log likelihood loss with Poisson distribution of target.
loss = nn.PoissonNLLLoss()
log_input = autograd.Variable(torch.randn(5, 2), requires_grad=True)
target = autograd.Variable(torch.randn(5, 2))
output = loss(log_input, target)
output.backward()
print('PoissonNLLLoss: {}'.format(output))

PoissonNLLLoss: Variable containing:
 0.7165
[torch.FloatTensor of size 1]



In [16]:
# NLLLoss2d # negative log likehood loss, but for image inputs. It computes NLL loss per-pixel.
m = nn.Conv2d(16, 32, (3, 3)).float()
loss = nn.NLLLoss2d()
# input is of size N x C x height x width
input = autograd.Variable(torch.randn(3, 16, 10, 10))
# each element in target has to have 0 <= value < C
target = autograd.Variable(torch.LongTensor(3, 8, 8).random_(0, 4))
output = loss(m(input), target)
output.backward()
print('NLLLoss2d: {}'.format(output))

NLLLoss2d: Variable containing:
1.00000e-02 *
 -1.0574
[torch.FloatTensor of size 1]



In [17]:
# KLDivLoss # The Kullback-Leibler divergence Loss

In [18]:
# BCELoss # Binary Cross Entropy
m = nn.Sigmoid()
loss = nn.BCELoss()
input = autograd.Variable(torch.randn(3), requires_grad=True)
target = autograd.Variable(torch.FloatTensor(3).random_(2))
output = loss(m(input), target)
output.backward()
print('BCELoss: {}'.format(output))

BCELoss: Variable containing:
 0.6185
[torch.FloatTensor of size 1]



In [19]:
# BCEWithLogitsLoss # This loss combines a Sigmoid layer and the BCELoss in one single class

In [20]:
# MarginRankingLoss # Creates a criterion that measures the loss given inputs x1, x2, two 1D mini-batch Tensor`s, and a label 1D mini-batch tensor `y with values (1 or -1).


In [21]:
# HingeEmbeddingLoss
'''                 { x_i,                  if y_i ==  1
loss(x, y) = 1/n {
                    { max(0, margin - x_i), if y_i == -1'''

'                 { x_i,                  if y_i ==  1\nloss(x, y) = 1/n {\n                    { max(0, margin - x_i), if y_i == -1'

In [22]:
#MultiLabelMarginLoss # multi-class multi-classification hinge loss (margin-based loss) 
# loss(x, y) = sum_ij(max(0, 1 - (x[y[j]] - x[i]))) / x.size(0)

In [23]:
# SmoothL1Loss AKA Huber loss # Creates a criterion that uses a squared term if the absolute element-wise error falls below 1 and an L1 term otherwise.

In [24]:
# SoftMarginLoss # two-class classification logistic loss

In [25]:
# MultiLabelSoftMarginLoss # multi-label one-versus-all loss based on max-entropy

In [26]:
# CosineEmbeddingLoss

In [27]:
# MultiMarginLoss

In [28]:
# TripletMarginLoss
'''triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
input1 = autograd.Variable(torch.randn(100, 128))
input2 = autograd.Variable(torch.randn(100, 128))
input3 = autograd.Variable(torch.randn(100, 128))
output = triplet_loss(input1, input2, input3)
output.backward()'''

'triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)\ninput1 = autograd.Variable(torch.randn(100, 128))\ninput2 = autograd.Variable(torch.randn(100, 128))\ninput3 = autograd.Variable(torch.randn(100, 128))\noutput = triplet_loss(input1, input2, input3)\noutput.backward()'