## backward()

In [1]:
import torch

# Leaf tensor we differentiate with respect to: x = [[x1], [x2]] = [[1], [5]].
# torch.tensor(...) is the recommended factory; float literals keep the dtype
# floating point, which requires_grad needs.
x = torch.tensor([[1.0], [5.0]], requires_grad=True)
w = torch.tensor([[0.0, 1.0], [1.0, 0.0]])
# Vector for the vector-Jacobian product; y is 1x1, so v is 1x1 as well.
v = torch.tensor([[2.0]])

y = x.t() @ w @ x

# y   = 2 * x1 * x2            // f(x1, x2)
# J   = [[2*x2, 2*x1]]         // [[df/dx1, df/dx2]]
# J^T = [[2*x2], [2*x1]]       // [[df/dx1], [df/dx2]]
# v   = [[2]]
# y.backward(v) stores J^T @ v in x.grad:
#   [[2*5], [2*1]] * 2 = [[20], [4]]

print(x)
print(w)
print(y)
print(x.grad)  # None: nothing has been back-propagated yet
y.backward(v)
print(x.grad)

tensor([[1.],
        [5.]], requires_grad=True)
tensor([[0., 1.],
        [1., 0.]])
tensor([[10.]], grad_fn=<MmBackward>)
None
tensor([[20.],
        [ 4.]])


In [2]:
import torch

# Same setup as the explicit-v example, but backward() is called without an
# argument this time.
x = torch.tensor([[1.0], [5.0]], requires_grad=True)
w = torch.tensor([[0.0, 1.0], [1.0, 0.0]])
# Not passed to backward() below: it only spells out the gradient that
# backward() uses implicitly when the output holds a single element.
v = torch.tensor([[1.0]])

y = x.t() @ w @ x

# y   = 2 * x1 * x2
# J   = [[2*x2, 2*x1]]
# J^T = [[2*x2], [2*x1]]
# v   = [[1]]                  // implicit, same shape as y (1x1)
# x.grad = J^T @ v = [[2*5], [2*1]] = [[10], [2]]

print(x)
print(w)
print(y)
y.backward()
print(x.grad)

tensor([[1.],
        [5.]], requires_grad=True)
tensor([[0., 1.],
        [1., 0.]])
tensor([[10.]], grad_fn=<MmBackward>)
tensor([[10.],
        [ 2.]])


In [3]:
import torch

# x is random on purpose: y = w @ x is linear in x, so the Jacobian (and
# therefore x.grad) is the same whatever values x takes.
x = torch.randn(2, 1, requires_grad=True)
w = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
# y has two elements, so backward() needs an explicit v of the same shape.
v = torch.tensor([[1.0], [1.0]])

y = w @ x

# y1  = x1 + 2*x2
# y2  = 3*x1 + 4*x2
# J   = [[1, 2], [3, 4]]
# J^T = [[1, 3], [2, 4]]
# v   = [[1], [1]]
# x.grad = J^T @ v = [[1 + 3], [2 + 4]] = [[4], [6]]

print(x)
print(w)
print(y)
y.backward(v)
print(x.grad)

tensor([[-1.2550],
        [ 1.2701]], requires_grad=True)
tensor([[1., 2.],
        [3., 4.]])
tensor([[1.2852],
        [1.3155]], grad_fn=<MmBackward>)
tensor([[4.],
        [6.]])


## cross_entropy, log_softmax, nll

- https://medium.com/@zhang_yang/understanding-cross-entropy-implementation-in-pytorch-softmax-log-softmax-nll-cross-entropy-416a2b200e34

### setup

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [5]:
# Seed the global RNG so the sampled values are the same on every run.
torch.manual_seed(0)
batch_size = 5
n_classes = 3
# One row of raw scores per sample, one column per class.
x = torch.randn((batch_size, n_classes))
x.shape

torch.Size([5, 3])

In [6]:
# Display the sampled scores: one row per sample, one column per class.
x

tensor([[ 1.5410, -0.2934, -2.1788],
        [ 0.5684, -1.0845, -1.3986],
        [ 0.4033,  0.8380, -0.7193],
        [-0.4033, -0.5966,  0.1820],
        [-0.8567,  1.1006, -1.0712]])

In [7]:
# Draw one integer class label in [0, n_classes) for each sample in the batch.
target = torch.randint(0, n_classes, (batch_size,), dtype=torch.int64)
target

tensor([2, 0, 1, 0, 1])

### softmax + nl (negative log likelihood, with the log taken inside nl)

In [8]:
def softmax(x):
    """Return the softmax of ``x`` taken along its last dimension.

    The largest entry along the last dimension is subtracted before
    exponentiating. Softmax is unchanged by such a shift, but the shift keeps
    ``exp`` from overflowing to ``inf`` (and the result from becoming ``nan``)
    when the inputs are large. The exponentials are also computed only once.

    Args:
        x: tensor of raw scores; the last dimension indexes the classes.

    Returns:
        Tensor of the same shape as ``x`` whose entries along the last
        dimension are non-negative and sum to 1.
    """
    # The shift is a constant as far as gradients are concerned.
    row_max = x.max(-1)[0].unsqueeze(-1).detach()
    exps = (x - row_max).exp()
    return exps / exps.sum(-1).unsqueeze(-1)
def nl(input, target):
    """Mean negative log of the probability assigned to each target class.

    Args:
        input: 2-D tensor of probabilities, one row per sample.
        target: 1-D integer tensor holding the class index for each row.

    Returns:
        Scalar tensor: minus the mean of ``log(input[i, target[i]])``.
    """
    rows = range(target.shape[0])
    picked = input[rows, target]  # probability of the labelled class, per row
    return -(picked.log().mean())

# Turn the raw scores into class probabilities, then score them against the labels.
pred = softmax(x)
loss = nl(pred, target)
loss

tensor(1.2616)

In [9]:
# Recompute the probabilities and the loss from the same x and target;
# the displayed value is expected to be unchanged.
pred = softmax(x)
loss = nl(pred, target)
loss

tensor(1.2616)

### log_softmax + nll (negative log likelihood)

In [10]:
def log_softmax(x):
    """Return ``log(softmax(x))`` taken along the last dimension of ``x``.

    Computed as ``x - logsumexp(x)``. The largest entry along the last
    dimension is subtracted first so that ``exp`` cannot overflow; without
    the shift, large inputs turn the log-sum-exp into ``inf`` and every
    output into ``-inf``.

    Args:
        x: tensor of raw scores; the last dimension indexes the classes.

    Returns:
        Tensor of log-probabilities with the same shape as ``x``.
    """
    # The shift is a constant as far as gradients are concerned.
    row_max = x.max(-1)[0].unsqueeze(-1).detach()
    shifted = x - row_max
    return shifted - shifted.exp().sum(-1).log().unsqueeze(-1)
def nll(input, target):
    """Mean negative log-probability of the target classes.

    Args:
        input: 2-D tensor of log-probabilities, one row per sample.
        target: 1-D integer tensor holding the class index for each row.

    Returns:
        Scalar tensor: minus the mean of ``input[i, target[i]]``.
    """
    rows = range(target.shape[0])
    picked = input[rows, target]  # log-probability of the labelled class, per row
    return -(picked.mean())

pred = log_softmax(x)
# One newline-separated print: the log-probabilities, the probabilities they
# encode, the labels, and the number of labels.
print(pred, pred.exp(), target, target.shape[0], sep="\n")
loss = nll(pred, target)
loss

tensor([[-0.1689, -2.0033, -3.8886],
        [-0.2862, -1.9392, -2.2532],
        [-1.0543, -0.6196, -2.1769],
        [-1.2865, -1.4797, -0.7011],
        [-2.1846, -0.2273, -2.3991]])
tensor([[0.8446, 0.1349, 0.0205],
        [0.7511, 0.1438, 0.1051],
        [0.3484, 0.5382, 0.1134],
        [0.2762, 0.2277, 0.4961],
        [0.1125, 0.7967, 0.0908]])
tensor([2, 0, 1, 0, 1])
5


tensor(1.2616)

### F.log_softmax + F.nll_loss

In [11]:
# Same pipeline as the hand-written version, using the library functions.
pred = F.log_softmax(x, dim=-1)
# One newline-separated print: log-probabilities, labels, number of labels.
print(pred, target, target.shape[0], sep="\n")
loss = F.nll_loss(pred, target)
loss

tensor([[-0.1689, -2.0033, -3.8886],
        [-0.2862, -1.9392, -2.2532],
        [-1.0543, -0.6196, -2.1769],
        [-1.2865, -1.4797, -0.7011],
        [-2.1846, -0.2273, -2.3991]])
tensor([2, 0, 1, 0, 1])
5


tensor(1.2616)

### F.cross_entropy

In [12]:
# Takes the raw scores directly; per the PyTorch docs this combines
# log_softmax and nll_loss in a single call.
F.cross_entropy(x, target)

tensor(1.2616)