In [14]:
import torch
from torch.nn.functional import binary_cross_entropy_with_logits
from torch.nn.functional import binary_cross_entropy

# 1.模型及其损失函数

$$z_{(1, 3)} = x_{(1, 5)}w_{(5, 3)} + b_{(1, 3)}$$

In [22]:
# Single linear layer z = x @ w + b, scored against an all-zero target with
# binary cross-entropy computed directly on the logits.
x = torch.ones(5)   # input vector
y = torch.zeros(3)  # target vector
# w and b are the trainable parameters: requires_grad=True makes autograd
# record every operation that uses them.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w) + b
loss = binary_cross_entropy_with_logits(z, y)

print(f"x: {x}")
print(f"shape of x: {x.shape}")
print()
print(f"w: {w}")
print(f"shape of w: {w.shape}")
print()
print(f"b: {b}")
print(f"shape of b: {b.shape}")
print()
print(f"z: {z}")
print(f"shape of z: {z.shape}")
print()
print(f"y: {y}")
# Label fixed: this line reports the shape of y (it previously said "z").
print(f"shape of y: {y.shape}")
print()
print(f"loss: {loss}")
print(f"shape of loss: {loss.size()}")
print()
# grad_fn references the backward node autograd created for each result.
print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

x: tensor([1., 1., 1., 1., 1.])
shape of x: torch.Size([5])

w: tensor([[-0.2893,  0.5604, -0.5727],
        [-0.6208,  0.3335,  0.1034],
        [ 1.4042, -0.4423,  0.0864],
        [-0.8411,  0.8256,  1.3893],
        [ 1.3486, -0.7275,  0.2530]], requires_grad=True)
shape of w: torch.Size([5, 3])

b: tensor([ 1.0191, -2.4529,  1.1018], requires_grad=True)
shape of b: torch.Size([3])

z: tensor([ 2.0207, -1.9032,  2.3613], grad_fn=<AddBackward0>)
shape of z: torch.Size([3])

y: tensor([0., 0., 0.])
shape of z: torch.Size([3])

loss: 1.5785146951675415
shape of loss: torch.Size([])

Gradient function for z = <AddBackward0 object at 0x12b793d90>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x12b793f50>


## 1. Tensors, Functions and Computational Graph

## 计算权重的梯度

In [23]:
# Run backpropagation from the scalar loss; autograd stores d(loss)/d(param)
# in the .grad attribute of each leaf tensor that requires gradients.
loss.backward()

weight_grad = w.grad
bias_grad = b.grad
print(f"Gradient of w: {weight_grad}")
print(f"Gradient of b: {bias_grad}")

Gradient of w: tensor([[0.2943, 0.0432, 0.3046],
        [0.2943, 0.0432, 0.3046],
        [0.2943, 0.0432, 0.3046],
        [0.2943, 0.0432, 0.3046],
        [0.2943, 0.0432, 0.3046]])
Gradient of b: tensor([0.2943, 0.0432, 0.3046])


## 禁止梯度跟踪

In [26]:
# Recompute the logits with gradient tracking left on (the default); the
# result requires gradients because w and b do.
z = x @ w + b
print(z.requires_grad)

True


In [28]:
# Operations executed inside torch.no_grad() are not recorded by autograd,
# so the resulting tensor does not require gradients.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

False


In [29]:
# Alternative to no_grad(): compute z normally, then detach() it to obtain a
# tensor that is cut off from the computational graph.
z = x @ w + b
z_det = z.detach()
print(z_det.requires_grad)

False


# 2. 张量梯度和雅可比乘积

In [40]:
# Demonstrates that backward() ACCUMULATES into .grad: calling it twice
# doubles the stored gradient, and zeroing .grad restores a single-pass value.
inp = torch.eye(5, requires_grad=True)
print(f"inp\n{inp}")

out = torch.pow(inp + 1, 2)
print(f"out\n{out}")

# out is not a scalar, so backward() needs an explicit upstream gradient of
# the same shape; all-ones weights every output element equally.
upstream = torch.ones_like(inp)

# retain_graph=True keeps the graph alive so backward() can be called again.
for heading in ("First call\n", "Second call\n"):
    out.backward(upstream, retain_graph=True)
    print(heading, inp.grad)

# Reset the accumulated gradient in place before the final pass.
inp.grad.zero_()
out.backward(upstream, retain_graph=True)
print("Call after zeroing gradients\n", inp.grad)

inp
tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]], requires_grad=True)
out
tensor([[4., 1., 1., 1., 1.],
        [1., 4., 1., 1., 1.],
        [1., 1., 4., 1., 1.],
        [1., 1., 1., 4., 1.],
        [1., 1., 1., 1., 4.]], grad_fn=<PowBackward0>)
First call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
Second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])
Call after zeroing gradients
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
