In [1]:
import torch
# Seed PyTorch's global RNG so any random operations are reproducible across runs.
torch.manual_seed(10)

<torch._C.Generator at 0x1f5bd01de10>

In [6]:
# retain_graph: keep the computation graph alive so backward() can be called twice.
w = torch.tensor([1.0], requires_grad=True)
x = torch.tensor([2.0], requires_grad=True)

# y = (w + x) * (w + 1)
a = w + x
b = w + 1
y = a * b

# First pass: retain_graph=True asks autograd to keep the graph after this call.
y.backward(retain_graph=True)
print(w.grad, x.grad)

# Second pass reuses the retained graph. Gradients are accumulated into .grad
# rather than overwritten, so the printed values double.
y.backward()
print(w.grad, x.grad)

tensor([5.]) tensor([2.])
tensor([10.]) tensor([4.])


In [7]:
# grad_tensors: weighting each component when backpropagating a non-scalar output.
w = torch.tensor([1.0], requires_grad=True)
x = torch.tensor([2.0], requires_grad=True)

a = w + x
b = w + 1

y0 = a * b  # y0 = (x + w) * (w + 1)
y1 = a + b  # y1 = (x + w) + (w + 1)

# loss has two elements, so backward() is given an explicit `gradient` vector;
# each entry is the weight applied to the matching element of loss.
loss = torch.cat((y0, y1), dim=0)
grad_tensors = torch.tensor([1.0, 2.0])

loss.backward(gradient=grad_tensors)
print(w.grad, x.grad)

tensor([9.]) tensor([4.])


该 loss 由 $y_{0}$、$y_{1}$ 两部分组成，而 grad_tensors 为这两部分的梯度分别设置权重 1 和 2。因此最终 w 的梯度为：$\frac{\partial y_{0}}{\partial w} \times 1+ \frac{\partial y_{1}}{\partial w} \times 2 = 5 \times 1 + 2 \times 2 = 9$


In [8]:
# autograd.grad: compute first- and second-order derivatives of y = x ** 2.
x = torch.tensor([3.0], requires_grad=True)
y = x ** 2

# create_graph=True records the gradient computation itself, so the
# first-order result can be differentiated again below.
grad_1 = torch.autograd.grad(outputs=y, inputs=x, create_graph=True)
print(grad_1)

# Differentiate dy/dx with respect to x to obtain the second derivative.
grad_2 = torch.autograd.grad(outputs=grad_1[0], inputs=x)
print(grad_2)

(tensor([6.], grad_fn=<MulBackward0>),)
(tensor([2.]),)


**需要注意的三个点**：
- 在每次反向传播求导时，梯度不会自动清零。如果进行多次迭代计算梯度而没有清零，那么梯度会在前一次的基础上叠加。
- 依赖于需要梯度（requires_grad=True）的叶子节点的节点，requires_grad 属性默认为 True
- 需要梯度（requires_grad=True）的叶子节点不可执行 inplace 操作
    - inplace 操作有a += x，a.add_(x)
    - 非 inplace 操作有a = a + x，a.add(x)

In [9]:
# tip1: clear the gradient with w.grad.zero_() after every backward pass.
w = torch.tensor([1.0], requires_grad=True)
x = torch.tensor([2.0], requires_grad=True)

for i in range(4):
    # Rebuild the graph each iteration: y = (w + x) * (w + 1)
    a = w + x
    b = w + 1
    y = a * b

    y.backward()
    print(w.grad)

    # Reset w.grad in place; without this, each backward() call would add
    # onto the value left by the previous iteration.
    w.grad.zero_()

tensor([5.])
tensor([5.])
tensor([5.])
tensor([5.])


In [10]:
# tip2: when a leaf requires grad, the non-leaf nodes computed from it do too.
w = torch.tensor([1.0], requires_grad=True)
x = torch.tensor([2.0], requires_grad=True)

a = w + x
b = w + 1
y = a * b

# Show the requires_grad flag of every intermediate/output node.
print(*(node.requires_grad for node in (a, b, y)))

True True True


In [12]:
# tip3: a leaf tensor that requires grad cannot be modified in place.

# Part 1: what "in place" means. `+=` writes into the existing tensor, so the
# two printed id(a) values are the same.
a = torch.ones((1, ))
print(id(a), a)

a += torch.ones((1, ))
print(id(a), a)

# Part 2: build a graph y = (w + x) * (w + 1) on top of leaf tensors w and x.
w = torch.tensor([1.], requires_grad=True)
x = torch.tensor([2.], requires_grad=True)

a = torch.add(w, x)
b = torch.add(w, 1)
y = torch.mul(a, b)

# autograd refuses in-place modification of a leaf that requires grad.
# The error is the point of this demo, so catch and display it instead of
# letting it abort the cell (which would break "Restart & Run All").
try:
    w.add_(1)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

# The rejected in-place op never ran, so the graph is intact and backward works.
y.backward()

2154985052104 tensor([1.])
2154985052104 tensor([2.])


RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.