In [None]:
import torch

## 基础尝试

创建的tensor默认是不求梯度的

In [None]:
# A tensor built from plain data does not track gradients unless it is asked to;
# passing requires_grad=True at construction time opts in.
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)
print(a.requires_grad, b.requires_grad, sep="\n")

通过 `requires_grad_()`来令张量求梯度

In [None]:
# Turn gradient tracking on in place for the existing tensor, then show the flag.
a.requires_grad_(True)
a.requires_grad

若需要求梯度，则tensor的元素需要是float类型的

In [None]:
# Only floating-point (or complex) tensors can require gradients, so asking an
# integer tensor to track them raises RuntimeError. Catch and display the error
# so the notebook still runs from top to bottom.
try:
    a = torch.tensor([1, 2, 3], requires_grad=True)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

# Giving the same values a floating-point dtype makes the request valid.
a = torch.tensor([1, 2, 3], dtype=torch.float32, requires_grad=True)
a

## 反向传播


一般我们都是对loss做反向传播，loss是一个标量。下面就是标量的反向传播。
$$
a = [a_0, a_1, a_2] = [1, 2, 3] 
\\[5mm]
c = a_0^2 + a_1^2 + a_2^2
\\[5mm]
\frac{\partial c}{\partial a_0} = 2 a_0 = 2 \times 1 = 2
\\[5mm]
\frac{\partial c}{\partial a_1} = 2 a_1 = 2 \times 2 = 4
\\[5mm]
\frac{\partial c}{\partial a_2} = 2 a_2 = 2 \times 3 = 6
$$

In [None]:
# `a` is created directly by the user, which makes it a leaf tensor.
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = a.pow(2)
c = torch.sum(b)
# Backpropagate from the scalar c. Gradients are stored on leaf tensors only;
# gradients of intermediate results such as b are released.
c.backward()
a.grad

上面是标量的求导，那么张量的求导呢？
$$
a = [a_0, a_1, a_2] = [1, 2, 3]
\\[5mm]
b = [a_0^2 + a_1, a_1+a_2, a_2^3]
\\[5mm]
Jacobian = \frac{\partial b}{\partial a} =  
\begin{pmatrix}
\frac{\partial b_0}{\partial a_0} & \frac{\partial b_1}{\partial a_0} & \frac{\partial b_2}{\partial a_0} \\
\frac{\partial b_0}{\partial a_1} & \frac{\partial b_1}{\partial a_1} & \frac{\partial b_2}{\partial a_1} \\
\frac{\partial b_0}{\partial a_2} & \frac{\partial b_1}{\partial a_2} & \frac{\partial b_2}{\partial a_2}
\end{pmatrix}
=
\begin{pmatrix}
2a_0 & 0 & 0 \\
1 & 1 & 0 \\
0 & 1 & 3 a_2^2
\end{pmatrix}
=
\begin{pmatrix}
2 & 0 & 0 \\
1 & 1 & 0 \\
0 & 1 & 27
\end{pmatrix}
$$




In [None]:
# Build the Jacobian of b with respect to a, one output component at a time.
# backward() on a non-scalar tensor needs a `gradient` argument; passing the
# one-hot vector e_i leaves d b_i / d a in a.grad.
Jacobian = torch.zeros(3, 3)
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)  # created by the user, so it is a leaf tensor
b = torch.zeros(3)
b[0] = a[0] ** 2 + a[1]
b[1] = a[1] + a[2]
b[2] = a[2] ** 3

# torch.eye(3) yields float one-hot rows, matching the dtype of b.
for i, one_hot in enumerate(torch.eye(3)):
    if a.grad is not None:
        a.grad.zero_()  # gradients accumulate across backward() calls, so reset first
    # retain_graph=True keeps the graph alive so backward() can be called again.
    b.backward(one_hot, retain_graph=True)
    print(a.grad)  # d b_i / d a
    # Same layout as the matrix in the markdown above: column i holds d b_i / d a.
    Jacobian[:, i] = a.grad

Jacobian


 一个反向传播的例子

In [None]:
# Define the tensors: x, w1 and w2 are leaves that track gradients; b does not.
x = torch.ones(5, requires_grad=True)
w1 = torch.tensor(2.0, requires_grad=True)
w2 = torch.tensor(3.0, requires_grad=True)
b = torch.tensor(4.0, requires_grad=False)

# Forward pass
l1 = x * w1
l2 = l1 + b
l3 = l2 * w2
y = l3.mean()

# Backward pass
y.backward()

# l1 and l2 are intermediate (non-leaf) tensors, so their .grad is None after
# backward(); PyTorch emits a UserWarning when .grad is read on a non-leaf tensor.
# .detach() is used instead of the legacy .data accessor to view the values.
print(l1.detach(), l1.grad, l1.grad_fn)
# tensor([2., 2., 2., 2., 2.]) None <MulBackward0 object at 0x...>  (address varies per run)
print(l2.detach(), l2.grad, l2.grad_fn)
# tensor([6., 6., 6., 6., 6.]) None <AddBackward0 object at 0x...>  (address varies per run)
print(y)
# tensor(18., grad_fn=<MeanBackward0>)
print(w1.grad, w2.grad)
# tensor(3.) tensor(6.)
print(x.grad)
# tensor([1.2000, 1.2000, 1.2000, 1.2000, 1.2000])