In [1]:
import torch
import numpy as np

## 简单的例子
t = a + b

In [3]:
# Two ways to make a leaf tensor take part in autograd.
# 1) Pass requires_grad=True when the tensor is created.
a = torch.full(
    size=(3, 3),
    fill_value=1.0,
    dtype=torch.float32,
    requires_grad=True,
)
# 2) Create the tensor first, then switch the flag on afterwards.
b = torch.full(size=(3, 3), fill_value=2.0, dtype=torch.float32)
b.requires_grad = True
# Display both tensors as the cell output.
a, b

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]], requires_grad=True),
 tensor([[2., 2., 2.],
         [2., 2., 2.],
         [2., 2., 2.]], requires_grad=True))

In [4]:
# Forward pass
# When at least one operand (a or b) has requires_grad=True, the tensor
# produced by the operation has requires_grad=True as well.
t = a + b
# Reduce to a scalar: backward() without arguments only works on scalar outputs.
y = t.sum()
# Cell output: the flag propagated to the result of the addition.
t.requires_grad

True

In [5]:
# Backpropagation: starting from the scalar y, compute dy/da and dy/db and
# store them in the .grad attribute of the leaf tensors a and b.
y.backward()

In [6]:
# Read back the gradients.
# t is an intermediate (non-leaf) result, so t.grad is None; PyTorch also emits
# a UserWarning when .grad is read on a non-leaf tensor. Call t.retain_grad()
# before backward() if the intermediate gradient is actually needed.
a.grad, b.grad, t.grad # only leaf nodes have a .grad

  a.grad, b.grad, t.grad # 只有叶子节点有grad


(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 None)

## 例子
t = w*a + b  

a = [[1,1],[1,1]]  
b = [[2,2],[2,2]]  
w = [[2,2],[2,2]]  

In [50]:
# Leaf tensors for t = w * a + b. requires_grad=True enables gradient
# computation for each of them during backpropagation.
a = torch.full(size=(2, 2), fill_value=1.0, dtype=torch.float32, requires_grad=True)
b = torch.full(size=(2, 2), fill_value=2.0, dtype=torch.float32, requires_grad=True)
w = torch.full(size=(2, 2), fill_value=2.0, dtype=torch.float32, requires_grad=True)

In [56]:
# Clear gradients left over from an earlier execution of this cell.
# backward() adds into .grad instead of overwriting it, so without this reset
# every re-run inflates the displayed values (5 runs -> 5x the true gradient).
a.grad = b.grad = w.grad = None

# Forward pass, split in two steps so that y and t are separate graph nodes.
y = w * a
t = y + b
z = t.sum()

# Backward pass from the scalar z: dz/da = w, dz/db = 1, dz/dw = a.
z.backward()

# a, b, w were created directly and are leaves; y and t are results of
# operations and therefore are not.
print(a.is_leaf, b.is_leaf, w.is_leaf, y.is_leaf, t.is_leaf)  # leaf-node check
# Only the leaves hold a gradient; y.grad and t.grad are None.
a.grad, b.grad, w.grad, y.grad, t.grad

True True True False False


  a.grad, b.grad, w.grad, y.grad, t.grad


(tensor([[2., 2.],
         [2., 2.]]),
 tensor([[1., 1.],
         [1., 1.]]),
 tensor([[1., 1.],
         [1., 1.]]),
 None,
 None)

## 梯度的累加

In [15]:
# Fresh leaf tensors for the accumulation demo.
a = torch.full(size=(2, 2), fill_value=1.0, dtype=torch.float32, requires_grad=True)
b = torch.full(size=(2, 2), fill_value=2.0, dtype=torch.float32, requires_grad=True)

# No backward pass has run yet, so the gradient is still None.
print(a.grad)

# First pass. backward() needs a scalar, hence the sum(); calling c.backward()
# directly fails with "grad can be implicitly created only for scalar outputs".
c = a + b
t = c.sum()
t.backward()
print(a.grad)  # all ones

# Second pass over a freshly built graph. The new gradient is added to the
# one already stored in a.grad rather than replacing it.
c = a + b
t = c.sum()
t.backward()
print(a.grad)  # all twos

None
tensor([[1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.]])


### X.grad=None 手动把梯度归零

In [25]:
# Fresh leaf tensors for the manual-reset demo.
a = torch.full(size=(2, 2), fill_value=1.0, dtype=torch.float32, requires_grad=True)
b = torch.full(size=(2, 2), fill_value=2.0, dtype=torch.float32, requires_grad=True)

# No backward pass has run yet, so the gradient is still None.
print(a.grad)

# First pass. backward() needs a scalar, hence the sum(); calling c.backward()
# directly fails with "grad can be implicitly created only for scalar outputs".
c = a + b
t = c.sum()
t.backward()
print(a.grad)  # all ones

# Drop the stored gradient of a by hand. b.grad is left alone and keeps
# accumulating.
a.grad = None

# Second pass: a.grad starts from scratch, so it is all ones again, not twos.
c = a + b
t = c.sum()
t.backward()
print(a.grad)

None
tensor([[1., 1.],
        [1., 1.]])
tensor([[1., 1.],
        [1., 1.]])


### with torch.no_grad()

In [30]:
a = torch.full((2, 2), 1, dtype=torch.float32, requires_grad=True)
b = torch.full((2, 2), 2, dtype=torch.float32, requires_grad=True)

# No backward pass has run yet, so the gradient is still None.
print(a.grad)

# Operations executed inside no_grad() are not recorded in the autograd graph:
# c and t come out with requires_grad=False and without a grad_fn.
with torch.no_grad():
    c = a + b
    t = c.sum()
# Calling t.backward() here would raise
# "RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn".

# The same computation outside the context manager is tracked as usual.
c1 = a + b
t1 = c1.sum()
t1.backward()

# Show the gradient produced by the tracked pass (all ones). This print was
# missing although the recorded cell output contains its result.
print(a.grad)

None
tensor([[1., 1.],
        [1., 1.]])


#### 梯度的打开与关闭是针对运算过程而言

In [37]:
a = torch.full((2,2),1,dtype=torch.float32,requires_grad =True)
b = torch.full((2,2),2,dtype=torch.float32,requires_grad =True)

# The gradient starts out as None
print(a.grad)

# The forward pass runs with gradient tracking enabled, so the graph for
# c and t is recorded here.
c = a + b
t = c.sum()
with torch.no_grad():
    # No RuntimeError here: no_grad() only affects operations executed inside
    # it. The graph of t was already built above, so backward() still works
    # and fills a.grad, as the second print shows.
    t.backward()
print(a.grad)

None
tensor([[1., 1.],
        [1., 1.]])


### 在特定情况下打开梯度
torch.set_grad_enabled

In [33]:
# Fresh leaf tensors for the set_grad_enabled demo.
a = torch.full(size=(2, 2), fill_value=1.0, dtype=torch.float32, requires_grad=True)
b = torch.full(size=(2, 2), fill_value=2.0, dtype=torch.float32, requires_grad=True)

# No backward pass has run yet, so the gradient is still None.
print(a.grad)

# Baseline pass with the default (enabled) gradient mode.
c = a + b
t = c.sum()
t.backward()
print(a.grad)  # all ones

# Gradient tracking explicitly switched ON: behaves like the baseline pass,
# so the new gradient is accumulated on top of the previous one.
with torch.set_grad_enabled(True):
    c = a + b
    t = c.sum()
    t.backward()
    print(a.grad)  # all twos

# Gradient tracking switched OFF: the forward pass is not recorded, so c and t
# do not require grad.
with torch.set_grad_enabled(False):
    c = a + b
    t = c.sum()
# Calling t.backward() here would raise
# "RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn".
print(a.grad)  # unchanged: still all twos

None
tensor([[1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.]])
