In [1]:
import torch

In [2]:
# A scalar leaf tensor that autograd tracks.
a = torch.tensor(2.0, requires_grad=True)
# exp() of a tracked tensor yields a non-leaf result carrying a grad_fn.
b = torch.exp(a)
print(b)

tensor(7.3891, grad_fn=<ExpBackward>)


In [3]:
# Constant 2x2 input: autograd does not track it.
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells refer to it.
input = torch.ones((2, 2), requires_grad=False)

# Three scalar leaf weights (2.0, 3.0, 3.0) that autograd tracks.
w1, w2, w3 = (
    torch.tensor(value, requires_grad=True) for value in (2.0, 3.0, 3.0)
)

In [4]:
# Forward pass: every intermediate below is a non-leaf tensor produced by an
# operation on the input and the three weights.
l1 = torch.mul(input, w1)
l2 = torch.add(l1, w2)
l3 = torch.mul(l1, w3)
l4 = torch.mul(l2, l3)
# Reduce to a scalar so backward() can later be called without arguments.
loss = torch.mean(l4)

In [9]:
# Inspect the leaf weight w1: its value, its accumulated gradient and its grad_fn.
# The recorded output shows grad_fn is None for this user-created tensor.
# NOTE(review): w1.grad is only populated once loss.backward() has run. The
# execution counts (In [8] before In [9]) indicate the backward cell, which
# appears further down the notebook, was executed before this one.
print(w1.data,w1.grad,w1.grad_fn)

tensor(2.) tensor(21.) None


In [10]:
# Inspect the intermediate tensor l1 (computed as input * w1).
# The recorded output shows .grad is None and grad_fn is a MulBackward0
# object, i.e. l1 is the result of an operation rather than a user-created leaf.
print(l1.data,l1.grad,l1.grad_fn)

tensor([[2., 2.],
        [2., 2.]]) None <MulBackward0 object at 0x1080ab2b0>




In [11]:
# Inspect the scalar loss (mean of l4).
# The recorded output shows the value 30, .grad None and a MeanBackward0
# grad_fn, so loss is likewise a non-leaf tensor.
print(loss.data,loss.grad,loss.grad_fn)

tensor(30.) None <MeanBackward0 object at 0x1081c8da0>




In [8]:
# Back-propagate from the scalar loss. The recorded output of the w1
# inspection cell (grad == 21) shows that this fills in .grad on the leaf
# weight, while the intermediate tensors keep .grad == None.
loss.backward()

# 叶子张量 leaf tensor:反向传播时，只保留属性requires_grad和is_leaf都为真的张量的导数。
# requires_grad为真，is_leaf为假时，此张量的导数只作为中间结果用于计算叶子张量的导数，默认不会被保留。
# requires_grad为假的张量按约定一定是叶子张量(is_leaf为真)；它只作为常量参与计算，不会为它计算导数。

In [14]:
# A tensor created directly by the user with requires_grad=True is a leaf.
a = torch.ones(2, 2, requires_grad=True)
print(a.is_leaf)

True


In [15]:
# b is produced by an operation on a tracked tensor, so it is not a leaf.
b = torch.add(a, 2)
print(b.is_leaf)

False


# 因为b不是用户创建的，是通过计算生成的

# 叶子张量的作用：节省内存或者显存 

# 叶子节点的grad_fn都为空
# 非叶子节点的grad_fn都不为空
# 如果我们想保留中间变量的导数，该怎么操作？
### 通过使用tensor.retain_grad()

In [6]:
# Rebuild the forward graph inside this cell. An earlier backward() call
# frees the buffers saved for l1..l4, so calling backward() again through
# that same graph raises "Trying to backward through the graph a second time".
# A fresh graph makes the cell re-runnable in any order.
l1 = input * w1
l2 = l1 + w2
l3 = l1 * w3
l4 = l2 * l3
loss = l4.mean()

# Non-leaf tensors do not keep .grad after backward() by default;
# retain_grad() asks autograd to store it for these intermediates.
l1.retain_grad()
l4.retain_grad()
loss.retain_grad()

# Note: gradients of the leaf weights accumulate across backward() calls;
# only the retained gradients of the freshly built intermediates are printed.
loss.backward()
print(loss.grad)
print(l4.grad)
print(l1.grad)

tensor(1.)
tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])
tensor([[5.2500, 5.2500],
        [5.2500, 5.2500]])


# 如果我们只想进行debug,只需要输出中间变量的导数信息，而不需要保存他们，我们还可以使用tensor.register_hook,如下

In [5]:
# Rebuild the forward graph inside this cell. Backward through a graph that
# an earlier backward() call already consumed raises a RuntimeError, and
# re-running the cell on the same tensors would stack duplicate hooks.
# Fresh tensors avoid both problems.
l1 = input * w1
l2 = l1 + w2
l3 = l1 * w3
l4 = l2 * l3
loss2 = l4.mean()

# Each hook is called with the gradient of its tensor during backward();
# nothing is stored on the tensor itself.
l1.register_hook(lambda grad: print("l1 grad:", grad))
l4.register_hook(lambda grad: print("l4 grad", grad))
loss2.register_hook(lambda grad: print("loss grad:", grad))

loss2.backward()
# loss2 is a non-leaf tensor without retain_grad(), so .grad stays None.
print(loss2.grad)

loss grad: tensor(1.)
l4 grad tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])
l1 grad: tensor([[5.2500, 5.2500],
        [5.2500, 5.2500]])
None




# 可以看到，是先打印loss的grad,后打印l4,l1的grad的
# 并且在最后print(loss2.grad)的时候
# 打印出了None
# 这说明hook只是在反向传播经过该张量时拿到梯度并打印；loss2是非叶子张量，它的.grad默认不会被保留(除非调用retain_grad())，所以是None

# pytorch 中，Hook的作用非常大

# inplace 操作:inplace operation

# 在不更改变量的内存地址的情况下，直接修改变量的值。就叫inplace操作

In [16]:
# Tensor case: exp() returns a new tensor, so rebinding `a` changes its id.
a = torch.tensor([3.0, 1.0])
print(id(a))
a = torch.exp(a)  # not in-place: a new object is created
print(id(a))

# List case: item assignment mutates the same object, so the id is unchanged.
b = [1, 2]
print(id(b))
b[0] = 10  # in-place
print(id(b))

4527517632
4527516768
4985982728
4985982728


# 以上id有变化的不属于inplace操作,id没有变化的，才是inplace操作

# pytorch 怎么检测tensor发生了inplace操作？通过tensor._version

In [20]:
# Demonstrates autograd's in-place check via the tensor version counter.
a = torch.tensor([1.0, 3.0], requires_grad=True)
b = a + 2
print(b._version)

# The multiplication saves b for its backward pass ...
loss = (b * b).mean()
# ... and this in-place write bumps b's version counter afterwards.
b[0] = 1000.0
print(b._version)

# backward() is expected to fail because the saved tensor was modified.
# The error is the point of the demo, so catch and display it instead of
# letting it abort a Restart & Run All.
inplace_error = None
try:
    loss.backward()
except RuntimeError as err:
    inplace_error = err
    print(f"RuntimeError: {err}")

0
1


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [2]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

#### RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [2]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead.

# 每次tensor进行inplace时，_version的值就会加1，在正向传播过程中，求导系统记录的b的version是0，但是反向传播过程中，求导系统发现b的version变成了1，与记录的version不一致，于是报错。

# 对于requires_grad=True的叶子节点的值，在求梯度之前，是不允许被inplace修改的。
# 类似以下例子：

## RuntimeError: leaf variable has been moved into the graph interior

In [30]:
# Create a leaf tensor and switch gradient tracking on for it.
a = torch.tensor([10.0, 5.0, 2.0, 3.0]).requires_grad_()
# Show the leaf flag and the object identity for comparison with later cells.
print(a, a.is_leaf)
print(a, id(a))

tensor([10.,  5.,  2.,  3.], requires_grad=True) True
tensor([10.,  5.,  2.,  3.], requires_grad=True) 4527514248


In [31]:
# In-place writes to the leaf tensor `a`. The recorded output shows that
# afterwards `a` reports is_leaf == False with grad_fn=<CopySlices>, while
# id(a) is unchanged: the same Python object became a non-leaf tensor.
# NOTE(review): the recorded values are all zeros even though add_(10.)
# follows the slice assignment, so the saved output looks stale — re-run to confirm.
# NOTE(review): newer PyTorch releases presumably reject in-place writes to a
# leaf that requires grad with a RuntimeError at this point — confirm against
# the installed version.
a[:]=0
a.add_(10.)
print(a,a.is_leaf)
print(a,id(a))

tensor([0., 0., 0., 0.], grad_fn=<CopySlices>) False
tensor([0., 0., 0., 0.], grad_fn=<CopySlices>) 4527514248


In [29]:
# Build a scalar loss from `a` and back-propagate through it.
loss3 = (a * a).mean()

# The recorded output of this cell is a RuntimeError, and that failure is the
# point of the demo. Catch and display it so the notebook keeps running.
# NOTE(review): which error (if any) is raised depends on the state of `a`
# left by the previous cells and on the installed PyTorch version.
try:
    loss3.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

 # 在使用a[:]=0时，实际上是用inplace操作把一个叶子节点变成了非叶子节点，这样的话，它的导数就不会被保存，a.grad就是None了。