In [1]:
import torch as t
from torch.autograd import Variable as V


In [2]:
# Build a tiny graph z = w * x + b.
# x is a constant input (no gradient tracking); b and w are leaves that
# track gradients. Plain tensor factories replace the deprecated Variable
# wrapper; the resulting tensors behave the same.
x = t.ones(1)
b = t.rand(1, requires_grad=True)
w = t.rand(1, requires_grad=True)
y = w * x  # equivalent to y = w.mul(x)
z = y + b  # equivalent to z = y.add(b)


In [3]:
# Only b and w were created with requires_grad=True; x was not.
x.requires_grad,b.requires_grad,w.requires_grad

(False, True, True)

In [4]:
# y and z were never explicitly marked as requiring gradients, but they
# depend on w and b, which do, so they require gradients as well.
y.requires_grad,z.requires_grad

(True, True)

In [5]:
# x, b and w were created directly, while y and z are results of operations
# on them; compare the is_leaf flag of each.
x.is_leaf,b.is_leaf,w.is_leaf,y.is_leaf,z.is_leaf

(True, True, True, False, False)

In [6]:
# z is the output of an add, so its backward function is AddBackward.
z.grad_fn

<AddBackward0 at 0x27736472710>

In [7]:
# next_functions holds the inputs of grad_fn; for z's grad_fn those inputs
# are y (the output of the multiplication) and b.
z.grad_fn.next_functions

((<MulBackward0 at 0x27746ab2860>, 0), (<AccumulateGrad at 0x27746ab28d0>, 0))

In [8]:
# The first entry of z's next_functions is the backward node stored on y.
z.grad_fn.next_functions[0][0] == y.grad_fn

True

In [9]:
# The first entry is w: a leaf that requires grad, so its gradient is accumulated.
# The second entry is x: a leaf that does not require grad, hence None.
y.grad_fn.next_functions

((<AccumulateGrad at 0x27746ab2940>, 0), (None, 0))

In [10]:
# w and x are leaves, not the output of any operation; inspect their grad_fn.
w.grad_fn,x.grad_fn

(None, None)

In [11]:
# Pass retain_graph=True to keep the graph's buffers after this backward pass.
z.backward(retain_graph = True)
w.grad

tensor([1.])

In [12]:
# Running backward repeatedly accumulates gradients; this is what the
# AccumulateGrad node associated with w signifies.
z.backward(retain_graph = True)
w.grad


tensor([2.])

##### PyTorch使用的是动态图，它的计算图在每次前向传播时都是从头开始构建，所以它能够使用Python控制语句（如for、if等）根据需求创建计算图。这点在自然语言处理领域中很有用，它意味着你不需要事先构建所有可能用到的图的路径，图在运行时才构建。

In [13]:
def abs(x):
    """Return ``x`` or ``-x`` depending on the sign of its (first) value.

    The branch is taken with an ordinary Python conditional at run time, so
    the result is either ``x`` itself or a freshly negated tensor.

    Note: defining this at top level shadows the built-in ``abs``.

    Args:
        x: A tensor. Zero-dimensional tensors are inspected directly; for
            tensors with one or more dimensions the sign of ``x[0]`` is used.

    Returns:
        ``x`` unchanged when the inspected value is greater than zero,
        otherwise ``-x``.
    """
    # Read the value through detach() rather than the legacy .data accessor.
    probe = x.detach()
    if probe.dim() > 0:
        # Only tensors with at least one dimension can be indexed; a 0-dim
        # tensor is already the scalar to test.
        probe = probe[0]
    return x if probe > 0 else -x

# Positive input: abs returns x itself, so dy/dx is 1.
# The tensor is created with requires_grad directly instead of going through
# the deprecated Variable wrapper.
x = t.ones(1, requires_grad=True)
y = abs(x)
y.backward()
x.grad


tensor([1.])

In [14]:
# Negative input: abs returns -x, so dy/dx is -1.
# The tensor is created with requires_grad directly instead of going through
# the deprecated Variable wrapper.
x = t.tensor([-1.0], requires_grad=True)
y = abs(x)
y.backward()
x.grad


tensor([-1.])


In [15]:


def f(x):
    """Multiply together the strictly positive entries of ``x``.

    Iterates over ``x`` and keeps a running product, skipping every entry
    whose Python scalar value (``entry.item()``) is not greater than zero.
    The running product starts as the plain integer ``1``, so that integer
    is returned unchanged when no entry qualifies.
    """
    product = 1
    for entry in x:
        if not entry.item() > 0:
            continue
        product = entry * product
    return product
# Alternative random input: x = t.randn(6, 1, requires_grad=True)
# The tensor is built with t.tensor(..., requires_grad=True) instead of the
# legacy t.Tensor constructor wrapped in the deprecated Variable.
x = t.tensor([-2.0, -1.0, 0.0, 1.0, 2.0, 3.0], requires_grad=True)
y = f(x)  # only the positive entries contribute: y = x[3] * x[4] * x[5]
y.backward()
x.grad


tensor([0., 0., 0., 6., 3., 2.])