In [13]:
#2.5 Automatic Differentiation

import torch

#2.5.1 A Simple Example

# Differentiate y = 2 * x^T x with respect to x.

x = torch.arange(4.0)  # only floating-point tensors can require gradients
print("x ==>\n",x,"\n")

x.requires_grad_(True)  # Same as `x = torch.arange(4.0, requires_grad=True)`
# No backward pass has run yet, so no gradient is stored. Check that explicitly:
# a bare `x.grad` expression in the middle of a cell is never displayed.
assert x.grad is None

y = 2 * torch.dot(x, x)  # torch.dot(a, b): inner product of a and b (both must be 1-D)
print("y ==>\n",y,"\n")

y.backward()  # backpropagate: fills x.grad with dy/dx
print("x.grad ==>\n",x.grad,"\n")  # show the gradient

# Analytically dy/dx = 4 * x, so every element should compare equal
print("x.grad==4*x ==>\n",x.grad==4*x,"\n")

# PyTorch accumulates gradients by default, so the previous values must be
# cleared before the next backward pass

print("沒歸零前的x.grad ==>\n",x.grad,"\n")

x.grad.zero_()  # reset x.grad to zero in place
print("歸零後的x.grad ==>\n",x.grad,"\n")

# Second function: y = sum(x); its gradient is 1 for every element
print("x ==>\n",x,"\n")
y = x.sum()
y.backward()
print("y ==>\n",y,"\n")
print("x.grad ==>\n",x.grad,"\n")

x ==>
 tensor([0., 1., 2., 3.]) 

y ==>
 tensor(28., grad_fn=<MulBackward0>) 

x.grad ==>
 tensor([ 0.,  4.,  8., 12.]) 

x.grad==4*x ==>
 tensor([True, True, True, True]) 

沒歸零前的x.grad ==>
 tensor([ 0.,  4.,  8., 12.]) 

歸零後的x.grad ==>
 tensor([0., 0., 0., 0.]) 

x ==>
 tensor([0., 1., 2., 3.], requires_grad=True) 

y ==>
 tensor(6., grad_fn=<SumBackward0>) 

x.grad ==>
 tensor([1., 1., 1., 1.]) 



In [14]:
#2.5.2 Backward for Non-Scalar Variables


# Calling `backward` on a non-scalar tensor requires a `gradient` argument that
# gives the gradient of the differentiated function w.r.t. `self`.
# Here we only want the sum of the partial derivatives, so a vector of ones
# would be the right argument; reducing to a scalar first is equivalent.

x.grad.zero_()  # clear whatever gradient is currently accumulated on x
y = x * x
# Same effect as y.backward(torch.ones(len(x)))
torch.sum(y).backward()
print("x.grad ==>\n",x.grad,"\n")


x.grad ==>
 tensor([0., 2., 4., 6.]) 



In [15]:
#2.5.3 Detaching Computation


x.grad.zero_()  # clear whatever gradient is currently accumulated on x
print("x ==>\n",x,"\n")

y = x * x
print("y ==>\n",y,"\n")

# detach() gives a tensor holding y's values but cut off from the graph,
# so backpropagation does not flow through u back into x
u = y.detach()
print("u ==>\n",u,"\n")

z = u * x
print("z ==>\n",z,"\n")

# u acts as a constant here, so the gradient of sum(z) w.r.t. x is u itself
torch.sum(z).backward()

print("x.grad ==>\n",x.grad,"\n")
print("x.grad==u ==>\n", x.grad==u ,"\n")

x ==>
 tensor([0., 1., 2., 3.], requires_grad=True) 

y ==>
 tensor([0., 1., 4., 9.], grad_fn=<MulBackward0>) 

u ==>
 tensor([0., 1., 4., 9.]) 

z ==>
 tensor([ 0.,  1.,  8., 27.], grad_fn=<MulBackward0>) 

x.grad ==>
 tensor([0., 1., 4., 9.]) 

x.grad==u ==>
 tensor([True, True, True, True]) 



In [16]:
#2.5.4 Computing the Gradient of Python Control Flow

def function(a):
    """Scale ``a`` through data-dependent Python control flow.

    ``a`` is doubled repeatedly until its L2 norm reaches at least 1000. The
    result is returned as is when its elements sum to a positive value, and
    multiplied by 100 otherwise. For any given input the output is therefore
    ``k * a`` for some constant ``k`` chosen by the control flow.

    Args:
        a: A tensor (the notebook passes a 0-dim float tensor).

    Returns:
        The scaled tensor, connected to ``a`` in the autograd graph.
    """
    b = a * 2
    # An all-zero (or empty) tensor never grows by doubling, so the loop below
    # would spin forever. Return what the non-positive-sum branch would give.
    if not (b != 0).any():
        return 100 * b
    while b.norm(p=2) < 1000:  # b.norm(p=2) is the L2 norm of b
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c



tensor(-0.8470, requires_grad=True)
tensor(True)


In [None]:
a = torch.randn(size=(), requires_grad=True)
print(a)
d = function(a)
d.backward()

# For a fixed input, function(a) equals k * a, so d / a recovers the slope k
# that autograd stored in a.grad. Compare with a tolerance rather than `==`:
# on the `100 * b` branch both the product and the division are rounded, so
# exact equality can fail for some random draws.
print(torch.isclose(a.grad, d / a))