In [1]:
# AUTOMATIC DIFFERENTIATION
# autograd provides automatic differentiation for all operations on tensors.

In [2]:
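# The forward pass records the topology of the operations; the backward pass is then run automatically.
# .requires_grad=True makes autograd track the operations applied to a tensor.
# .backward() computes the gradients automatically. If the tensor is a scalar, no argument is needed;
#   otherwise pass a `gradient` tensor with the same shape as the tensor.
# .detach() stops a tensor from being tracked.
# with torch.no_grad(): wraps code to stop history tracking. Useful when evaluating a model whose
#   trainable parameters have requires_grad=True but for which no gradients are needed.
# Tensor and Function together form the graph; each tensor's .grad_fn attribute references a Function.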

In [3]:
import torch 
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

In [4]:
# A 2x2 tensor of ones; requires_grad=True asks autograd to record operations on it.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [5]:
# y is produced by an operation on a tracked tensor, so it carries a grad_fn.
y = 2 + x
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward>)


In [6]:
# z = 3 * y^2 element-wise; out reduces z to a single scalar by averaging.
z = 3 * (y * y)
out = z.mean()

print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward>) tensor(27., grad_fn=<MeanBackward1>)


In [7]:
# Tensors are created with requires_grad=False unless asked otherwise.
a = torch.randn(2, 2)
a = 3 * a / (a - 1)
print(a.requires_grad)

# requires_grad_() flips the flag in place on the existing tensor.
a.requires_grad_(True)
print(a.requires_grad)

# Operations performed after the flag is set are tracked, so b has a grad_fn.
b = (a * a).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7fd2e4b062b0>


In [8]:
# Backpropagate from `out` (the mean computed earlier); it is a scalar,
# so backward() is called without a gradient argument.
out.backward()

In [9]:
# Gradient d(out)/dx.
# With out = mean(3 * (x + 2)^2) over four elements, d(out)/dx_i = 1.5 * (x_i + 2),
# which is 4.5 for x_i = 1.
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [10]:
# Start from a random 3-element tensor that autograd tracks.
x = torch.randn(3, requires_grad=True)

# Keep doubling y until its norm reaches at least 1000. The number of
# doublings depends on the random draw, so y ends up as x * 2**k for some k.
y = x * 2
# The norm is only used for the loop condition, so it is taken on a detached
# view to keep it out of the graph. detach() is used rather than the legacy
# `.data` accessor, which bypasses autograd's in-place-modification checks.
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([-225.3866, -982.9327,   88.4775], grad_fn=<MulBackward>)


In [11]:
# y is not a scalar here, so backward() needs a `gradient` tensor of the same
# shape as y; autograd then computes the vector-Jacobian product with it.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=v)

print(x.grad)

tensor([ 51.2000, 512.0000,   0.0512])


In [12]:
# Outside no_grad(), results computed from a tracked tensor are tracked too.
tracked = x ** 2
print(x.requires_grad)
print(tracked.requires_grad)

# Inside no_grad(), the same computation yields a tensor that is not tracked.
with torch.no_grad():
    untracked = x ** 2
    print(untracked.requires_grad)

True
True
False


In [13]:
# Three scalar leaf tensors, all tracked by autograd: x = 1, w = 2, b = 3.
x, w, b = (torch.tensor(value, requires_grad=True) for value in (1., 2., 3.))

# Forward pass builds the graph for y = w * x + b (i.e. y = 2 * x + 3).
y = w * x + b

# Backward pass fills in .grad on every leaf.
y.backward()

# Expected: dy/dx = w = 2, dy/dw = x = 1, dy/db = 1.
print(x.grad, w.grad, b.grad, sep="\n")

tensor(2.)
tensor(1.)
tensor(1.)


In [14]:
# Random inputs of shape (10, 3) and random targets of shape (10, 2).
x = torch.randn(10, 3)
y = torch.randn(10, 2)

# Fully connected layer mapping 3 input features to 2 outputs.
linear = nn.Linear(in_features=3, out_features=2)
print("w: ", linear.weight)
print("b: ", linear.bias)

# Mean-squared-error loss, optimized with plain SGD over the layer's parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=linear.parameters(), lr=0.01)

# Forward pass and loss before any update.
pred = linear(x)
loss = criterion(pred, y)
print("loss: ", loss.item())

# Backward pass populates .grad on the weight and the bias.
loss.backward()
print("dL/dw: ", linear.weight.grad)
print("dL/db: ", linear.bias.grad)

# Take a single gradient-descent step.
optimizer.step()

# The same update could be written by hand (kept as a reference, not executed):
#   linear.weight.data.sub_(0.01 * linear.weight.grad.data)
#   linear.bias.data.sub_(0.01 * linear.bias.grad.data)

# Recompute the loss with the updated parameters.
pred = linear(x)
loss = criterion(pred, y)
print("loss after 1 step optimization: ", loss.item())


w:  Parameter containing:
tensor([[ 0.2526,  0.5655,  0.5378],
        [ 0.4701, -0.3225, -0.3477]], requires_grad=True)
b:  Parameter containing:
tensor([0.3487, 0.0303], requires_grad=True)
loss:  0.8997774124145508
dL/dw:  tensor([[-0.3102,  0.3232,  0.6453],
        [ 0.5888,  0.1786, -0.6822]])
dL/db:  tensor([0.0547, 0.1479])
loss after 1 step optimization:  0.8850268721580505


In [15]:
# Start from a 2x2 numpy array.
x = np.array(((1, 2), (3, 4)))

# numpy -> torch: from_numpy() wraps the array; the tensor shares its memory.
y = torch.from_numpy(x)

# torch -> numpy: numpy() exposes the tensor's storage as an ndarray again.
z = y.numpy()