## 使用梯度下降调整权重和偏置

In [1]:
import numpy as np
import torch

In [2]:
# Input features, one row per sample: (temp, rainfall, humidity).
# Built as a float32 NumPy array and wrapped as a tensor via from_numpy.
inputs = torch.from_numpy(
    np.array(
        [
            [73, 67, 43],
            [91, 88, 64],
            [87, 134, 58],
            [102, 43, 37],
            [69, 96, 70],
        ],
        dtype=np.float32,
    )
)

# Regression targets, one row per sample: (apples, oranges).
targets = torch.from_numpy(
    np.array(
        [
            [56, 70],
            [81, 101],
            [119, 133],
            [22, 37],
            [103, 119],
        ],
        dtype=np.float32,
    )
)

In [3]:
# Seed the global torch RNG so the random initial parameters -- and therefore
# every loss/weight value printed later -- are identical on each fresh run.
SEED = 42
torch.manual_seed(SEED)

# Trainable parameters of the linear model: a 2x3 weight matrix and a
# length-2 bias vector. requires_grad=True makes autograd track them.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)

In [4]:
# Define the model
def model(x):
    """Linear model: multiply ``x`` by the transposed weights and add the bias.

    Reads the notebook-level parameters ``w`` and ``b`` at call time, so it
    always uses their current values.
    """
    weighted = x.matmul(w.t())
    return weighted + b

# Mean squared error loss
def mse(t1, t2):
    """Return the mean of the element-wise squared differences of two tensors.

    The squared differences are summed and divided by the number of elements
    in the difference tensor.
    """
    residual = t1 - t2
    squared = residual * residual
    return squared.sum() / residual.numel()

In [6]:
# Forward pass with the current parameters, then the MSE against the targets.
preds = model(inputs)
loss = mse(preds, targets)
# Print the loss before any parameter update has been applied.
print(loss)

tensor(18746.5391, grad_fn=<DivBackward0>)


### 计算梯度

In [7]:
# Backpropagate: fills w.grad and b.grad with the gradient of the loss.
loss.backward()

In [8]:
# Show the weights and the gradient of the loss with respect to them.
print(w)
print(w.grad)

tensor([[ 0.3394,  1.2554, -1.0599],
        [-1.8666, -0.1720,  1.3499]], requires_grad=True)
tensor([[   399.9787,     55.7572,    -76.6757],
        [-16220.6152, -16901.8535, -10435.6660]])


PyTorch 会累积梯度：下一次对损失调用 `.backward()` 时，新的梯度值会被加到 `.grad` 中已有的梯度值上，这可能导致意外的结果。因此每次更新参数之后，都需要调用 `.zero_()` 将梯度清零。

In [12]:
# Learning rate (step size) for gradient descent.
eta = 1e-5

# One manual gradient-descent step. no_grad() keeps the in-place parameter
# updates from being recorded by autograd.
with torch.no_grad():
    w.sub_(w.grad * eta)
    # backward() accumulates into .grad, so reset it once it has been used.
    w.grad.zero_()
    b.sub_(b.grad * eta)
    b.grad.zero_()

In [13]:
# Show the parameters after the gradient-descent update.
print(w)
print(b)

tensor([[ 0.3354,  1.2548, -1.0591],
        [-1.7044, -0.0030,  1.4543]], requires_grad=True)
tensor([0.3397, 0.4434], requires_grad=True)


In [14]:
# Recompute predictions and loss with the updated parameters.
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(12756.5527, grad_fn=<DivBackward0>)


In [15]:
# Number of gradient-descent iterations to run.
epochs = 100
# Train for `epochs` iterations
for epoch in range(epochs):
    # Forward pass and loss with the current parameters.
    preds = model(inputs)
    loss = mse(preds, targets)
    # Backward pass: populate w.grad and b.grad.
    loss.backward()
    # Update each parameter in place without autograd tracking, then clear
    # its gradient so the next backward() starts from zero.
    with torch.no_grad():
        w.sub_(w.grad * eta)
        w.grad.zero_()
        b.sub_(b.grad * eta)
        b.grad.zero_()

In [16]:
# Evaluate the model after the training loop has finished.
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(229.8890, grad_fn=<DivBackward0>)


In [17]:
# Display the predictions from the latest forward pass.
preds

tensor([[ 60.3934,  67.1840],
        [ 71.7915, 105.5217],
        [137.1920, 126.9499],
        [ 38.2460,  19.1132],
        [ 73.8514, 138.1142]], grad_fn=<AddBackward0>)

In [18]:
# Display the ground-truth targets for comparison with the predictions.
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])