In [1]:
import random
import torch
from d2l import torch as d2l
import plotly.express as px

In [13]:
# Build a synthetic dataset from a linear model with additive Gaussian noise.
def synthetic_data(w, b, num_examples, noise_std=0.01):
    """Generate features and labels for ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to sample.
        noise_std: Standard deviation of the zero-mean Gaussian noise added to
            the labels. Defaults to 0.01, the value that used to be hard-coded.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))`` with
        entries drawn from a standard normal; ``y`` is reshaped to one column.
    """
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = X @ w + b
    # Perturb the exact linear labels so the regression is not trivially exact.
    y += torch.normal(0, noise_std, y.shape)
    return X, y.reshape((-1, 1))

In [18]:
# Yield shuffled minibatches of (features, labels) with at most batch_size rows each.
def data_iter(batch_size, features, labels):
    """Iterate once over the dataset in random order, one minibatch at a time.

    Args:
        batch_size: Maximum number of examples per minibatch.
        features: Indexable collection of examples; ``len(features)`` is the
            dataset size.
        labels: Labels indexed by the same positions as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` pairs, where ``idx`` is a tensor of
        shuffled example indices.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)  # visit the examples in random order
    for start in range(0, total, batch_size):
        # Slicing clamps at the end of the list, so the last batch may be short.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

In [19]:
# Model definition: plain linear regression.
def linreg(X, w ,b):
    """Return the linear prediction ``X @ w + b``."""
    projection = X @ w
    return projection + b

In [20]:
# Loss definition: halved squared error, computed per element.
def squared_loss(y_hat, y):
    """Return ``(y_hat - y)**2 / 2`` element-wise, without reducing.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so the two
    only need to hold the same number of elements.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

In [21]:
# Optimizer definition: minibatch stochastic gradient descent.
def sgd(params, lr, batch_size):
    """Apply one in-place SGD step to every parameter, then clear its gradient.

    Args:
        params: Iterable of tensors whose ``.grad`` holds the gradient of the
            summed minibatch loss.
        lr: Learning rate.
        batch_size: Divisor that turns the summed gradient into an average.

    Parameters whose ``.grad`` is ``None`` (for example, tensors that did not
    take part in the last backward pass) are skipped instead of raising
    ``AttributeError``.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
            # Gradients accumulate across backward() calls, so reset them.
            param.grad.zero_()

In [36]:
# Ground-truth parameters of the linear model that training should recover.
true_w = torch.tensor([2.0, -3.4, 4.6, 5.0])
true_b = 2.2
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=2000)
# Optional visual check of one feature column against the labels:
# px.scatter(x=features[:, 2].numpy(), y=labels[:, 0].numpy())

In [37]:
# Initialise the model parameters: small Gaussian weights and a zero bias.
# Both are leaf tensors that track gradients so backward() can fill in .grad.
w = torch.normal(0, 0.01, size=(4, 1)).requires_grad_(True)
b = torch.zeros(1).requires_grad_(True)
w, b

(tensor([[-4.1599e-03],
         [-2.8797e-03],
         [-6.0918e-05],
         [ 8.0961e-04]], requires_grad=True),
 tensor([0.], requires_grad=True))

In [41]:
# Training loop for the from-scratch implementation.
lr = 0.02        # learning rate
num_epochs = 3   # full passes over the training set
batch_size = 10  # examples per minibatch
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        # Per-example losses for this minibatch: shape (len(X), 1), not a scalar.
        batch_loss = loss(net(X, w, b), y)
        # backward() needs a scalar, so sum the per-example losses first.
        batch_loss.sum().backward()
        # Normalise by the actual number of rows: the final minibatch is
        # shorter whenever batch_size does not divide the dataset size.
        sgd([w, b], lr, X.shape[0])
    # Report the mean loss over the whole training set without tracking gradients.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print('epoch:', epoch + 1, ', loss:', float(train_l.mean()))

epoch: 1 , loss: 4.978765718988143e-05
epoch: 2 , loss: 4.9655765906209126e-05
epoch: 3 , loss: 4.963951141689904e-05


In [42]:
# Display the weight tensor after training.
w

tensor([[ 2.0000],
        [-3.4001],
        [ 4.6001],
        [ 5.0003]], requires_grad=True)

In [40]:
# Compare the learned parameters with the ground truth; values near zero mean
# the fit recovered the generating model.
weight_error = true_w - w.reshape(true_w.shape)
bias_error = true_b - b
print(weight_error)
print(bias_error)

tensor([-1.5688e-04,  3.3379e-05, -2.7132e-04, -7.0858e-04],
       grad_fn=<SubBackward0>)
tensor([-4.7922e-05], grad_fn=<RsubBackward1>)


In [43]:
# 简洁实现
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

In [92]:
# Larger problem for the concise implementation: 200 random true weights
# and 10,000 examples.
# NOTE(review): d2l.synthetic_data is presumably the library counterpart of the
# synthetic_data function defined earlier in this notebook -- confirm.
true_w = torch.rand(200)
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 10_000)

In [93]:
def load_array(data_arrays, batch_size, is_train=True):
    """Wrap tensors in a ``DataLoader`` that serves minibatches.

    Args:
        data_arrays: Sequence of tensors unpacked into a ``TensorDataset``.
        batch_size: Number of examples per minibatch.
        is_train: Passed to the loader as ``shuffle``, so examples are
            reshuffled only when this is true.

    Returns:
        A ``torch.utils.data.DataLoader`` over the wrapped tensors.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset, batch_size, shuffle=is_train)
    return loader

In [94]:
# Minibatch loader for the concise implementation.
# NOTE: this rebinds `data_iter`, which earlier in the notebook named the
# hand-written generator function; re-running the from-scratch training cell
# after this one will no longer call that function.
batch_size = 10
data_iter = load_array((features, labels), batch_size=batch_size)

In [95]:
from torch import nn

In [96]:
# Single fully connected layer: one input per true weight, one scalar output.
net = nn.Sequential(nn.Linear(in_features=len(true_w), out_features=1))

In [97]:
# Initialise the layer with small Gaussian weights and a zero bias.
# torch.nn.init updates the parameters in place, avoiding the legacy `.data`
# attribute.
nn.init.normal_(net[0].weight, mean=0, std=0.01)
nn.init.zeros_(net[0].bias)

tensor([0.])

In [98]:
# MSE criterion plus minibatch SGD over every parameter of the network.
loss = nn.MSELoss()
trainer = torch.optim.SGD(params=net.parameters(), lr=0.01)

In [102]:
# Train the nn.Linear model with the built-in loss and optimizer.
num_epochs = 4
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()  # clear gradients left over from the previous step
        l.backward()
        trainer.step()
    # Evaluation only: skip autograd bookkeeping for the full-dataset pass.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l: f}')

epoch 1, loss  0.000123
epoch 2, loss  0.000124
epoch 3, loss  0.000123
epoch 4, loss  0.000123


In [103]:
# Display the ground-truth weights for comparison with the learned ones.
true_w

tensor([0.9178, 0.9831, 0.6053, 0.1471, 0.8662, 0.2901, 0.5716, 0.2572, 0.8310,
        0.2433, 0.9198, 0.2509, 0.3560, 0.9154, 0.0317, 0.9903, 0.4825, 0.0959,
        0.7179, 0.5983, 0.4834, 0.5394, 0.8175, 0.9286, 0.4135, 0.3338, 0.2561,
        0.1329, 0.6716, 0.5876, 0.3028, 0.9066, 0.9169, 0.2085, 0.4401, 0.9382,
        0.4233, 0.6717, 0.2521, 0.1116, 0.8789, 0.6875, 0.1291, 0.7273, 0.6381,
        0.0062, 0.6758, 0.3479, 0.4427, 0.6760, 0.8850, 0.7108, 0.0350, 0.8772,
        0.2178, 0.1213, 0.8677, 0.7147, 0.3507, 0.4535, 0.3160, 0.8949, 0.3147,
        0.3375, 0.6551, 0.1886, 0.9045, 0.7502, 0.1981, 0.2720, 0.8783, 0.6159,
        0.7883, 0.4780, 0.1054, 0.3530, 0.6005, 0.1651, 0.5569, 0.6722, 0.3404,
        0.5443, 0.9282, 0.3432, 0.2351, 0.5705, 0.9633, 0.0887, 0.1692, 0.4801,
        0.7562, 0.5201, 0.7933, 0.5811, 0.8881, 0.7017, 0.3285, 0.4123, 0.6611,
        0.4360, 0.2852, 0.1532, 0.0674, 0.0218, 0.8927, 0.8691, 0.1770, 0.3400,
        0.7136, 0.5249, 0.0567, 0.6372, 

In [104]:
# Inspect the optimizer's parameter groups, which hold the trained parameters.
trainer.param_groups

[{'params': [Parameter containing:
   tensor([[0.9176, 0.9833, 0.6050, 0.1476, 0.8664, 0.2910, 0.5715, 0.2572, 0.8313,
            0.2430, 0.9197, 0.2505, 0.3559, 0.9160, 0.0318, 0.9901, 0.4827, 0.0961,
            0.7178, 0.5981, 0.4832, 0.5390, 0.8168, 0.9283, 0.4132, 0.3344, 0.2563,
            0.1329, 0.6715, 0.5881, 0.3022, 0.9067, 0.9172, 0.2082, 0.4398, 0.9385,
            0.4232, 0.6721, 0.2516, 0.1116, 0.8785, 0.6874, 0.1290, 0.7275, 0.6380,
            0.0060, 0.6758, 0.3473, 0.4432, 0.6760, 0.8845, 0.7112, 0.0348, 0.8769,
            0.2178, 0.1212, 0.8687, 0.7142, 0.3503, 0.4536, 0.3160, 0.8951, 0.3138,
            0.3371, 0.6550, 0.1885, 0.9052, 0.7501, 0.1977, 0.2717, 0.8783, 0.6159,
            0.7881, 0.4778, 0.1057, 0.3530, 0.6004, 0.1654, 0.5568, 0.6719, 0.3406,
            0.5443, 0.9282, 0.3432, 0.2351, 0.5706, 0.9635, 0.0887, 0.1692, 0.4798,
            0.7566, 0.5200, 0.7935, 0.5809, 0.8880, 0.7015, 0.3288, 0.4133, 0.6613,
            0.4360, 0.2853, 0.1526, 0.067