In [4]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

<br></br>
<font size=4>根据带有噪声的线性模型构造一个人造数据集。 我们使用线性模型参数 $\mathbf{w} = [2, -3.4]^\top$、$b = 4.2$ 和噪声项 $\epsilon$ 生成数据集及其标签：$\mathbf{y} = \mathbf{X}\mathbf{w} + b + \epsilon$</font>

In [5]:
def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    Args:
        w: Weight tensor; ``len(w)`` is used as the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of examples (rows) to generate.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(num_examples, len(w))`` and is
        drawn from a standard normal distribution. ``y`` is reshaped to a
        single column; for a 1-D ``w`` (as used in this notebook) that is
        ``(num_examples, 1)``.
    """
    feature_shape = (num_examples, len(w))
    X = torch.normal(0, 1, feature_shape)
    clean_labels = torch.matmul(X, w) + b
    # Additive Gaussian noise with mean 0 and standard deviation 0.01.
    noise = torch.normal(0, 0.01, clean_labels.shape)
    # -1 lets reshape infer the row count so the labels form one column.
    return X, (clean_labels + noise).reshape((-1, 1))
# Ground-truth parameters used to generate the dataset.
true_w, true_b = torch.tensor([2, -3.4]), 4.2
# Draw 1000 noisy examples from the ground-truth linear model.
features, labels = synthetic_data(true_w, true_b, num_examples=1000)

<br></br>
<font size=4>定义一个data_iter函数， 该函数接收批量大小、特征矩阵和标签向量作为输入，生成大小为batch_size的小批量
</font>

In [11]:
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    Args:
        batch_size: Number of examples per mini-batch.
        features: Tensor indexed along its first dimension by example.
        labels: Tensor indexed along its first dimension by example, aligned
            with ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for consecutive chunks of a random
        permutation of the example indices. The final batch is smaller when
        the number of examples is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    # Examples are read in random order, with no particular sequence.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # List slicing clamps at the end, so the last chunk may be short.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]
batch_size = 10
# Build the mini-batch generator, then look at its first batch only.
batches = data_iter(batch_size, features, labels)
for X, y in batches:
    print(X, '\n', y)
    break  # one batch is enough for a quick sanity check

tensor([[ 1.5851,  0.7006],
        [ 0.8069, -1.5701],
        [-0.6452,  0.9220],
        [-0.4068, -0.1675],
        [-0.9974, -1.4948],
        [-0.9441,  0.0803],
        [-0.9079, -0.7219],
        [-0.2759, -1.2146],
        [-0.7426, -0.8097],
        [ 0.4735,  0.3907]]) 
 tensor([[ 5.0026],
        [11.1526],
        [-0.2341],
        [ 3.9658],
        [ 7.2837],
        [ 2.0387],
        [ 4.8427],
        [ 7.7800],
        [ 5.4734],
        [ 3.8100]])


<br></br>
<font size=4>初始化模型参数</font>

In [13]:
# Weights: small random values (mean 0, std 0.01) as a (2, 1) column vector.
w = torch.normal(0, 0.01, size=(2, 1)).requires_grad_(True)
# Bias: starts at zero. Both tensors are tracked by autograd.
b = torch.zeros(1).requires_grad_(True)
b

tensor([0.], requires_grad=True)

<br></br>
<font size=4>定义模型</font>

In [14]:
def linereg(X, w, b):
    """Linear regression model: return ``Xw + b``.

    Args:
        X: Feature tensor.
        w: Weight tensor, matrix-multiplied with ``X``.
        b: Bias, added to the product.

    Returns:
        The result of ``torch.matmul(X, w) + b``.
    """
    weighted_sum = X @ w
    return weighted_sum + b

<br></br>
<font size=4>定义损失函数</font>

In [16]:
def squared_loss(y_hat, y):
    """Element-wise halved squared error, ``(y_hat - y) ** 2 / 2``.

    Args:
        y_hat: Predictions.
        y: Targets.

    Returns:
        A tensor of per-element losses with the same shape as ``y_hat`` when
        ``y`` has the same number of elements.

    If both arguments are tensors with the same number of elements but
    different shapes (for example ``(n, 1)`` predictions and ``(n,)`` targets),
    ``y`` is reshaped to match ``y_hat``. Without this, the subtraction would
    broadcast to an ``(n, n)`` matrix and silently produce a wrong loss.
    Other shape combinations (such as a scalar target) keep normal
    broadcasting.
    """
    if (
        torch.is_tensor(y_hat)
        and torch.is_tensor(y)
        and y.shape != y_hat.shape
        and y.numel() == y_hat.numel()
    ):
        y = y.reshape(y_hat.shape)
    return (y_hat - y) ** 2 / 2

<br></br>
<font size=4>定义优化算法</font>

In [18]:
def sgd(params, lr, batch_size):
    """Perform one mini-batch stochastic gradient descent step in place.

    Args:
        params: Iterable of tensors to update. Each is expected to carry a
            ``.grad`` holding the gradient summed over the mini-batch.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient, turning the summed
            gradient into a per-example average.

    Parameters whose ``.grad`` is ``None`` (they took no part in the backward
    pass) are skipped instead of raising ``TypeError``.
    """
    # Updates must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size  # average over the batch
            # Reset so the next backward() does not accumulate onto this step.
            param.grad.zero_()

<br></br>
<font size=4>训练过程</font>

In [23]:
lr = 0.03  # learning rate
num_epoches = 3  # number of full passes over the dataset
net = linereg
loss = squared_loss

for epoch in range(num_epoches):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # per-example loss on the mini-batch X, y
        # l is a (batch size, 1) tensor rather than a scalar, so its elements
        # are summed before back-propagating to get gradients w.r.t. [w, b].
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # update parameters using their gradients
    with torch.no_grad():
        # Evaluate the loss on the full dataset without tracking gradients.
        train_l = loss(net(features, w, b), labels)
        # Fixed label: the original printed the duplicated word 'epochepoch'.
        print(f'epoch {epoch + 1}, loss {float(train_l.mean())}')
        

epochepoch 1, loss 0.00018728685972746462
epochepoch 2, loss 4.99427187605761e-05
epochepoch 3, loss 5.014161069993861e-05


In [24]:
# Print the learned parameters for comparison with true_w and true_b.
print(w, b)

tensor([[ 1.9993],
        [-3.4001]], requires_grad=True) tensor([4.1994], requires_grad=True)
