# 线性回归模拟

### 从零模拟

1. 根据给定w和b，生成数据集

In [1]:
# 自己制造数据集
import torch
def synthetic_data(w, b, num_examples):
    """生成 y = Xw + b + 噪声"""
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))

2. 设定w和b的初始值

In [2]:
true_w = torch.tensor([-1,8.2])
true_b = torch.tensor([2.5])

features, labels = synthetic_data(true_w, true_b, 1000)

features.shape, labels.shape

(torch.Size([1000, 2]), torch.Size([1000, 1]))

3. 定义网络结构

In [3]:
def linear_regression(X,w,b):
    return torch.matmul(X,w)+b

4. 设置数据加载器

In [4]:
import random
import numpy as np
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # 样本的读取顺序是随机的
    for i in range(0, num_examples, batch_size):
        j = indices[i: min(i + batch_size, num_examples)]
        yield features[j], labels[j]

5. 初始化需要学习的参数w和b

In [5]:
w = torch.normal(0,0.01,size=(2,1),requires_grad=True)
b = torch.zeros(1,requires_grad=True)

6. 定义损失函数

In [6]:
def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape))**2 / 2

7. 定义优化器

In [7]:
def sgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size # 这一步是因为w的梯度是累加的，所以要除以batch_size，得到这个batch的平均梯度
            param.grad.zero_() # 这里要把梯度置零，否则下一次迭代时，梯度会累加到这个batch的梯度上

8. 开始训练！

In [8]:
lr = 0.01
num_epochs = 10
batch_size = 10
net = linear_regression
loss = squared_loss
optimizer = sgd

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)
        l.sum().backward() # 这里l一定要sum!否则l是长度为batch_size的向量,正常情况下我们对标量求导
        optimizer([w, b], lr, batch_size)
    print(f'epoch {epoch + 1}, loss {float(l.sum()):f}')

epoch 1, loss 36.510880
epoch 2, loss 5.773436
epoch 3, loss 0.408471
epoch 4, loss 0.221545
epoch 5, loss 0.020463
epoch 6, loss 0.006198
epoch 7, loss 0.000917
epoch 8, loss 0.000632
epoch 9, loss 0.000185
epoch 10, loss 0.000534


9. 评估：

In [9]:
print("训练后的w值为"+str(w.reshape(-1))+",误差为"+str(true_w-w.reshape(-1)))
print("训练后的b值为"+str(b.reshape(-1))+",误差为"+str(true_b-b.reshape(-1)))

训练后的w值为tensor([-0.9999,  8.1994], grad_fn=<ViewBackward0>),误差为tensor([-7.2896e-05,  6.2847e-04], grad_fn=<SubBackward0>)
训练后的b值为tensor([2.5001], grad_fn=<ViewBackward0>),误差为tensor([-5.2452e-05], grad_fn=<SubBackward0>)


### 简洁实现

In [10]:
import numpy as np
import torch
from torch.utils import data

true_w = torch.tensor([4.2,-1])
true_b = torch.tensor(1.3)

features, labels = synthetic_data(true_w, true_b, 1000)

使用torch.utils.data中的工具来帮助实现数据集的加载

In [11]:
def load_array(data_arrays, batch_size, is_train=True):
    """构造一个数据迭代器。"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

构建线性网络模型

In [12]:
from torch import nn
net = nn.Sequential(nn.Linear(2, 1))
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
print(net)

Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
)


损失函数、优化器

In [13]:
loss = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

开始训练：

In [14]:
num_epochs = 10
lr = 0.01
batch_size = 32
data_iter = load_array((features, labels), batch_size)

for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    train_l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

tensor([[ 0.0886,  0.7094],
        [ 1.1554,  1.1027],
        [-1.8997, -0.2168],
        [ 1.9487,  0.4601],
        [-0.1511, -0.7074],
        [ 0.6546,  0.1550],
        [ 0.5974,  0.3824],
        [ 1.1563, -0.8736],
        [-0.9039,  1.3304],
        [-1.7756, -0.4161],
        [-0.7413, -1.1835],
        [ 0.5877, -1.2478],
        [ 0.7199, -0.9388],
        [-0.5010,  0.3430],
        [ 0.8634,  0.8496],
        [-1.9538,  0.1143],
        [-1.1758,  0.3086],
        [ 1.2168, -1.1472],
        [ 1.3569,  0.5104],
        [-0.1550,  1.8032],
        [ 1.1952,  1.0775],
        [-0.0390,  0.1321],
        [ 0.1198,  0.9818],
        [ 2.2436,  0.4355],
        [ 1.1230, -1.5644],
        [-0.7553,  1.7079],
        [ 0.2388,  0.3141],
        [-0.5787,  1.5672],
        [ 0.5788,  1.8456],
        [ 2.0564, -0.9529],
        [ 0.6542, -0.4389],
        [-2.6233,  2.4472]]) tensor([[  0.9746],
        [  5.0447],
        [ -6.4659],
        [  9.0505],
        [  1.3615],
   

### 提醒

1. 每次求导前梯度清零

2. 自己定义的损失函数记得求所有batch的和(loss.sum()), 梯度记得除以batch_size