# 线性回归的实现
只使用 PyTorch 的张量和自动求导，从零实现一个线性回归模型。

In [1]:
%matplotlib inline
import random, torch

首先定义一个线性函数，并且利用该函数生成带有噪音的数据集：

In [2]:
# Ground-truth parameters of the linear model used to synthesise the dataset.
true_w = torch.tensor([2.0, -3.4])  # one weight per feature column
true_b = 4.2  # scalar bias
def gen_data(w, b, num):
    """Generate a synthetic linear-regression dataset with Gaussian noise.

    Args:
        w: 1-D weight tensor; its length sets the number of feature columns.
        b: Bias added to every label.
        num: Number of samples to draw.

    Returns:
        A ``(features, labels)`` pair where ``features`` has shape
        ``(num, len(w))`` and ``labels`` has shape ``(num, 1)``.
    """
    n_features = len(w)
    # Features are sampled from a standard normal distribution (mean 0, sd 1).
    inputs = torch.normal(0, 1, (num, n_features))
    # Noise-free targets of the linear model: inputs @ w + b.
    targets = torch.mv(inputs, w) + b
    # Small Gaussian perturbation (mean 0, sd 0.01) so the fit is not exact.
    noise = torch.normal(0, 0.01, targets.shape)
    return inputs, (targets + noise).reshape(-1, 1)

# Build a 1000-sample dataset from the ground-truth parameters, then preview
# the first ten rows of the features and of the labels.
features, labels = gen_data(w=true_w, b=true_b, num=1000)
for preview in (features, labels):
    print(preview[:10])

tensor([[-2.1616,  1.6216],
        [-0.2649, -1.0222],
        [-1.7370,  0.5960],
        [ 0.2777,  0.0789],
        [ 0.3846, -0.6121],
        [-0.0747, -0.6739],
        [ 1.4372,  0.5424],
        [ 1.1284,  0.4688],
        [ 1.2706,  1.2481],
        [-0.9543,  1.1708]])
tensor([[-5.6345],
        [ 7.1435],
        [-1.3044],
        [ 4.4781],
        [ 7.0516],
        [ 6.3518],
        [ 5.2336],
        [ 4.8713],
        [ 2.5124],
        [-1.7075]])


定义一个读取数据集（features 与 labels 的组合）的函数，每次随机读取一个批量（batch size）的样本：

In [3]:
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    The sample order is reshuffled with ``random.shuffle`` on every call, and
    each sample is yielded exactly once per pass.

    Args:
        batch_size: Maximum number of samples per batch.
        features: Tensor indexed along its first dimension by sample.
        labels: Tensor indexed along its first dimension by sample; its
            length determines the number of samples.

    Yields:
        ``(features[idx], labels[idx])`` for consecutive chunks of the
        shuffled index list. The last batch is shorter when the number of
        samples is not a multiple of ``batch_size``.
    """
    n_samples = len(labels)
    order = list(range(n_samples))
    random.shuffle(order)
    for start in range(0, n_samples, batch_size):
        # List slicing clamps at the end, so no explicit bound is needed.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]
        
# Sanity check: fetch only the first shuffled mini-batch and display it.
batch_size = 10
first_batch = next(data_iter(batch_size, features, labels), None)
if first_batch is not None:
    X, y = first_batch
    print(X, y)
    

tensor([[-1.0486,  2.4155],
        [-0.8513, -0.1747],
        [-0.6692,  0.9955],
        [ 0.5147, -0.0248],
        [ 0.1513, -1.5683],
        [-0.6420, -1.6906],
        [ 0.3862, -0.2026],
        [-0.7877,  1.1728],
        [ 0.5581, -0.3032],
        [ 1.8395, -1.0935]]) tensor([[-6.1115],
        [ 3.0895],
        [-0.5229],
        [ 5.3268],
        [ 9.8300],
        [ 8.6800],
        [ 5.6661],
        [-1.3529],
        [ 6.3547],
        [11.5772]])


创建一个线性回归模型 $\hat{y} = Xw + b$，其中 $w$、$b$ 是模型需要学习的参数，分别初始化这两个参数：

In [4]:
def linear_regression(X, w, b):
    """Return the linear model's prediction ``X @ w + b``.

    Args:
        X: 2-D input matrix (``mm`` requires matrix operands).
        w: 2-D weight matrix whose row count matches the columns of ``X``.
        b: Bias added (with broadcasting) to the matrix product.
    """
    projection = torch.mm(X, w)
    return projection + b

# Learnable parameters: small random weights (sd 0.01) and a zero bias, both
# tracked by autograd so that backward() populates their .grad.
w = torch.normal(mean=0.0, std=0.01, size=(2, 1), requires_grad=True)
b = torch.zeros((1,), requires_grad=True)

损失函数用来衡量 $y$ 与 $\hat{y}$ 的差异，定义平方损失函数 $\ell(\hat{y}, y) = \frac{1}{2}(\hat{y} - y)^2$：

In [5]:
def squared_loss(y_hat, y):
    """Return the element-wise halved squared error ``(y_hat - y)**2 / 2``.

    Args:
        y_hat: Predictions.
        y: Targets; must hold the same number of elements as ``y_hat``.

    Returns:
        A tensor with the shape of ``y_hat`` (no reduction is applied).
    """
    # Reshape y to y_hat's shape so the subtraction never broadcasts
    # (e.g. (n, 1) - (n,) would otherwise silently produce an (n, n) result).
    residual = y_hat - y.reshape(y_hat.shape)
    return residual.pow(2) / 2

优化函数用于更新参数，此处使用随机梯度下降作为优化算法：

In [6]:
def sgd(params, lr, batch_size):
    """Apply one in-place mini-batch SGD step, then reset the gradients.

    Each parameter is updated as ``p -= lr * p.grad / batch_size``. Dividing
    by ``batch_size`` turns the gradient of a summed loss into the gradient
    of the mean loss.

    Args:
        params: Iterable of tensors to update in place.
        lr: Learning rate.
        batch_size: Divisor applied to every gradient.

    Parameters whose ``.grad`` is ``None`` (they received no gradient from the
    last backward pass) are left untouched instead of raising ``TypeError``.
    """
    # Updates must not be recorded by autograd, hence no_grad.
    with torch.no_grad():
        for p in params:
            if p.grad is None:
                continue
            p -= lr * p.grad / batch_size  # gradient-descent step
            p.grad.zero_()  # clear so the next backward() does not accumulate

采用迭代周期为3，学习率为0.03的超参数组合，进行训练，每个训练周期随机读取数据：

In [7]:
# Hyperparameters: number of passes over the data, learning rate, batch size.
epochs, lr, batch_size = 3, 0.03, 10

for epoch in range(epochs):
    # One pass over the dataset in freshly shuffled mini-batches.
    for X, y in data_iter(batch_size, features, labels):
        y_hat = linear_regression(X, w, b)
        l = squared_loss(y_hat, y)
        # Backpropagate the sum of the per-sample losses; sgd() divides the
        # gradient by batch_size, giving a mean-gradient step.
        # NOTE(review): this assumes every batch holds exactly batch_size
        # samples (true here: 1000 samples, batches of 10).
        torch.sum(l).backward()
        sgd(params=[w, b], lr=lr, batch_size=batch_size)
    # Evaluate on the full training set without tracking gradients.
    with torch.no_grad():
        train_loss = squared_loss(linear_regression(features, w, b), labels)
    print('epoch %d : loss %f' % (epoch, train_loss.mean()))

epoch 0 : loss 0.027870
epoch 1 : loss 0.000096
epoch 2 : loss 0.000050


loss 随着训练的进行逐渐降低，表明预测的精度也在逐步提高。由于数据集的标签带有标准差为 0.01 的噪声，即使参数完全学对，loss 也不会降到 0，而是稳定在约 $0.01^2 / 2 = 5 \times 10^{-5}$ 附近（与 epoch 2 的 0.000050 一致）。