使用以下三阶多项式来生成训练和测试数据的标签：

$$
y = 5 + 1.2x - \frac{3.4x^2}{2!} + \frac{5.6x^3}{3!} + \varepsilon, \quad \varepsilon \sim \mathcal{N}(0, 0.1^2)
$$


In [1]:
import numpy as np
import math

# 关于np.power的演示例子
import numpy as np

features = np.array([[2.0], [3.0], [4.0]])  # 3 个样本，每个是 1 个数
max_degree = 5

# 构造幂次 [0, 1, 2, 3, 4]
powers = np.arange(max_degree).reshape(1, -1)

# 计算多项式特征矩阵
poly_features = np.power(features, powers)

# [[ 1.  2.  4.   8.   16. ]   ← 2 的 [0, 1, 2, 3, 4] 次幂
#  [ 1.  3.  9.  27.   81. ]   ← 3 的幂
#  [ 1.  4. 16.  64.  256. ]]  ← 4 的幂
print(poly_features)


[[  1.   2.   4.   8.  16.]
 [  1.   3.   9.  27.  81.]
 [  1.   4.  16.  64. 256.]]


In [4]:
import numpy as np
import math

max_degree = 20
n_train, n_test = 100, 100

# 创建一个全 0 的权重向量，并设置前 4 项为真实多项式系数
true_w = np.zeros(max_degree)
true_w[0:4] = np.array([5, 1.2, -3.4, 5.6])  # 表示三阶多项式的真实系数

# 从标准正态分布中采样输入特征
features = np.random.normal(size=(n_train + n_test, 1))
np.random.shuffle(features)

# 构建多项式特征矩阵
poly_features = np.power(features, np.arange(max_degree).reshape(1, -1))

# 将每一项除以阶乘：x^i / i!
for i in range(max_degree):
    poly_features[:, i] /= math.gamma(i + 1)  # gamma(n+1) = n!

# 根据多项式模型计算标签（y = Xw）
# labels = np.dot(poly_features, true_w)
labels = poly_features@true_w

print(labels.shape)

# 加上高斯噪声 ε ~ N(0, 0.1^2)
labels += np.random.normal(scale=0.1, size=labels.shape)


(200,)


In [5]:
import torch

# 把数据从np转换成tensor
true_w, features, poly_features, labels = [
    torch.tensor(x, dtype=torch.float32)
    for x in [true_w, features, poly_features, labels]
]

# 查看前两个样本的原始特征、多项式特征 和 标签
features[:2], poly_features[:2, :], labels[:2]


(tensor([[ 0.1236],
         [-0.7818]]),
 tensor([[ 1.0000e+00,  1.2363e-01,  7.6427e-03,  3.1497e-04,  9.7351e-06,
           2.4072e-07,  4.9602e-09,  8.7607e-11,  1.3539e-12,  1.8599e-14,
           2.2994e-16,  2.5844e-18,  2.6627e-20,  2.5323e-22,  2.2363e-24,
           1.8432e-26,  1.4243e-28,  1.0358e-30,  7.1146e-33,  4.6295e-35],
         [ 1.0000e+00, -7.8184e-01,  3.0564e-01, -7.9654e-02,  1.5569e-02,
          -2.4345e-03,  3.1724e-04, -3.5433e-05,  3.4629e-06, -3.0083e-07,
           2.3520e-08, -1.6717e-09,  1.0892e-10, -6.5505e-12,  3.6582e-13,
          -1.9068e-14,  9.3175e-16, -4.2852e-17,  1.8613e-18, -7.6592e-20]]),
 tensor([5.0737, 2.5503]))

In [11]:
from ml_utils import Accumulator

def evaluate_loss(net, data_iter, loss):
    """评估给定数据集上模型的损失。"""
    metric = Accumulator(2)  # [累计的 loss 总和, 样本总数]

    for X, y in data_iter:
        out = net(X)                          # 预测值
        y = y.reshape(out.shape)              # 调整 y 的形状与 out 匹配
        l = loss(out, y)                      # 计算损失张量（每个样本都有一个）

        metric.add(l.sum(), l.numel())        # 累加总损失 和 样本数

    return metric[0] / metric[1]              # 返回平均损失


In [16]:
from torch import nn
from torch.utils import data
from ml_utils import train_epoch_ch3
# from ml_utils import load_array
def load_array(data_arrays, batch_size, is_train=True):
    """构造一个 PyTorch 数据迭代器"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)



def train(train_features, test_features, train_labels, test_labels, num_epochs=400):
    loss = nn.MSELoss()
    input_shape = train_features.shape[-1]

    net = nn.Sequential(nn.Linear(input_shape, 1, bias=False))

    batch_size = min(10, train_labels.shape[0])
    train_iter = load_array((train_features, train_labels.reshape(-1, 1)), batch_size)
    test_iter = load_array((test_features, test_labels.reshape(-1, 1)), batch_size, is_train=False)

    trainer = torch.optim.SGD(net.parameters(), lr=0.01)

    for epoch in range(num_epochs):
        train_epoch_ch3(net, train_iter, loss, trainer)

    print('weight:', net[0].weight.data.numpy())

# 最终得到的值和真实值很接近
train(
    poly_features[:n_train, :4],     # 训练集输入（前 n_train 条，每条取前 4 个特征）
    poly_features[n_train:, :4],     # 测试集输入（剩下的样本，同样取前 4 个特征）
    labels[:n_train],                # 训练集标签
    labels[n_train:]                 # 测试集标签
)

# 欠拟合，多项式给的不够
train(
    poly_features[:n_train, :2],     # 训练集输入（前 n_train 条，每条取前 4 个特征）
    poly_features[n_train:, :2],     # 测试集输入（剩下的样本，同样取前 4 个特征）
    labels[:n_train],                # 训练集标签
    labels[n_train:]                 # 测试集标签
)

# 过拟合
train(
    poly_features[:n_train, :],     # 训练集输入（前 n_train 条，每条取前 4 个特征）
    poly_features[n_train:, :],     # 测试集输入（剩下的样本，同样取前 4 个特征）
    labels[:n_train],                # 训练集标签
    labels[n_train:]                 # 测试集标签
)



weight: [[ 4.9872956  1.2175598 -3.3753257  5.506545 ]]
weight: [[3.839045 2.760343]]
weight: [[ 5.0011163   1.2955995  -3.4031477   5.0004973  -0.05983304  1.5254266
  -0.02215476  0.2144047  -0.09985335 -0.03857401 -0.21245414  0.16638386
  -0.07887    -0.22338955 -0.12070329  0.16347791 -0.02492041  0.03927466
   0.21709704  0.1377159 ]]
