In [None]:
import math
import numpy as np
import torch
from torch import nn 
from d2l import torch as d2l

$$
y=5+1.2 x-3.4 \frac{x^2}{2!}+5.6 \frac{x^3}{3!}+\epsilon \text { where } \epsilon \sim \mathcal{N}\left(0,0.1^2\right)
$$

In [None]:
# Problem size: highest polynomial degree, and the train / test set sizes.
max_degree = 20
n_train, n_test = 100, 100

# Ground-truth coefficients: the first four are non-zero, the other sixteen stay 0.
true_w = np.zeros(max_degree)
true_w[:4] = [5, 1.2, -3.4, 5.6]

# One scalar feature per sample, stored as a column vector, then shuffled row-wise.
features = np.random.normal(size=(n_train + n_test, 1))
np.random.shuffle(features)

# Broadcasting the (n, 1) column against the exponents 0..max_degree-1 yields
# [[x1^0, x1^1, ...], [x2^0, x2^1, ...], ...] with shape (n, max_degree).
poly_features = features ** np.arange(max_degree)
# Divide column k by k!; math.gamma(k + 1) equals k! because gamma(n) = (n - 1)!.
poly_features /= np.array([math.gamma(k + 1) for k in range(max_degree)])

# labels has shape (n_train + n_test,): the noise-free polynomial value plus
# Gaussian noise with standard deviation 0.1.
labels = poly_features.dot(true_w)
labels = labels + np.random.normal(scale=0.1, size=labels.shape)

In [None]:
# Convert every NumPy array into a float32 torch tensor.
true_w, features, poly_features, labels = (
    torch.tensor(array, dtype=torch.float32)
    for array in (true_w, features, poly_features, labels)
)
# Show the first two rows of each tensor (rows are dimension 0).
features[:2], poly_features[:2, :], labels[:2]

In [None]:
def evaluate_loss(net, data_iter, loss):
    """Return the average loss of `net` over every batch in `data_iter`.

    Args:
        net: callable mapping a feature batch X to predictions.
        data_iter: iterable yielding (X, y) pairs.
        loss: callable taking (predictions, targets) and returning a tensor.

    Returns:
        Sum of all returned loss values divided by the number of loss
        elements, as a Python float. If `loss` returns one scalar per batch
        this is the mean of the per-batch values.

    Raises:
        ZeroDivisionError: if `data_iter` yields no batches.
    """
    loss_sum = 0.0   # running sum of all loss elements
    n_elements = 0   # running count of loss elements
    # Evaluation needs no gradients; skipping graph construction saves memory
    # and matches how evaluate_accuracy runs the model.
    with torch.no_grad():
        for X, y in data_iter:
            out = net(X)
            # Give the targets the same shape as the predictions so the loss
            # does not broadcast, e.g. (n,) against (n, 1).
            l = loss(out, y.reshape(out.shape))
            loss_sum += float(l.sum())
            n_elements += l.numel()
    return loss_sum / n_elements

In [None]:
from d2l.torch import Animator
class Accumulator:
    """Keep running sums over n variables."""

    def __init__(self, n):
        # One float slot per tracked quantity, all starting at zero.
        self.data = [0.0] * n

    def add(self, *args):
        """Add each positional value to the slot at the same position.

        Values are converted with float(). Because the slots are zipped with
        the arguments, passing fewer values than slots shortens the slot list.
        """
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        """Set every slot back to zero, keeping the number of slots."""
        # Assign a fresh list; self.data is a list and cannot be called.
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        """Return the running sum stored at position `idx`."""
        return self.data[idx]

# Classification accuracy: number of correct predictions in one batch.
def accuracy(y_hat, y):
    """Return how many predictions in `y_hat` equal the labels `y`, as a float.

    If `y_hat` has more than one dimension and more than one column, each row
    is reduced to the index of its largest entry before the comparison.
    """
    has_class_scores = y_hat.ndim > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(axis=1) if has_class_scores else y_hat
    # Cast to the label dtype before comparing, then count the matches.
    matches = predicted.type(y.dtype) == y
    return float(matches.type(y.dtype).sum())

# Accuracy of a model over a whole dataset.
def evaluate_accuracy(net, data_iter):
    """Return the fraction of samples in `data_iter` that `net` predicts correctly.

    An `nn.Module` is switched to evaluation mode first (for example, batch
    normalisation then uses its global running statistics instead of the
    statistics of the current mini-batch). The mode is not switched back.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Slot 0: number of correct predictions, slot 1: number of predictions made.
    totals = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            n_correct = accuracy(net(X), y)
            totals.add(n_correct, y.numel())
    correct, seen = totals[0], totals[1]
    return correct / seen


def train_epoch_ch3(net, train_iter, loss, updater):
    """Train `net` for one pass over `train_iter`.

    `updater` is either a `torch.optim.Optimizer` or a custom callable that is
    invoked with the batch size after the backward pass.

    Returns:
        (summed loss / label count, correct predictions / label count).
    """
    # Put the model in training mode (for example, batch normalisation then
    # normalises with the statistics of the current mini-batch).
    if isinstance(net, torch.nn.Module):
        net.train()
    # Slots: summed training loss, number of correct predictions, number of labels.
    stats = Accumulator(3)
    builtin_optimizer = isinstance(updater, torch.optim.Optimizer)
    for X, y in train_iter:
        # Forward pass, then compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if not builtin_optimizer:
            # Custom updater: back-propagate the summed loss and hand the
            # batch size to the updater.
            l.sum().backward()
            updater(X.shape[0])
        else:
            # PyTorch optimizer: clear old gradients, back-propagate the mean
            # loss, then take a step.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        stats.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    loss_total, correct_total, label_total = stats[0], stats[1], stats[2]
    return loss_total / label_total, correct_total / label_total


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train `net` for `num_epochs` epochs and plot the metrics after each one.

    Each epoch adds the training loss, training accuracy and test accuracy to
    the animator. After the last epoch the final metrics are checked with
    assertions: training loss below 0.5, both accuracies in (0.7, 1].
    """
    curve_names = ['train loss', 'train acc', 'test acc']
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=curve_names)
    for epoch in range(1, num_epochs + 1):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch, (*train_metrics, test_acc))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc


In [None]:
def train(train_features, test_features, train_labels, test_labels,
          num_epochs=400):
    """Fit a bias-free linear model with mini-batch SGD and plot the losses.

    The model's input width is taken from the last dimension of
    `train_features`, so `test_features` must have the same number of columns.

    Args:
        train_features: training inputs, one row per sample.
        test_features: test inputs, one row per sample.
        train_labels: training targets; reshaped to a column internally.
        test_labels: test targets; reshaped to a column internally.
        num_epochs: number of passes over the training data.

    Side effects:
        Draws the train / test loss curves on a log scale and prints the
        learned weights.
    """
    # Keep per-element losses: train_epoch_ch3 and evaluate_loss both compute
    # l.sum() over an element count, which is only a true per-sample average
    # when the loss is not already reduced to a scalar. The optimisation step
    # is unchanged because it back-propagates l.mean().
    loss = nn.MSELoss(reduction='none')
    input_shape = train_features.shape[-1]
    # No bias term: the constant is already a column of the polynomial features.
    net = nn.Sequential(nn.Linear(input_shape, 1, bias=False))
    batch_size = min(10, train_labels.shape[0])
    train_iter = d2l.load_array((train_features, train_labels.reshape(-1, 1)),
                                batch_size)
    test_iter = d2l.load_array((test_features, test_labels.reshape(-1, 1)),
                               batch_size, is_train=False)
    trainer = torch.optim.SGD(net.parameters(), lr=0.01)
    animator = d2l.Animator(xlabel='epoch', ylabel='loss', yscale='log',
                            xlim=[1, num_epochs], ylim=[1e-3, 1e2],
                            legend=['train', 'test'])
    for epoch in range(num_epochs):
        train_epoch_ch3(net, train_iter, loss, trainer)
        # Record the losses after the first epoch and then every 20 epochs.
        if epoch == 0 or (epoch + 1) % 20 == 0:
            animator.add(epoch + 1, (evaluate_loss(
                net, train_iter, loss), evaluate_loss(net, test_iter, loss)))
    print('weight:', net[0].weight.data.numpy())


In [None]:
# Normal fit: use the first four polynomial features, i.e. the same degree as
# the non-zero entries of true_w.
train(
    poly_features[:n_train, :4],
    poly_features[n_train:, :4],
    labels[:n_train],
    labels[n_train:],
)

In [None]:
# Underfitting: fit with only the first two polynomial features (the constant
# and x). The training and test features must be sliced to the same columns,
# because train() sizes the model from the training features and then
# evaluates it on the test features.
print(poly_features[:n_train, :2].shape, poly_features[n_train:, :2].shape,
      labels[:n_train].shape, labels[n_train:].shape)
train(poly_features[:n_train, :2], poly_features[n_train:, :2],
      labels[:n_train], labels[n_train:])

In [None]:
# Overfitting: use all max_degree polynomial features and train for longer.
train(
    poly_features[:n_train, :],
    poly_features[n_train:, :],
    labels[:n_train],
    labels[n_train:],
    num_epochs=1500,
)