In [1]:
import torch
from torch.utils import data
import d2l_tools as d2l
import numpy as np

In [2]:
# Seed torch's global RNG so that "Restart Kernel -> Run All" reproduces the
# same DataLoader shuffling and parameter initialisation on every run.
torch.manual_seed(42)

# Ground-truth parameters of the linear model the network should recover.
true_w = torch.tensor([2, -3.4])
true_b = 4.3
# NOTE(review): d2l_tools is a local helper module; presumably this draws 1000
# samples of y = Xw + b + noise as in the d2l book - confirm against d2l_tools.
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

![image.png](attachment:image.png)

In [3]:
# Read the dataset (is_train -> whether to shuffle the examples)
def load_array(data_arrays, batch_size, is_train=True):
    """Build a PyTorch ``DataLoader`` over the given tensors.

    Args:
        data_arrays: Sequence of tensors; unpacked into a ``TensorDataset``.
        batch_size: Number of examples per mini-batch.
        is_train: Forwarded as ``shuffle``; ``True`` shuffles the data.

    Returns:
        A ``DataLoader`` that yields mini-batches from the dataset.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(
        tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,
    )
    return loader

# Mini-batch size and the shuffled iterator reused by the training loops.
batch_size = 10
data_iter = load_array((features, labels), batch_size=batch_size)

In [4]:
# Build a fresh iterator over the DataLoader and take its first mini-batch;
# as the cell's last expression, the [features, labels] pair is displayed.
next(iter(data_iter))

[tensor([[-0.3004,  0.0919],
         [-0.3728, -0.3876],
         [-0.9406, -1.2575],
         [ 0.3064,  0.9226],
         [-0.5109,  0.3138],
         [ 0.0060,  2.7536],
         [ 0.4085,  2.4344],
         [ 2.1169, -0.3367],
         [ 0.0083, -2.1189],
         [ 1.6071, -3.6199]]),
 tensor([[ 3.3865],
         [ 4.8552],
         [ 6.7168],
         [ 1.7853],
         [ 2.1961],
         [-5.0550],
         [-3.1765],
         [ 9.6850],
         [11.5029],
         [19.8136]])]

![image.png](attachment:image.png)

In [5]:
# Define the model: one fully connected layer mapping 2 inputs to 1 output,
# wrapped in a Sequential container.
from torch import nn

net = nn.Sequential(nn.Linear(2,1))

In [6]:
# Initialise the parameters (net[0] selects the first layer of the network):
# weights are overwritten in place with normal(mean=0, std=0.01) samples and
# the bias is filled with 0.
net[0].weight.data.normal_(0,0.01)
net[0].bias.data.fill_(0)
# Display the container, its layer and both parameters. net.parameters() is a
# generator, so only its repr is shown.
net,net[0],net[0].weight,net[0].bias,net.parameters()

(Sequential(
   (0): Linear(in_features=2, out_features=1, bias=True)
 ),
 Linear(in_features=2, out_features=1, bias=True),
 Parameter containing:
 tensor([[0.0044, 0.0069]], requires_grad=True),
 Parameter containing:
 tensor([0.], requires_grad=True),
 <generator object Module.parameters at 0x000001A15BD9CDD0>)

In [7]:
# Define the loss function: mean squared error (squared L2 norm)
loss = nn.MSELoss()

In [8]:
# Define the optimisation algorithm: minibatch stochastic gradient descent
# over all parameters of net, with learning rate 0.03.
trainer = torch.optim.SGD(net.parameters(),lr=0.03)

![image.png](attachment:image.png)

In [9]:
# Training
num_epochs = 3

times = d2l.Timer()
for epoch in range(num_epochs):
    for x, y in data_iter:
        # net already holds its parameters, so only the inputs are passed in
        l = loss(net(x),y)
        # Reset the gradients before back-propagating this mini-batch
        trainer.zero_grad()
        l.backward()
        trainer.step()
    # Per-epoch evaluation on the full dataset. No backward pass follows, so
    # autograd tracking is switched off to avoid building an unused graph.
    with torch.no_grad():
        l = loss(net(features),labels)
    print(f'epoch {epoch+1}, loss {l}')
# Last expression of the cell: the value returned by the timer is displayed.
times.stop()

epoch 1, loss 0.00024183013010770082
epoch 2, loss 9.169943950837478e-05
epoch 3, loss 9.148252138402313e-05


0.853644847869873

In [10]:
# Difference between the true parameters and the learned ones
w = net[0].weight.data
b = net[0].bias.data
# The printed labels read "estimation error of w" / "estimation error of b".
print(f'w的估计误差:{true_w - w}')
print(f'b的估计误差:{true_b - b}')

w的估计误差:tensor([[0.0004, 0.0003]])
b的估计误差:tensor([0.0003])


***练习***
![image.png](attachment:image.png)

In [11]:
# Exercise 1 (QA1): new SGD optimiser over the same net, lr kept at 0.03
num_epochs = 3
trainer = torch.optim.SGD(net.parameters(),lr=0.03)

def average_loss(y_hat, y):
    """Mean absolute error between predictions and targets.

    Args:
        y_hat: Tensor of predictions; its first dimension indexes the examples.
        y: Tensor of targets with as many elements as ``y_hat``. It is reshaped
            to ``y_hat.shape`` before subtracting so that a different layout
            cannot trigger broadcasting.

    Returns:
        The absolute errors summed over the first dimension and divided by
        ``len(y_hat)``. For ``(n, 1)`` predictions this is a one-element
        tensor of shape ``(1,)``.
    """
    # A single vectorised reduction replaces Python's built-in sum(), which
    # walks the tensor row by row and adds one autograd node per example.
    abs_err = torch.abs(y_hat - y.reshape(y_hat.shape))
    return abs_err.sum(dim=0) / len(y_hat)

loss = average_loss

# NOTE(review): net is not re-initialised in this cell, so training continues
# from the parameters left by the earlier MSE training loop. Any comparison of
# convergence speed should be read with that in mind.
for epoch in range(num_epochs):
    for x, y in data_iter:
        # net already holds its parameters, so only the inputs are passed in
        l = loss(net(x),y)
        # Reset the gradients before back-propagating this mini-batch
        trainer.zero_grad()
        l.backward()
        trainer.step()
    # Per-epoch evaluation on the full dataset. No backward pass follows, so
    # autograd tracking is switched off to avoid building an unused graph.
    with torch.no_grad():
        l = loss(net(features),labels)
    print(f'epoch {epoch+1}, loss {float(l.detach())}')

# Original observation: with the same lr, the loss decreases more slowly

epoch 1, loss 0.022844325751066208
epoch 2, loss 0.011360175907611847
epoch 3, loss 0.013328345492482185


In [12]:
num_epochs = 3
trainer = torch.optim.SGD(net.parameters(),lr=0.001)

# Built-in L1 loss (mean absolute error), used in place of the hand-written
# version from the previous cell.
loss = nn.L1Loss()

for epoch in range(num_epochs):
    for x, y in data_iter:
        # net already holds its parameters, so only the inputs are passed in
        l = loss(net(x),y)
        # Reset the gradients before back-propagating this mini-batch
        trainer.zero_grad()
        l.backward()
        trainer.step()
    # Per-epoch evaluation on the full dataset. No backward pass follows, so
    # autograd tracking is switched off to avoid building an unused graph.
    with torch.no_grad():
        l = loss(net(features),labels)
    print(f'epoch {epoch+1}, loss {float(l.detach())}')
# Original conclusion: if the summed minibatch loss is replaced by its mean,
# the lr should be reduced.
# NOTE(review): averaging divides the gradient by the batch size relative to
# summing, so matching the summed-loss step size would call for a larger lr,
# not a smaller one - please confirm the intended conclusion.

epoch 1, loss 0.007629047147929668
epoch 2, loss 0.007721413858234882
epoch 3, loss 0.007658297196030617


![image-2.png](attachment:image-2.png)

In [13]:
# QA3: Huber loss, which has one hyperparameter (the threshold)
def Huber_loss(y_hat, y, threshold):
    """Sum of element-wise Huber losses (the variant scaled by 1/threshold).

    With ``e = |y_hat - y|`` each element contributes

    * ``e - threshold / 2``          if ``e > threshold``
    * ``e**2 / (2 * threshold)``     otherwise

    Args:
        y_hat: Tensor of predictions; its first dimension indexes the examples.
        y: Tensor of targets. If it has as many elements as ``y_hat`` but a
            different shape (e.g. ``(n,)`` against ``(n, 1)``), it is reshaped
            to ``y_hat.shape`` so the subtraction cannot broadcast to
            ``(n, n)``.
        threshold: Positive number at which the loss switches from quadratic
            to linear.

    Returns:
        The per-element losses summed over the first dimension. For ``(n, 1)``
        predictions this is a tensor of shape ``(1,)``.
    """
    if y.shape != y_hat.shape and y.numel() == y_hat.numel():
        y = y.reshape(y_hat.shape)
    abs_err = torch.abs(y_hat - y)
    # torch.where selects the branch element-wise. This replaces the Python
    # loop with a per-element `if`, and the dummy requires_grad accumulator.
    per_example = torch.where(
        abs_err > threshold,
        abs_err - threshold / 2,
        abs_err ** 2 / (2 * threshold),
    )
    return per_example.sum(dim=0)

In [14]:
# Compact implementation of the Huber loss
def huber(y_hat, y, delta):
    """Sum of element-wise Huber losses in the standard (unscaled) form.

    With residual ``r = y_hat - y`` each element contributes

    * ``0.5 * r**2``                      if ``|r| < delta``
    * ``delta * |r| - 0.5 * delta**2``    otherwise

    Args:
        y_hat: Tensor of predictions; its first dimension indexes the examples.
        y: Tensor of targets. If it has as many elements as ``y_hat`` but a
            different shape (e.g. ``(n,)`` against ``(n, 1)``), it is reshaped
            to ``y_hat.shape`` so the subtraction cannot broadcast to
            ``(n, n)``.
        delta: Number at which the loss switches from quadratic to linear.

    Returns:
        The per-element losses summed over the first dimension. For ``(n, 1)``
        predictions this is a tensor of shape ``(1,)``.
    """
    if y.shape != y_hat.shape and y.numel() == y_hat.numel():
        y = y.reshape(y_hat.shape)
    residual = y_hat - y
    abs_err = torch.abs(residual)
    per_example = torch.where(
        abs_err < delta,
        0.5 * residual ** 2,
        delta * abs_err - 0.5 * delta ** 2,
    )
    # Tensor reduction instead of Python's built-in sum(), which iterates the
    # tensor row by row.
    return per_example.sum(dim=0)

In [15]:
num_epochs = 3
trainer = torch.optim.SGD(net.parameters(),lr=0.03)
# NOTE(review): a delta of 10000 is far above the label magnitudes displayed
# earlier in the notebook (roughly +-20), so huber() presumably stays on its
# quadratic branch for every sample. Choose a smaller value to exercise the
# linear branch.
threshold = 10000

loss = huber

# NOTE(review): net is not re-initialised in this cell, so training continues
# from the parameters left by the previous training loops.
for epoch in range(num_epochs):
    for x, y in data_iter:
        # net already holds its parameters, so only the inputs are passed in
        l = loss(net(x),y,threshold)
        # Reset the gradients before back-propagating this mini-batch
        trainer.zero_grad()
        l.backward()
        trainer.step()
    # Per-epoch evaluation on the full dataset. No backward pass follows, so
    # autograd tracking is switched off to avoid building an unused graph.
    with torch.no_grad():
        l = loss(net(features),labels,threshold)
    print(f'epoch {epoch+1}, loss {float(l.detach())}')

epoch 1, loss 0.0466894656419754
epoch 2, loss 0.04592466354370117
epoch 3, loss 0.048574939370155334


In [16]:
# QA3: display the gradients currently stored on the layer's weight and bias
# (the training loops zero them before every backward(), so these come from
# the most recent backward pass).
net[0].weight.grad, net[0].bias.grad

(tensor([[0.0289, 0.0048]]), tensor([-0.0059]))