In [1]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)

In [2]:
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """构造一个PyTorch数据迭代器"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)

In [5]:
next(iter(data_iter))

[tensor([[-1.1455e+00, -3.1440e-01],
         [-8.3756e-01,  1.1176e+00],
         [-1.1291e+00, -1.9585e+00],
         [-1.2938e+00, -1.4421e+00],
         [-5.6697e-01, -2.4040e-01],
         [-3.9582e-01,  3.5603e-01],
         [ 2.3211e-01, -1.1521e+00],
         [ 7.2597e-01,  1.6181e-01],
         [ 9.9252e-04,  1.9350e+00],
         [-8.4096e-01,  9.3829e-01]]),
 tensor([[ 2.9777],
         [-1.2656],
         [ 8.6025],
         [ 6.5266],
         [ 3.8799],
         [ 2.2061],
         [ 8.5827],
         [ 5.0945],
         [-2.3684],
         [-0.6626]])]

In [7]:
from torch import nn

net = nn.Sequential(nn.Linear(2, 1))

In [15]:
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)

tensor([0.])

In [16]:
loss = nn.MSELoss()

In [17]:
trainer = torch.optim.SGD(net.parameters(), lr=0.03)

In [18]:
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'Epoch {epoch+1}, loss: {l:f}')

Epoch 1, loss: 0.000209
Epoch 2, loss: 0.000101
Epoch 3, loss: 0.000100


In [19]:
w = net[0].weight.data
print('w的估计误差：', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('b的估计误差：', true_b - b)

w的估计误差： tensor([ 0.0005, -0.0001])
b的估计误差： tensor([0.0006])


# Homework

2. 查看深度学习框架文档，它们提供了哪些损失函数和初始化方法？用Huber损失代替原损失，即


In [20]:
loss = nn.HuberLoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'Epoch {epoch+1}, loss: {l:f}')

Epoch 1, loss: 0.000050
Epoch 2, loss: 0.000050
Epoch 3, loss: 0.000050


In [22]:
help(torch.nn)

Help on package torch.nn in torch:

NAME
    torch.nn

PACKAGE CONTENTS
    _reduction
    backends (package)
    common_types
    cpp
    functional
    grad
    init
    intrinsic (package)
    modules (package)
    parallel (package)
    parameter
    qat (package)
    quantizable (package)
    quantized (package)
    utils (package)

FUNCTIONS
    factory_kwargs(kwargs)
        Given kwargs, returns a canonicalized dict of factory kwargs that can be directly passed
        to factory functions like torch.empty, or errors if unrecognized kwargs are present.
        
        This function makes it simple to write code like this::
        
            class MyModule(nn.Module):
                def __init__(self, **kwargs):
                    factory_kwargs = torch.nn.factory_kwargs(kwargs)
                    self.weight = Parameter(torch.empty(10, **factory_kwargs))
        
        Why should you use this function instead of just passing `kwargs` along directly?
        
        1.