# 线性回归的简洁实现  

### 1. 加载数据集  
注意数据集的维度。

In [60]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l
from torch import nn

true_w = torch.tensor([2, -3.4])
true_b = torch.tensor([4.2])
features, labels = d2l.synthetic_data(true_w, true_b, 1000)
print(features.shape, labels.shape)

torch.Size([1000, 2]) torch.Size([1000, 1])


### 2. 构造小批量样本生产函数  
`torch.utils.data.DataLoader()`函数详解：这里主要介绍自动批次加载数据，关注三个点：输出顺序，一个批次的大小，以何种形式输出批次。  
>```Python
>torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=None, sampler=None, batch_sampler=None, num_workers=0,  
>                            collate_fn=None, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None,  
>                            multiprocessing_context=None, generator=None, *, prefetch_factor=None, persistent_workers=False,  
>                            pin_memory_device='', in_order=True)  
>Parameters  
>   
>   *  dataset(Dataset) - Map-style datasets or Iterable-style datasets.A map-style dataset is one that implements the __getitem__() and __len__() protocols, and represents a map from (possibly non-integral) indices/keys to data samples. An iterable-style dataset is an instance of a subclass of IterableDataset that implements the __iter__() protocol, and represents an iterable over data samples.  
>   *  batch_size(int, optional) - 一个batch的样本数
>   *  shuffle(bool, optional) - 随机打乱datasets的样本输出顺序
>   *  sampler(Sampler or Iterable, optional) - 定义dataset的样本输出顺序，每次输出一个样本，不能与shuffle同时使用（For map-style datasets, the sampler is either provided by user or constructed based on the shuffle argument.）  
>   *  batch_sampler(Sampler or Iterable, optional)  - 定义dataset的样本输出顺序和batch_size，不能与batch_size、shuffle、sampler同时使用
>   *  collate_fn(Callable, optional)  - 一个定义如何打包输出样本的函数，有默认值  
>```  

`torch.utils.data.TensorDateset()`类详解：  
>```python
> def __init__(self, *tensors: Tensor) -> None: 输入是多个最高维度相同的tensor，例如tensor1, tensor2, ..., tensorx  
> def __getitem__(self, index):  输出是tuple(tensor1[index], tensor2[index], ..., tensorx[index])  
>```

In [63]:
def load_array(data_arrays, batch_size, is_train=True):
    """构造一个PyTorch数据迭代器"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)
print(next(iter(data_iter)))

[tensor([[-0.0864, -0.3470],
        [-0.4102, -0.4229],
        [ 1.1013, -0.7217],
        [-0.3959, -0.8336],
        [ 0.0923, -0.3248],
        [-1.4870, -0.8908],
        [ 0.4897,  0.0634],
        [-0.5066, -0.3753],
        [-0.2499, -0.9528],
        [ 0.0209, -0.6818]]), tensor([[5.2185],
        [4.8035],
        [8.8540],
        [6.2350],
        [5.4861],
        [4.2617],
        [4.9746],
        [4.4595],
        [6.9510],
        [6.5486]])]


### 3. 定义模型  
`nn.Linear()`函数详解：  
>```Python
>torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)
> Parameters  
>   *  in_features (int) – size of each input sample  
>   *  out_features (int) – size of each output sample  
>   *  bias(bool)** – If set to False, the layer will not learn an additive bias. Default: True  
> # 举例  
> m = nn.Linear(20, 30) # 输入样本的features数为20，输出样本的features数为30  
> input = torch.randn(128, 20) # 输入128个样本，每个样本的features数为20   
> output = m(input)  
> print(output.size())
> torch.Size([128, 30]) # 输出128个样本，每个样本的features数为30  
>```  

In [50]:
net = nn.Sequential(nn.Linear(2, 1))
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)

tensor([0.])

### 4. 定义损失函数  
`nn.MSELoss()`计算的是误差平均值。  

In [51]:
loss = nn.MSELoss() # 生成一个MSELoss实例，包含该实例具有方法__call__()

### 5. 定义优化算法  

In [52]:
trainer = torch.optim.SGD(net.parameters(), lr=0.01)

### 6. 训练

In [59]:
num_epochs = 9
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y) # 带入模型，计算损失
        trainer.zero_grad() # 将参数之前的梯度清除
        l.backward() # 反向传播，获取梯度
        trainer.step() # 优化，参数更新，SGD用@torch.no_grad()修饰了
        
    l = loss(net(features), labels) # 注意这里的l与上面循环中的l不是同一个，因此不用torch.no_grad()也可以
    print(f'epoch{epoch + 1}, loss{l:f}')

l.backward()
w = net[0].weight.data
b = net[0].bias.data
print('w的估计误差：', true_w - w.reshape(true_w.shape))
print('b的估计误差：', true_b - b)
print('w的梯度：', net[0].weight.data.grad)
print('b的梯度：', net[0].bias.data.grad)


epoch1, loss0.000106
epoch2, loss0.000106
epoch3, loss0.000106
epoch4, loss0.000106
epoch5, loss0.000106
epoch6, loss0.000106
epoch7, loss0.000106
epoch8, loss0.000106
epoch9, loss0.000106
torch.FloatTensor
w的估计误差： tensor([2.8467e-04, 1.5736e-05])
b的估计误差： tensor([-0.0005])
w的梯度： None
b的梯度： None


In [58]:
a = torch.arange(10, dtype=torch.float32)
print(id(a))
a = torch.arange(10, dtype=torch.float32)
print(id(a))

2850161550160
2850182067648
