### 框架干的最厉害的一件事就是帮我们把反向传播全部计算好了

In [1]:
import torch

需要求导的张量，可以手动指定 requires_grad：

In [2]:
# Method 1: ask for gradient tracking when the tensor is created.
x = torch.randn(3, 4, requires_grad=True)
x

tensor([[ 1.6917,  0.8596, -0.7225, -0.2817],
        [-1.1565,  0.0324, -0.3189,  1.7134],
        [ 1.6229, -0.1414, -0.0893, -1.6337]], requires_grad=True)

In [3]:
# Method 2: build the tensor first, then turn gradient tracking on afterwards.
x = torch.randn(3, 4)
x.requires_grad = True
x

tensor([[-0.9035, -0.8695,  0.1541,  0.7065],
        [ 1.2810, -0.5710,  1.0751, -1.0052],
        [ 0.7824, -1.6710,  0.1632, -0.5911]], requires_grad=True)

In [4]:
b = torch.randn(3,4,requires_grad=True)

In [5]:
t = x + b

In [6]:
y = t.sum()
y

tensor(-4.5015, grad_fn=<SumBackward0>)

In [7]:
# Back-propagate from the scalar y; autograd fills in .grad on the leaf tensors that require grad.
y.backward()

In [8]:
b.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

虽然没有显式指定 t 的 requires_grad，但 t 是由需要求导的张量计算得到的，所以它的 requires_grad 也会自动为 True

In [9]:
x.requires_grad, b.requires_grad, t.requires_grad

(True, True, True)

### 举个例子看一下：

![title](./img/2.png)

In [10]:
# Forward computation: z = w * x + b.
# x is plain input data; only the weight w and the bias b are tracked by autograd.
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)
y = w * x
z = y + b

In [11]:
x.requires_grad, b.requires_grad, w.requires_grad, y.requires_grad  # note: y requires grad as well

(False, True, True, True)

In [12]:
# x, w and b were created directly, so they are leaves; y and z are results of operations.
x.is_leaf, w.is_leaf, b.is_leaf, y.is_leaf, z.is_leaf

(True, True, True, False, False)

反向传播计算

In [13]:
# retain_graph=True keeps the graph so backward() can be called again; unless the .grad
# tensors are zeroed in between, every extra call adds to the gradients already stored.
z.backward(retain_graph=True)

In [14]:
w.grad

tensor([0.5047])

In [15]:
b.grad

tensor([1.])

### 做一个线性回归试试水

构造一组输入数据X和其对应的标签y

In [17]:
import numpy as np

In [37]:
# NOTE(review): x_train is only defined by the data-construction cell shown below; on a
# fresh kernel, run that cell first (this cell's execution count, 37, shows it ran later).
x_train

array([[ 0.],
       [ 1.],
       [ 2.],
       [ 3.],
       [ 4.],
       [ 5.],
       [ 6.],
       [ 7.],
       [ 8.],
       [ 9.],
       [10.]], dtype=float32)

In [18]:
# Inputs 0..10 stored as a float32 column vector (one sample per row).
x_values = list(range(11))
x_train = np.asarray(x_values, dtype=np.float32).reshape(-1, 1)
x_train.shape

(11, 1)

In [19]:
# Labels lie exactly on the line y = 2x + 1, stored as a float32 column vector.
y_values = [2 * value + 1 for value in x_values]
y_train = np.asarray(y_values, dtype=np.float32).reshape(-1, 1)
y_train.shape

(11, 1)

In [20]:
import torch
import torch.nn as nn

### 线性回归模型
- 其实线性回归就是一个不加激活函数的全连接层

In [21]:
class LinearRegressionModel(nn.Module):
    """Linear regression expressed as one fully connected layer with no activation."""

    def __init__(self, input_dim, output_dim):
        """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the linear layer's output for the input batch ``x``."""
        return self.linear(x)

In [22]:
# One input feature and one output value per sample.
input_dim = 1
output_dim = 1

model = LinearRegressionModel(input_dim, output_dim)

In [23]:
model

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

指定好参数和损失函数

In [24]:
epochs = 1000  # number of passes over the training data
learning_rate = 0.01  # step size handed to SGD
# Plain stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Mean squared error between predictions and labels.
criterion = nn.MSELoss()

训练模型

In [25]:
# The training data never changes, so convert it to tensors once rather than every epoch.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the progress message reports 50, 100, ..., epochs.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them on every iteration.
    optimizer.zero_grad()

    # Forward pass.
    outputs = model(inputs)

    # Loss between predictions and labels.
    loss = criterion(outputs, labels)

    # Backward pass: compute the gradients of the loss w.r.t. the model parameters.
    loss.backward()

    # Update the weights.
    optimizer.step()

    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 0.0011439467780292034
epoch 100, loss 0.0006524861673824489
epoch 150, loss 0.00037215184420347214
epoch 200, loss 0.0002122612640960142
epoch 250, loss 0.00012105758651159704
epoch 300, loss 6.904589827172458e-05
epoch 350, loss 3.938117151847109e-05
epoch 400, loss 2.2463682398665696e-05
epoch 450, loss 1.2811829037673306e-05
epoch 500, loss 7.306821771635441e-06
epoch 550, loss 4.168630766798742e-06
epoch 600, loss 2.3772777240083087e-06
epoch 650, loss 1.35587526983727e-06
epoch 700, loss 7.734261089353822e-07
epoch 750, loss 4.410334213389433e-07
epoch 800, loss 2.517213602004631e-07
epoch 850, loss 1.4346085208671866e-07
epoch 900, loss 8.191996414552705e-08
epoch 950, loss 4.669229980436285e-08
epoch 1000, loss 2.6653259155295927e-08


### 测试模型预测结果

In [40]:
# The inputs do not need gradient tracking for prediction; the output still carries a
# grad_fn because the model's own parameters require grad.
model(torch.from_numpy(x_train))

tensor([[ 0.9954],
        [ 2.9961],
        [ 4.9967],
        [ 6.9974],
        [ 8.9981],
        [10.9987],
        [12.9994],
        [15.0000],
        [17.0007],
        [19.0014],
        [21.0020]], grad_fn=<AddmmBackward0>)

In [38]:
# Inference only: with autograd disabled the result converts straight to NumPy, which
# replaces the legacy `.data` access and the needless requires_grad_() on the inputs.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted

array([[ 0.9954267],
       [ 2.9960852],
       [ 4.9967437],
       [ 6.997402 ],
       [ 8.998061 ],
       [10.998719 ],
       [12.999378 ],
       [15.000037 ],
       [17.000694 ],
       [19.001352 ],
       [21.00201  ]], dtype=float32)

### 模型的保存与读取

In [27]:
# Save only the model's state_dict (its parameters), not the whole model object.
torch.save(model.state_dict(), 'model.pkl')

In [28]:
# Read the saved state_dict back and load it into the existing model instance.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from trusted sources.
model.load_state_dict(torch.load('model.pkl'))

<All keys matched successfully>

### 使用GPU进行训练
- 只需要把数据和模型传入到cuda里面就可以了

In [29]:
import torch
import torch.nn as nn
import numpy as np


class LinearRegressionModel(nn.Module):
    """Linear regression as a single fully connected layer (repeated here so the
    device-aware workflow can be read in one place)."""

    def __init__(self, input_dim, output_dim):
        """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the linear layer's output for the input batch ``x``."""
        return self.linear(x)


input_dim = 1
output_dim = 1

model = LinearRegressionModel(input_dim, output_dim)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the parameters before building the optimizer so it references the moved tensors.
model.to(device)

criterion = nn.MSELoss()

learning_rate = 0.01

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# x_train / y_train come from the earlier data cells. They never change, so convert and
# transfer them to the target device once instead of repeating the copy every epoch.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

epochs = 1000
# Count epochs from 1 so the progress message reports 50, 100, ..., epochs.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them on every iteration.
    optimizer.zero_grad()

    # Forward pass on the selected device.
    outputs = model(inputs)

    loss = criterion(outputs, labels)

    # Backward pass followed by the parameter update.
    loss.backward()

    optimizer.step()

    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 0.2596997916698456
epoch 100, loss 0.14812305569648743
epoch 150, loss 0.08448372036218643
epoch 200, loss 0.04818641021847725
epoch 250, loss 0.027483642101287842
epoch 300, loss 0.01567569188773632
epoch 350, loss 0.00894078891724348
epoch 400, loss 0.0050994944758713245
epoch 450, loss 0.002908579306676984
epoch 500, loss 0.0016589458100497723
epoch 550, loss 0.0009461900335736573
epoch 600, loss 0.0005396712804213166
epoch 650, loss 0.00030781154055148363
epoch 700, loss 0.0001755628181854263
epoch 750, loss 0.0001001333657768555
epoch 800, loss 5.711294579668902e-05
epoch 850, loss 3.257288699387573e-05
epoch 900, loss 1.857947245298419e-05
epoch 950, loss 1.0597496839181986e-05
epoch 1000, loss 6.043941084499238e-06


In [34]:
# The model was moved to `device`, so the inputs must be moved there as well; feeding CPU
# inputs to a CUDA model raises a device-mismatch error. The result is copied back to the
# CPU for display, and autograd is disabled because this is inference only.
with torch.no_grad():
    device_predictions = model(torch.from_numpy(x_train).to(device)).cpu()
device_predictions

tensor([[ 0.9954],
        [ 2.9961],
        [ 4.9967],
        [ 6.9974],
        [ 8.9981],
        [10.9987],
        [12.9994],
        [15.0000],
        [17.0007],
        [19.0014],
        [21.0020]])