#### pytorch的安装
- cpu版本安装: pip install torch==1.3.0+cpu torchvision==0.4.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
- GPU版本安装: pip install torch==1.3.0 torchvision==0.4.1 -f https://download.pytorch.org/whl/torch_stable.html

In [2]:
import torch
torch.__version__


'1.12.1'

'1.12.1'

#### 基本使用方法
- 创建一个矩阵

In [8]:
# torch.empty allocates a 5x3 tensor WITHOUT initializing its memory, so the
# printed values are whatever was already in the buffer (zeros are not guaranteed).
x = torch.empty(5, 3)
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


- 创建一个随机值

In [9]:
# 5x3 tensor of random values sampled uniformly from [0, 1).
x = torch.rand(5, 3)
x

tensor([[0.5681, 0.1327, 0.7864],
        [0.0698, 0.0250, 0.6641],
        [0.7761, 0.7579, 0.4064],
        [0.5142, 0.9232, 0.1499],
        [0.3884, 0.7971, 0.9654]])

- 初始化一个全零的矩阵

In [10]:
# 5x3 tensor of zeros stored with the integer dtype torch.long.
x = torch.zeros(5, 3, dtype=torch.long)
x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [11]:
# Build a tensor directly from Python data (here a list of three ints).
x = torch.tensor([5, 5, 3])

In [12]:
x

tensor([5, 5, 3])

In [13]:
# new_ones creates a 5x3 tensor filled with ones; the dtype is set explicitly
# to double (float64) here instead of being taken from the existing x.
x = x.new_ones(5, 3, dtype=torch.double)

In [14]:
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

In [15]:
# randn_like draws standard-normal samples with the same shape as x (5x3);
# the dtype is overridden to torch.float rather than kept as x's float64.
x = torch.randn_like(x, dtype=torch.float)
x

tensor([[-1.8858,  0.7104,  1.2545],
        [-2.1260, -0.0664, -1.1327],
        [ 0.8287, -1.5604,  0.5398],
        [-1.8596, -0.3208, -0.1077],
        [-0.3385, -0.1479,  1.4332]])

In [16]:
x.size()

torch.Size([5, 3])

- 基本计算方法

In [17]:
# Element-wise addition with the + operator; x and y are both 5x3.
# torch.add(x, y) produces the same result.
y = torch.rand(5,3)
x + y

tensor([[-0.9592,  1.4618,  1.3465],
        [-1.7017,  0.1503, -0.4365],
        [ 1.7972, -1.2967,  0.8953],
        [-1.0163, -0.1322,  0.0564],
        [ 0.0436,  0.7670,  1.7571]])

In [18]:
torch.add(x, y)

tensor([[-0.9592,  1.4618,  1.3465],
        [-1.7017,  0.1503, -0.4365],
        [ 1.7972, -1.2967,  0.8953],
        [-1.0163, -0.1322,  0.0564],
        [ 0.0436,  0.7670,  1.7571]])

- 索引

In [19]:
x[:, -1]

tensor([ 1.2545, -1.1327,  0.5398, -0.1077,  1.4332])

In [20]:
x

tensor([[-1.8858,  0.7104,  1.2545],
        [-2.1260, -0.0664, -1.1327],
        [ 0.8287, -1.5604,  0.5398],
        [-1.8596, -0.3208, -0.1077],
        [-0.3385, -0.1479,  1.4332]])

In [29]:
# view() presents the same 16 elements under a different shape.
x = torch.randn(4, 4)
y = x.view(16)  # flatten to a 1-D tensor of 16 elements
z = x.view(-1, 8)  # -1 means "infer this dimension": 16 / 8 = 2 rows
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [30]:
y

tensor([-0.8590,  0.2295, -1.8055, -0.2096,  0.6050, -0.9037,  3.3610, -0.1885,
         0.2639, -0.4013,  0.2293,  1.2923,  0.8684,  0.8374, -1.4927,  0.4628])

In [31]:
x

tensor([[-0.8590,  0.2295, -1.8055, -0.2096],
        [ 0.6050, -0.9037,  3.3610, -0.1885],
        [ 0.2639, -0.4013,  0.2293,  1.2923],
        [ 0.8684,  0.8374, -1.4927,  0.4628]])

In [32]:
z

tensor([[-0.8590,  0.2295, -1.8055, -0.2096,  0.6050, -0.9037,  3.3610, -0.1885],
        [ 0.2639, -0.4013,  0.2293,  1.2923,  0.8684,  0.8374, -1.4927,  0.4628]])

- 和Numpy的协同操作

In [33]:
# Tensor -> NumPy: for a CPU tensor the returned ndarray shares memory with
# the tensor, so in-place changes to one are visible through the other.
a = torch.ones(5)
b = a.numpy()
b

array([1., 1., 1., 1., 1.], dtype=float32)

In [5]:
# NumPy -> Tensor: from_numpy keeps the array's dtype, so np.ones (float64 by
# default) yields a torch.float64 tensor.
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
b

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

- 框架干的最厉害的一件事就是帮我们把反向传播全部计算好了

In [35]:
# Request gradient tracking at creation time via requires_grad=True.
x = torch.randn(3, 4, requires_grad=True)
x

tensor([[ 8.7642e-01,  9.7455e-02, -1.9850e+00,  3.0431e-01],
        [-1.5169e+00,  4.8478e-01,  2.1452e+00, -3.8118e-01],
        [ 3.3879e-01, -2.9269e-05, -2.1844e-02, -5.3195e-01]],
       requires_grad=True)

In [36]:
# Approach 2: create the tensor first, then switch on gradient tracking by
# setting its requires_grad attribute.
x = torch.randn(3, 4)
x.requires_grad = True
x

tensor([[ 0.4582,  0.6653, -0.3796, -0.2092],
        [-2.1461, -0.6125,  0.6430, -0.5255],
        [ 0.3422,  1.1090,  0.8936, -0.8197]], requires_grad=True)

In [37]:
b = torch.randn(3, 4, requires_grad=True)

In [38]:
t = x + b

In [39]:
y = t.sum()
y

tensor(6.7207, grad_fn=<SumBackward0>)

In [40]:
y.backward()

In [41]:
b.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [42]:
x.requires_grad, y.requires_grad, t.requires_grad

(True, True, True)

In [44]:
# Computation flow: z = w * x + b.
# x is plain input data (no gradient tracking); w and b are the tensors that
# request gradients, so y and z become part of the autograd graph.
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)
y = w * x
z = y + b

In [46]:
x.requires_grad, y.requires_grad, w.requires_grad, b.requires_grad

(False, True, True, True)

In [47]:
x.is_leaf, w.is_leaf, b.is_leaf, y.is_leaf, z.is_leaf

(True, True, True, False, False)

In [49]:
# Gradients accumulate in .grad across backward() calls unless they are
# cleared; retain_graph=True keeps the graph so backward() can be run again.
z.backward(retain_graph=True)
w.grad

tensor([0.0210])

In [54]:
b.grad

tensor([1.])

- 做一个简单的线性回归

In [6]:
# Training inputs: the integers 0..10 stored as a float32 column vector.
x_values = list(range(11))
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)  # (11,) -> (11, 1): one sample per row
x_train.shape

(11, 1)

In [7]:
# Targets lie exactly on the line y = 2x + 1, one per entry of x_values.
y_values = [2 * value + 1 for value in x_values]
y_train = np.asarray(y_values, dtype=np.float32)

In [8]:
# Reshape the targets into a column vector so they line up row-for-row with
# x_train (shape (11, 1)).
y_train = y_train.reshape(-1, 1)
y_train.shape

(11, 1)

In [9]:
import torch.nn as nn

- 其实线性回归就是一个不加激活函数的全连接层

In [10]:
class LinearRegressionModel(nn.Module):
    """Linear regression expressed as a single fully connected layer.

    No activation function is applied, so the model output is exactly the
    output of one ``nn.Linear`` layer.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only learnable parameters are this layer's weight and bias.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the linear layer's prediction for the batch ``x``."""
        return self.linear(x)

In [11]:
# One input feature and one output value per sample, matching the (11, 1)
# shapes of x_train and y_train.
input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)

In [73]:
model

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

In [12]:
# Hyper-parameters, optimizer and loss function.
epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  # plain SGD
criterion = nn.MSELoss()  # mean squared error

# The training data never changes, so convert ndarray -> tensor once instead
# of repeating the conversion on every epoch.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Train the model; epochs are numbered from 1 so the log reads 50, 100, ...
for epoch in range(1, epochs + 1):
    # Gradients accumulate by default, so clear them at every iteration.
    optimizer.zero_grad()

    # Forward pass.
    outputs = model(inputs)
    # Compute the loss.
    loss = criterion(outputs, labels)
    # Backward pass.
    loss.backward()
    # Update the weights.
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 0.1153845265507698
epoch 100, loss 0.0658109188079834
epoch 150, loss 0.03753611072897911
epoch 200, loss 0.021409111097455025
epoch 250, loss 0.012211007066071033
epoch 300, loss 0.006964683532714844
epoch 350, loss 0.003972407430410385
epoch 400, loss 0.002265706192702055
epoch 450, loss 0.0012922630412504077
epoch 500, loss 0.000737051828764379
epoch 550, loss 0.0004203898715786636
epoch 600, loss 0.00023977392993401736
epoch 650, loss 0.0001367626100545749
epoch 700, loss 7.800171442795545e-05
epoch 750, loss 4.448592517292127e-05
epoch 800, loss 2.5376672056154348e-05
epoch 850, loss 1.4473599549091887e-05
epoch 900, loss 8.255916327470914e-06
epoch 950, loss 4.708312189904973e-06
epoch 1000, loss 2.6860516300075687e-06


In [13]:
# Model predictions on the training inputs.
# Inference needs no gradients: run the forward pass under torch.no_grad()
# so no autograd graph is built and the result converts straight to NumPy.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted

array([[ 0.9969515],
       [ 2.9973905],
       [ 4.9978294],
       [ 6.9982686],
       [ 8.998707 ],
       [10.9991455],
       [12.999585 ],
       [15.000024 ],
       [17.000463 ],
       [19.000902 ],
       [21.00134  ]], dtype=float32)

In [14]:
# Saving and loading the model: only the parameters (state_dict) are written
# to disk, then read back and restored into the same model instance.
# NOTE: torch.load unpickles the file, so only load files you trust.
state = model.state_dict()
torch.save(state, 'model.pkl')
restored = torch.load("model.pkl")
model.load_state_dict(restored)

<All keys matched successfully>

#### 使用GPU进行训练
- 只需要把数据和模型传入到cuda里面就可以了

In [16]:
class LinearRegressionModelGpu(nn.Module):
    """Single-layer linear regression model used in the GPU training example.

    The module itself is device-agnostic; the caller decides where it runs by
    moving it (and the data) with ``.to(device)``.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

input_dim = 1
output_dim = 1

model = LinearRegressionModelGpu(input_dim, output_dim)
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# The training data is constant, so convert it and copy it to the device once
# rather than repeating the host-to-device transfer on every epoch.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

epochs = 1000
# Epochs are numbered from 1 so the log reads 50, 100, ...
for epoch in range(1, epochs + 1):
    optimizer.zero_grad()  # gradients accumulate by default; reset each step
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 0.2503982186317444
epoch 100, loss 0.1428179144859314
epoch 150, loss 0.08145790547132492
epoch 200, loss 0.04646045342087746
epoch 250, loss 0.026499329134821892
epoch 300, loss 0.015114233829081059
epoch 350, loss 0.008620606735348701
epoch 400, loss 0.004916887730360031
epoch 450, loss 0.002804417395964265
epoch 500, loss 0.0015995538560673594
epoch 550, loss 0.0009123071795329452
epoch 600, loss 0.0005203422624617815
epoch 650, loss 0.0002967813634313643
epoch 700, loss 0.0001692787918727845
epoch 750, loss 9.654973837314174e-05
epoch 800, loss 5.5066251661628485e-05
epoch 850, loss 3.140662738587707e-05
epoch 900, loss 1.7915388525580056e-05
epoch 950, loss 1.0218884199275635e-05
epoch 1000, loss 5.828241683047963e-06
