#### pytorch的安装
- cpu版本安装:pip install torch==1.3.0+cpu torchvision=0.4.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
- GPU版本安装: pip install torch==1.3.0 torchvision=0.4 -f https://download.pytorch.org/whl/torch_stable

In [7]:
import torch
torch.__version__


'1.10.2'

#### 基本使用方法
- 创建一个矩阵

In [8]:
x = torch.empty(5, 3)
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


- 创建一个随机值

In [9]:
x = torch.rand(5, 3)
x

tensor([[0.5681, 0.1327, 0.7864],
        [0.0698, 0.0250, 0.6641],
        [0.7761, 0.7579, 0.4064],
        [0.5142, 0.9232, 0.1499],
        [0.3884, 0.7971, 0.9654]])

- 初始化一个全零的矩阵

In [10]:
x = torch.zeros(5, 3, dtype=torch.long)
x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [11]:
# 直接传入数据
x = torch.tensor([5, 5, 3])

In [12]:
x

tensor([5, 5, 3])

In [13]:
x = x.new_ones(5, 3, dtype=torch.double)

In [14]:
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

In [15]:
x = torch.randn_like(x, dtype=torch.float)
x

tensor([[-1.8858,  0.7104,  1.2545],
        [-2.1260, -0.0664, -1.1327],
        [ 0.8287, -1.5604,  0.5398],
        [-1.8596, -0.3208, -0.1077],
        [-0.3385, -0.1479,  1.4332]])

In [16]:
x.size()

torch.Size([5, 3])

- 基本计算方法

In [17]:
y = torch.rand(5,3)
x + y

tensor([[-0.9592,  1.4618,  1.3465],
        [-1.7017,  0.1503, -0.4365],
        [ 1.7972, -1.2967,  0.8953],
        [-1.0163, -0.1322,  0.0564],
        [ 0.0436,  0.7670,  1.7571]])

In [18]:
torch.add(x, y)

tensor([[-0.9592,  1.4618,  1.3465],
        [-1.7017,  0.1503, -0.4365],
        [ 1.7972, -1.2967,  0.8953],
        [-1.0163, -0.1322,  0.0564],
        [ 0.0436,  0.7670,  1.7571]])

- 索引

In [19]:
x[:, -1]

tensor([ 1.2545, -1.1327,  0.5398, -0.1077,  1.4332])

In [20]:
x

tensor([[-1.8858,  0.7104,  1.2545],
        [-2.1260, -0.0664, -1.1327],
        [ 0.8287, -1.5604,  0.5398],
        [-1.8596, -0.3208, -0.1077],
        [-0.3385, -0.1479,  1.4332]])

In [29]:
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [30]:
y

tensor([-0.8590,  0.2295, -1.8055, -0.2096,  0.6050, -0.9037,  3.3610, -0.1885,
         0.2639, -0.4013,  0.2293,  1.2923,  0.8684,  0.8374, -1.4927,  0.4628])

In [31]:
x

tensor([[-0.8590,  0.2295, -1.8055, -0.2096],
        [ 0.6050, -0.9037,  3.3610, -0.1885],
        [ 0.2639, -0.4013,  0.2293,  1.2923],
        [ 0.8684,  0.8374, -1.4927,  0.4628]])

In [32]:
z

tensor([[-0.8590,  0.2295, -1.8055, -0.2096,  0.6050, -0.9037,  3.3610, -0.1885],
        [ 0.2639, -0.4013,  0.2293,  1.2923,  0.8684,  0.8374, -1.4927,  0.4628]])

- 和Numpy的协同操作

In [33]:
a = torch.ones(5)
b = a.numpy()
b

array([1., 1., 1., 1., 1.], dtype=float32)

In [34]:
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
b

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

- 框架干的最厉害的一件事就是帮我们把返回传播全部计算好了

In [35]:
x = torch.randn(3, 4, requires_grad=True)
x

tensor([[ 8.7642e-01,  9.7455e-02, -1.9850e+00,  3.0431e-01],
        [-1.5169e+00,  4.8478e-01,  2.1452e+00, -3.8118e-01],
        [ 3.3879e-01, -2.9269e-05, -2.1844e-02, -5.3195e-01]],
       requires_grad=True)

In [36]:
# 方法2
x = torch.randn(3, 4)
x.requires_grad = True
x

tensor([[ 0.4582,  0.6653, -0.3796, -0.2092],
        [-2.1461, -0.6125,  0.6430, -0.5255],
        [ 0.3422,  1.1090,  0.8936, -0.8197]], requires_grad=True)

In [37]:
b = torch.randn(3, 4, requires_grad=True)

In [38]:
t = x + b

In [39]:
y = t.sum()
y

tensor(6.7207, grad_fn=<SumBackward0>)

In [40]:
y.backward()

In [41]:
b.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [42]:
x.requires_grad, y.requires_grad, t.requires_grad

(True, True, True)

In [44]:
# 计算流程
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)
y = w * x
z = y + b

In [46]:
x.requires_grad, y.requires_grad, w.requires_grad, b.requires_grad

(False, True, True, True)

In [47]:
x.is_leaf, w.is_leaf, b.is_leaf, y.is_leaf, z.is_leaf

(True, True, True, False, False)

In [49]:
z.backward(retain_graph=True) #如果计算不清空会累加起来
w.grad

tensor([0.0210])

In [54]:
b.grad

tensor([1.])

- 做一个简单的线性回归

In [55]:
x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)
x_train.shape

(11, 1)

In [67]:
y_values = [2*i+1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)

In [68]:
y_train = y_train.reshape(-1, 1)
y_train.shape

(11, 1)

In [69]:
import torch.nn as nn

- 其实线性回归就是一个不加激活函数的全连接层

In [71]:
class LinearRegressionModel(nn.Module):
    def __init__(self,input_dim, output_dim):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return(out)

In [72]:
input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)

In [73]:
model

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

In [78]:
#指定好参数和损失函数
epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
#训练模型
for epoch in range(epochs):
    epoch += 1
    # 注意转行成tensor
    inputs = torch.from_numpy(x_train)
    labels =  torch.from_numpy(y_train)
    # 梯度要清零每一次迭代
    optimizer.zero_grad()

    # 前向传播
    outputs = model(inputs)
    # 计算损失
    loss = criterion(outputs, labels)
    # 返向传播
    loss.backward()
    # 更新权重参数
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 1.268649851837722e-09
epoch 100, loss 7.429903359223999e-10
epoch 150, loss 4.2138834088767396e-10
epoch 200, loss 2.447629043889066e-10
epoch 250, loss 1.426721379926832e-10
epoch 300, loss 8.437921195092457e-11
epoch 350, loss 5.201689426415079e-11
epoch 400, loss 3.177159577094635e-11
epoch 450, loss 1.7016853204321336e-11
epoch 500, loss 1.5593505653388462e-11
epoch 550, loss 1.5593505653388462e-11
epoch 600, loss 1.5593505653388462e-11
epoch 650, loss 1.5593505653388462e-11
epoch 700, loss 1.5593505653388462e-11
epoch 750, loss 1.5593505653388462e-11
epoch 800, loss 1.5593505653388462e-11
epoch 850, loss 1.5593505653388462e-11
epoch 900, loss 1.5593505653388462e-11
epoch 950, loss 1.5593505653388462e-11
epoch 1000, loss 1.5593505653388462e-11


In [79]:
#模型预测结果
predicted = model(torch.from_numpy(x_train).requires_grad_()).data.numpy()
predicted

array([[ 0.99999267],
       [ 2.9999938 ],
       [ 4.999995  ],
       [ 6.9999967 ],
       [ 8.999997  ],
       [10.999998  ],
       [13.        ],
       [15.000001  ],
       [17.000002  ],
       [19.000004  ],
       [21.000004  ]], dtype=float32)

In [80]:
# 模型的保存和读取
torch.save(model.state_dict(), 'model.pkl')
model.load_state_dict(torch.load("model.pkl"))

<All keys matched successfully>

#### 使用GPU进行训练
- 只需要把数据和模型传入到cuda里面就可以了

In [85]:
class LinearRegressionModelGpu(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModelGpu, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
    def forward(self, x):
        out = self.linear(x)
        return out

input_dim = 1
output_dim = 1

model = LinearRegressionModelGpu(input_dim, output_dim)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 1000
for epoch in range(epochs):
    epoch = epoch + 1
    inputs = torch.from_numpy(x_train).to(device)
    labels = torch.from_numpy(y_train).to(device)

    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item))

epoch 50, loss <built-in method item of Tensor object at 0x186d15228>
epoch 100, loss <built-in method item of Tensor object at 0x186d15228>
epoch 150, loss <built-in method item of Tensor object at 0x186d15228>
epoch 200, loss <built-in method item of Tensor object at 0x186d15228>
epoch 250, loss <built-in method item of Tensor object at 0x186d15228>
epoch 300, loss <built-in method item of Tensor object at 0x186d15228>
epoch 350, loss <built-in method item of Tensor object at 0x186d15228>
epoch 400, loss <built-in method item of Tensor object at 0x186d52cc8>
epoch 450, loss <built-in method item of Tensor object at 0x186d52cc8>
epoch 500, loss <built-in method item of Tensor object at 0x186d52cc8>
epoch 550, loss <built-in method item of Tensor object at 0x186d52cc8>
epoch 600, loss <built-in method item of Tensor object at 0x186d52cc8>
epoch 650, loss <built-in method item of Tensor object at 0x186d52cc8>
epoch 700, loss <built-in method item of Tensor object at 0x186d52cc8>
epoch 7