# 深度学习计算
PyTorch 可以方便地构造结构化的神经网络。

## 以“多层感知机”为例

### 使用 PyTorch 内置的“Sequential”来创建块
创建一个两层的网络结构。第一层是一个具有256个单元并带有ReLU激活函数的全连接隐藏层；第二层是一个具有10个输出单元、不带激活函数的全连接输出层。

In [12]:
import torch

# Two-layer perceptron: 20 inputs -> 256 hidden units with ReLU -> 10 outputs.
net = torch.nn.Sequential(
    torch.nn.Linear(in_features=20, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
)
# Random batch of 2 samples with 20 features each.
X = torch.rand(size=(2, 20))
# Forward pass; as the last expression of the cell its value is displayed.
net(X)

tensor([[ 0.0025,  0.3402,  0.1446,  0.0756,  0.0347, -0.0842, -0.0869,  0.0454,
         -0.2401,  0.1225],
        [ 0.0703,  0.2796,  0.0541,  0.0137,  0.0451,  0.0215, -0.0850, -0.0919,
         -0.2147,  0.0110]], grad_fn=<AddmmBackward0>)

### 自定义块

In [13]:
class MLP(torch.nn.Module):
    """Multilayer perceptron with one hidden layer, written as a custom block.

    Maps 20 input features to 256 hidden units (ReLU) and then to 10 outputs.
    """

    def __init__(self):
        # Module must be initialised before sub-layers are assigned to it.
        super().__init__()
        self.hidden = torch.nn.Linear(in_features=20, out_features=256)
        self.out = torch.nn.Linear(in_features=256, out_features=10)

    def forward(self, X):
        """Return the output-layer values for the input batch ``X``."""
        hidden_activation = torch.nn.functional.relu(self.hidden(X))
        return self.out(hidden_activation)


net = MLP()
X = torch.rand(size=(2, 20))
net(X)

tensor([[-0.0589,  0.0416, -0.0361, -0.0030,  0.2509,  0.1424,  0.1926, -0.1186,
          0.0687, -0.0658],
        [-0.0578,  0.0836,  0.0741, -0.0082,  0.2624,  0.1138,  0.1078, -0.0280,
          0.1162, -0.1145]], grad_fn=<AddmmBackward0>)

## 自己实现 PyTorch 内置的“Sequential”

In [17]:
class MySequential(torch.nn.Module):
    """Minimal re-implementation of ``torch.nn.Sequential``.

    The positional arguments are the blocks to chain. They are registered as
    child modules named "0", "1", ... in the order given, and ``forward``
    feeds the input through them in that same order.
    """

    def __init__(self, *args):
        super().__init__()
        for index, block in enumerate(args):
            # Child modules must be registered under string names: Module
            # builds parameter names such as "0.weight" by string
            # concatenation, so using the block itself as the key breaks
            # state_dict(), named_parameters() and repr().
            self.add_module(str(index), block)

    def forward(self, X):
        """Apply every registered block to ``X`` in registration order."""
        # _modules is the ordered mapping Module keeps of its children.
        for block in self._modules.values():
            X = block(X)
        return X


net = MySequential(torch.nn.Linear(20, 256),
                   torch.nn.ReLU(), torch.nn.Linear(256, 10))
X = torch.rand(2, 20)
net(X)

tensor([[ 0.2887,  0.0716,  0.4199, -0.2774,  0.1471,  0.1894, -0.2937, -0.2233,
          0.0428, -0.0668],
        [ 0.3021,  0.0883,  0.5142, -0.3177,  0.2247,  0.1806, -0.3075, -0.2498,
         -0.0454, -0.0476]], grad_fn=<AddmmBackward0>)

## 查看神经网络

In [18]:
# Fresh 20 -> 256 (ReLU) -> 10 network used by the inspection cells below.
net = torch.nn.Sequential(
    torch.nn.Linear(in_features=20, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
)
X = torch.rand(size=(2, 20))
# Last expression of the cell, so its value is displayed.
net(X)

tensor([[ 0.0158, -0.1089, -0.0396,  0.0476, -0.0572,  0.2625, -0.0118, -0.1034,
          0.0983, -0.0203],
        [ 0.0861, -0.1505,  0.0174,  0.0709, -0.0258,  0.1927, -0.0503,  0.0008,
         -0.0225, -0.0891]], grad_fn=<AddmmBackward0>)

### 查看神经网络结构

In [19]:
# Evaluating the module displays its structure: each child layer with its index.
net

Sequential(
  (0): Linear(in_features=20, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)

### 查看每层的参数

In [25]:
# Parameters of the layer at index 2, keyed by name ('weight' and 'bias').
net[2].state_dict()

OrderedDict([('weight',
              tensor([[-0.0405, -0.0025, -0.0189,  ..., -0.0561, -0.0423,  0.0461],
                      [ 0.0523,  0.0048,  0.0618,  ..., -0.0143,  0.0534, -0.0197],
                      [ 0.0061,  0.0553, -0.0093,  ..., -0.0565,  0.0610,  0.0190],
                      ...,
                      [ 0.0069,  0.0035,  0.0332,  ..., -0.0187,  0.0151,  0.0262],
                      [-0.0576,  0.0056, -0.0600,  ..., -0.0374, -0.0137,  0.0107],
                      [ 0.0291, -0.0184,  0.0187,  ..., -0.0386, -0.0488,  0.0595]])),
             ('bias',
              tensor([ 0.0112,  0.0452,  0.0435,  0.0369, -0.0139,  0.0280, -0.0466, -0.0326,
                       0.0518, -0.0620]))])

In [28]:
# First the Parameter itself (printed with requires_grad), then its raw values.
output_bias = net[2].bias
print(output_bias)
print(output_bias.data)

Parameter containing:
tensor([ 0.0112,  0.0452,  0.0435,  0.0369, -0.0139,  0.0280, -0.0466, -0.0326,
         0.0518, -0.0620], requires_grad=True)
tensor([ 0.0112,  0.0452,  0.0435,  0.0369, -0.0139,  0.0280, -0.0466, -0.0326,
         0.0518, -0.0620])


In [43]:
# List every parameter of the network as a (qualified name, shape) pair.
for name, param in net.named_parameters():
    summary = (name, param.shape)
    print(summary)
# The same qualified names index the network-level state dict.
all_params = net.state_dict()
print(all_params['2.bias'])

('0.weight', torch.Size([256, 20]))
('0.bias', torch.Size([256]))
('2.weight', torch.Size([10, 256]))
('2.bias', torch.Size([10]))
tensor([ 0.0112,  0.0452,  0.0435,  0.0369, -0.0139,  0.0280, -0.0466, -0.0326,
         0.0518, -0.0620])


## 参数初始化

### 将权重初始化为标准差为0.01的高斯随机变量，并将偏置设置为0

In [49]:
def init_normal(module):
    """Initialise a fully connected layer with small Gaussian weights.

    Meant to be passed to ``Module.apply``. For a ``torch.nn.Linear`` layer
    the weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.01, and the bias (when the layer has one) is set to zero.
    Any other module is left untouched.
    """
    # Exact type match, as before: subclasses of Linear are skipped.
    if type(module) is not torch.nn.Linear:
        return
    torch.nn.init.normal_(module.weight, mean=0, std=0.01)
    # Layers built with bias=False have bias set to None; nothing to reset.
    if module.bias is not None:
        torch.nn.init.zeros_(module.bias)


net = torch.nn.Sequential(torch.nn.Linear(
    20, 256), torch.nn.ReLU(), torch.nn.Linear(256, 10))

# apply() calls init_normal on every sub-module of the network.
net.apply(init_normal)
net[2].state_dict()

OrderedDict([('weight',
              tensor([[ 0.0198,  0.0013,  0.0006,  ..., -0.0224,  0.0123,  0.0019],
                      [ 0.0110,  0.0102, -0.0081,  ..., -0.0075, -0.0132, -0.0232],
                      [ 0.0043,  0.0054,  0.0038,  ..., -0.0271, -0.0036, -0.0029],
                      ...,
                      [-0.0063, -0.0118, -0.0082,  ..., -0.0027, -0.0139, -0.0110],
                      [ 0.0033, -0.0247,  0.0036,  ...,  0.0114,  0.0004, -0.0166],
                      [-0.0181,  0.0165, -0.0071,  ...,  0.0047, -0.0014,  0.0172]])),
             ('bias', tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))])

### 将权重初始化为1，并将偏置设置为0

In [50]:
def init_normal(module):
    """Initialise a fully connected layer with constant weights.

    Despite its name, this function does not draw random values: for a
    ``torch.nn.Linear`` layer every weight is set to 1 and the bias (when the
    layer has one) is set to zero. Any other module is left untouched.
    Meant to be passed to ``Module.apply``.
    """
    # Exact type match, as before: subclasses of Linear are skipped.
    if type(module) is not torch.nn.Linear:
        return
    torch.nn.init.constant_(module.weight, 1)
    # Layers built with bias=False have bias set to None; nothing to reset.
    if module.bias is not None:
        torch.nn.init.zeros_(module.bias)


net = torch.nn.Sequential(torch.nn.Linear(
    20, 256), torch.nn.ReLU(), torch.nn.Linear(256, 10))

# apply() calls init_normal on every sub-module of the network.
net.apply(init_normal)
net[2].state_dict()

OrderedDict([('weight',
              tensor([[1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      ...,
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.]])),
             ('bias', tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))])

### 直接设定某个参数

In [53]:
# Overwrite one entry of the last layer's bias in place. The write happens
# under no_grad so it is not recorded for differentiation, while still going
# through the Parameter itself rather than the legacy `.data` attribute,
# whose in-place edits are invisible to autograd's correctness checks.
with torch.no_grad():
    net[2].bias[0] = 10
net[2].state_dict()

OrderedDict([('weight',
              tensor([[1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      ...,
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.],
                      [1., 1., 1.,  ..., 1., 1., 1.]])),
             ('bias',
              tensor([10.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]))])

## 保存和读取张量和网络

### 保存张量

In [62]:
X = torch.rand(size=(2, 20))
y = torch.zeros(size=(10,))
# Both tensors are written to one file as a single list.
torch.save(obj=[X, y], f='X-save_temp')
[X, y]

[tensor([[0.3542, 0.9103, 0.6794, 0.5084, 0.9133, 0.0081, 0.8036, 0.5374, 0.5328,
          0.2054, 0.3020, 0.3962, 0.3102, 0.3714, 0.8525, 0.9643, 0.9550, 0.5625,
          0.6267, 0.0515],
         [0.4727, 0.0999, 0.0940, 0.5063, 0.3826, 0.8663, 0.8532, 0.3783, 0.2636,
          0.0717, 0.1112, 0.7846, 0.2300, 0.4228, 0.1764, 0.8222, 0.8728, 0.8149,
          0.2174, 0.4113]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]

### 读取张量

In [63]:
# Read the stored list back and unpack it into its two tensors.
saved_tensors = torch.load('X-save_temp')
X_read, y_read = saved_tensors
(X_read, y_read)

(tensor([[0.3542, 0.9103, 0.6794, 0.5084, 0.9133, 0.0081, 0.8036, 0.5374, 0.5328,
          0.2054, 0.3020, 0.3962, 0.3102, 0.3714, 0.8525, 0.9643, 0.9550, 0.5625,
          0.6267, 0.0515],
         [0.4727, 0.0999, 0.0940, 0.5063, 0.3826, 0.8663, 0.8532, 0.3783, 0.2636,
          0.0717, 0.1112, 0.7846, 0.2300, 0.4228, 0.1764, 0.8222, 0.8728, 0.8149,
          0.2174, 0.4113]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))

### 保存整个模型

In [71]:
# Small 2 -> 5 (ReLU) -> 2 network used for the save/load demonstration.
net = torch.nn.Sequential(
    torch.nn.Linear(in_features=2, out_features=5),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=5, out_features=2),
)
# The module object itself is saved, not only its parameters.
torch.save(obj=net, f='net_temp')
net.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.1187,  0.4239],
                      [ 0.1013,  0.5562],
                      [ 0.4396, -0.1136],
                      [ 0.5349,  0.7042],
                      [-0.2213,  0.6671]])),
             ('0.bias', tensor([ 0.6218, -0.1190,  0.4912,  0.3491, -0.2176])),
             ('2.weight',
              tensor([[-0.4181,  0.4151, -0.3008,  0.1904, -0.1256],
                      [ 0.2354, -0.0326,  0.0359,  0.3142, -0.1758]])),
             ('2.bias', tensor([0.2629, 0.3474]))])

### 读取整个模型

In [72]:
# NOTE: a whole-model file is a pickle, and unpickling can execute arbitrary
# code, so only load files that come from a trusted source.
# Recent PyTorch releases default torch.load to weights_only=True, which
# refuses to unpickle a full module object; full unpickling is therefore
# requested explicitly.
net_read = torch.load('net_temp', weights_only=False)
net_read.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.1187,  0.4239],
                      [ 0.1013,  0.5562],
                      [ 0.4396, -0.1136],
                      [ 0.5349,  0.7042],
                      [-0.2213,  0.6671]])),
             ('0.bias', tensor([ 0.6218, -0.1190,  0.4912,  0.3491, -0.2176])),
             ('2.weight',
              tensor([[-0.4181,  0.4151, -0.3008,  0.1904, -0.1256],
                      [ 0.2354, -0.0326,  0.0359,  0.3142, -0.1758]])),
             ('2.bias', tensor([0.2629, 0.3474]))])

### 仅保存模型的参数

In [78]:
net = torch.nn.Sequential(
    torch.nn.Linear(in_features=2, out_features=5),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=5, out_features=2),
)
# Only the name -> tensor mapping is written, not the module object.
torch.save(obj=net.state_dict(), f='net_params_temp')
net.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.6121, -0.0523],
                      [-0.1059, -0.0466],
                      [-0.1188, -0.0988],
                      [-0.2418,  0.5693],
                      [-0.1798,  0.1912]])),
             ('0.bias', tensor([-0.2122, -0.2530,  0.2486,  0.4672,  0.6486])),
             ('2.weight',
              tensor([[ 0.2628, -0.2970, -0.0628, -0.4034,  0.4362],
                      [-0.3537,  0.0181,  0.0323,  0.2822, -0.0645]])),
             ('2.bias', tensor([-0.3583,  0.2941]))])

In [80]:
# The file holds parameters only, so a network with the same layer layout
# is built first and the stored values are then copied into it.
net_new = torch.nn.Sequential(
    torch.nn.Linear(in_features=2, out_features=5),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=5, out_features=2),
)
net_params = torch.load(f='net_params_temp')
net_new.load_state_dict(state_dict=net_params)
print(net_new.state_dict())

OrderedDict([('0.weight', tensor([[-0.6121, -0.0523],
        [-0.1059, -0.0466],
        [-0.1188, -0.0988],
        [-0.2418,  0.5693],
        [-0.1798,  0.1912]])), ('0.bias', tensor([-0.2122, -0.2530,  0.2486,  0.4672,  0.6486])), ('2.weight', tensor([[ 0.2628, -0.2970, -0.0628, -0.4034,  0.4362],
        [-0.3537,  0.0181,  0.0323,  0.2822, -0.0645]])), ('2.bias', tensor([-0.3583,  0.2941]))])


## GPU计算

### 查看数据所在的设备

In [83]:
# No device argument is given when the tensor is created.
X = torch.rand(size=(2, 20))
# Report which device holds the tensor's data.
X.device

device(type='cpu')

### 将数据移动到GPU上计算

In [90]:
# Use the first GPU when one is available; otherwise fall back to the CPU so
# the cell still runs on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
X = torch.rand(2, 20, device=device)
Y = torch.rand(2, 20, device=device)
print(X.device)
print(Y.device)
# Both operands are created on the same device, as the addition requires.
print(X + Y)

cuda:0
cuda:0
tensor([[0.8542, 1.1229, 0.8869, 1.3560, 0.2596, 0.4408, 0.4422, 0.8121, 0.5268,
         0.9892, 0.7047, 0.2713, 1.1169, 0.5368, 1.0712, 1.3488, 0.7407, 0.3929,
         0.5463, 1.1236],
        [0.1262, 0.7674, 1.3805, 1.0873, 0.6866, 1.4109, 1.1670, 0.5813, 1.3712,
         0.7208, 0.3556, 1.2987, 0.3896, 1.4586, 1.4041, 1.3134, 1.0984, 1.6845,
         1.7329, 1.0194]], device='cuda:0')
