## 设置模型存放在cpu/gpu

基础使用

In [3]:
import torch
import torch.nn as nn

# Demonstrates Module.cuda() / Module.cpu(): both move the parameters in place,
# so id(net) stays the same and only the parameters' is_cuda flag changes.
net = nn.Sequential(nn.Linear(3, 3))

print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))

# net.cuda() raises on a machine without a usable CUDA device, so only call it
# when CUDA is actually available; otherwise the model simply stays on the CPU.
if torch.cuda.is_available():
    net.cuda()
else:
    print("\nCUDA is not available, skipping net.cuda()")
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))

net.cpu()
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))


id:2249072738256 is_cuda: False

id:2249072738256 is_cuda: True

id:2249072738256 is_cuda: False


to 方法的妙用：根据当前平台是否支持 CUDA 加速，自动选择模型存放的设备（GPU 或 CPU）。

In [4]:
# Imports come first so this cell does not depend on an earlier cell having
# already imported torch before torch.device(...) is evaluated.
import torch
import torch.nn as nn

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

net = nn.Sequential(nn.Linear(3, 3))
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))

# Module.to(device) moves the parameters in place; id(net) does not change.
net.to(device)
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))


id:2249072667152 is_cuda: False

id:2249072667152 is_cuda: True


## 获取模型参数、加载权重参数

### state_dict

In [5]:
class TinnyCNN(nn.Module):
    """Minimal CNN: one un-padded 3x3 convolution followed by a linear classifier.

    Args:
        cls_num: Number of output classes, i.e. ``out_features`` of the final
            linear layer.
    """

    def __init__(self, cls_num=2):
        super().__init__()
        self.convolution_layer = nn.Conv2d(1, 1, kernel_size=(3, 3))
        # 36 input features = 1 channel x 6 x 6, which is what e.g. a 1x8x8
        # input produces after the un-padded 3x3 convolution above.
        self.fc = nn.Linear(36, cls_num)

    def forward(self, x):
        """Run the convolution, flatten per sample, then classify.

        Args:
            x: Input tensor whose convolved output holds 36 values per sample
                (for example shape ``(batch, 1, 8, 8)``).

        Returns:
            Tensor with ``cls_num`` scores per sample.
        """
        x = self.convolution_layer(x)
        # .view() changes the tensor's shape without changing its data.
        # x.size(0) is the size of the first dimension (normally the batch
        # size); -1 asks PyTorch to infer that dimension so that the total
        # number of elements stays the same.
        x = x.view(x.size(0), -1)
        out = self.fc(x)
        return out

# Build a two-class TinnyCNN and dump every state_dict entry:
# the entry name on one line, then the tensor, then a blank line.
model = TinnyCNN(cls_num=2)

state_dict = model.state_dict()
for entry_name in state_dict:
    print(entry_name, state_dict[entry_name], sep="\n", end="\n\n")

convolution_layer.weight
tensor([[[[-0.1788, -0.0203, -0.3292],
          [ 0.2086,  0.2682,  0.0382],
          [-0.0984,  0.2123,  0.0246]]]])

convolution_layer.bias
tensor([-0.1774])

fc.weight
tensor([[-0.1079, -0.0762,  0.1582, -0.1041, -0.0167,  0.1184, -0.1355,  0.0016,
         -0.1370,  0.0876,  0.0696, -0.0097,  0.0587, -0.0820,  0.0980,  0.1588,
          0.0020, -0.0479, -0.1177,  0.0094, -0.0073,  0.1081,  0.0466,  0.0928,
          0.0174,  0.0685, -0.1042, -0.0263,  0.0626, -0.0960,  0.0705,  0.0608,
          0.0042, -0.1167,  0.0374,  0.1262],
        [ 0.1290, -0.1533,  0.0815, -0.0372,  0.1656, -0.0387,  0.0640,  0.0162,
         -0.0366, -0.1145,  0.1579, -0.0276, -0.0739,  0.0471,  0.1003,  0.0425,
         -0.0007, -0.1184, -0.1328, -0.0779, -0.0133, -0.0135, -0.0446,  0.0533,
         -0.1664, -0.1522, -0.1650, -0.1171, -0.0775, -0.1452,  0.1322, -0.1203,
          0.0555,  0.1153, -0.0102, -0.1104]])

fc.bias
tensor([ 0.1512, -0.0281])



In [6]:
from torchvision import models

# Same dump for torchvision's ResNet-18 (randomly initialised, no pretrained
# weights requested): entry name, tensor, blank line.
resnet18 = models.resnet18()
state_dict = resnet18.state_dict()
for entry_name, entry_tensor in state_dict.items():
    print(entry_name, entry_tensor, sep="\n", end="\n\n")

conv1.weight
tensor([[[[ 0.0106,  0.0004,  0.0191,  ...,  0.0060,  0.0493, -0.0269],
          [ 0.0049, -0.0403,  0.0289,  ...,  0.0297, -0.0770, -0.0175],
          [ 0.0066, -0.0295,  0.0195,  ...,  0.0268,  0.0267,  0.0111],
          ...,
          [ 0.0383, -0.0703,  0.0007,  ...,  0.0025,  0.0168,  0.0587],
          [-0.0360, -0.0193,  0.0042,  ..., -0.0035, -0.0031, -0.0261],
          [-0.0295, -0.0015, -0.0259,  ..., -0.0041,  0.0223,  0.0111]],

         [[-0.0015,  0.0255,  0.0070,  ...,  0.0257,  0.0308, -0.0026],
          [ 0.0169,  0.0395, -0.0033,  ..., -0.0114,  0.0381,  0.0253],
          [ 0.0170,  0.0388,  0.0322,  ...,  0.0185,  0.0062,  0.0406],
          ...,
          [ 0.0086,  0.0466,  0.0344,  ...,  0.0082, -0.0138, -0.0199],
          [ 0.0078, -0.0047, -0.0018,  ..., -0.0363,  0.0204, -0.0029],
          [ 0.0248,  0.0223,  0.0031,  ..., -0.0248,  0.0195, -0.0309]],

         [[-0.0209,  0.0051,  0.0270,  ..., -0.0027, -0.0137, -0.0098],
          [-0.017

In [7]:
from torchvision import models
# Dump the state_dict of a freshly constructed AlexNet, one entry at a time.
alexnet = models.AlexNet()
state_dict = alexnet.state_dict()
for layer_key in state_dict.keys():
    print(layer_key)
    print(state_dict[layer_key], end="\n\n")

features.0.weight
tensor([[[[ 0.0128, -0.0154, -0.0295,  ...,  0.0393, -0.0307, -0.0167],
          [ 0.0178, -0.0420,  0.0187,  ...,  0.0125,  0.0231,  0.0045],
          [-0.0468,  0.0252,  0.0276,  ...,  0.0325, -0.0104,  0.0079],
          ...,
          [-0.0215,  0.0046,  0.0337,  ..., -0.0179, -0.0164,  0.0063],
          [-0.0413, -0.0463,  0.0176,  ..., -0.0441,  0.0045, -0.0181],
          [-0.0330, -0.0334,  0.0379,  ...,  0.0431,  0.0276,  0.0361]],

         [[ 0.0096, -0.0441,  0.0081,  ..., -0.0434,  0.0400, -0.0516],
          [-0.0262, -0.0120, -0.0256,  ...,  0.0457,  0.0080, -0.0224],
          [-0.0244,  0.0073,  0.0346,  ..., -0.0129, -0.0140,  0.0339],
          ...,
          [ 0.0274, -0.0191,  0.0124,  ..., -0.0405,  0.0519,  0.0041],
          [ 0.0351, -0.0017,  0.0412,  ..., -0.0103,  0.0055, -0.0512],
          [ 0.0303,  0.0241, -0.0211,  ...,  0.0369, -0.0298, -0.0085]],

         [[-0.0197,  0.0192, -0.0237,  ..., -0.0331, -0.0249, -0.0490],
          [-

### load_state_dict

In [8]:
class TinnyCNN(nn.Module):
    """Tiny demo network: a single 3x3 convolution feeding one linear layer."""

    def __init__(self, cls_num=2):
        super().__init__()
        # Registration order (convolution first, then fc) fixes the order of
        # the entries in state_dict() / parameters().
        self.convolution_layer = nn.Conv2d(1, 1, kernel_size=(3, 3))
        self.fc = nn.Linear(36, cls_num)

    def forward(self, x):
        """Convolve, flatten everything after the first dimension, classify."""
        feature_map = self.convolution_layer(x)
        flattened = feature_map.view(feature_map.size(0), -1)
        return self.fc(flattened)

model = TinnyCNN(2)

# The tensors returned by state_dict() share storage with the live parameters,
# so editing them in place would already change the model and make the
# load_state_dict() call below a no-op. Clone every entry so the edit only
# reaches the model through load_state_dict().
state_dict_tinnycnn = {
    name: tensor.clone() for name, tensor in model.state_dict().items()
}

# Pretend that training changed one of the weights.
state_dict_tinnycnn["convolution_layer.weight"][0, 0, 0, 0] = 12345.

# Copy the modified values into the model.
model.load_state_dict(state_dict_tinnycnn)

# Inspect the model again: the first convolution weight is now 12345.
for key, parameter_value in model.state_dict().items():
    print(key)
    print(parameter_value, end="\n\n")

convolution_layer.weight
tensor([[[[ 1.2345e+04, -3.2659e-01,  3.2156e-01],
          [ 7.8893e-02, -1.9854e-01,  2.3939e-01],
          [-1.5226e-01,  2.8276e-01, -1.0291e-01]]]])

convolution_layer.bias
tensor([0.0696])

fc.weight
tensor([[ 0.0166,  0.0341,  0.0007, -0.1095,  0.1208,  0.0825,  0.1662, -0.0212,
         -0.0234, -0.1172,  0.1377,  0.0018,  0.0385, -0.1051, -0.0605,  0.0715,
         -0.1191, -0.1322, -0.0183, -0.1495,  0.0515,  0.1048, -0.0988, -0.0247,
          0.0440, -0.0820, -0.1486, -0.0600, -0.0824,  0.0827,  0.1079, -0.1282,
         -0.1613,  0.1560,  0.1488, -0.1310],
        [-0.0790,  0.1164,  0.0067,  0.1480,  0.1637, -0.0821,  0.0254,  0.1113,
         -0.1098,  0.0949,  0.0028,  0.0515,  0.1613, -0.1463, -0.1318,  0.0574,
          0.0216,  0.1359,  0.0090, -0.0693, -0.0919, -0.0821, -0.0592, -0.0575,
         -0.0359,  0.0730,  0.0356,  0.1206,  0.1035, -0.0248,  0.0454, -0.0055,
          0.0081, -0.0704,  0.0564,  0.1043]])

fc.bias
tensor([-0.1222, 

### load_state_dict常见报错

In [9]:
from torchvision import models
alexnet = models.AlexNet()
# The entry names of TinnyCNN's state_dict do not match AlexNet's, so loading
# raises RuntimeError. The error is the point of this cell, so catch it and
# print the message instead of letting it abort a top-to-bottom run.
try:
    alexnet.load_state_dict(state_dict_tinnycnn)
except RuntimeError as load_error:
    print("{}: {}".format(type(load_error).__name__, load_error))

RuntimeError: Error(s) in loading state_dict for AlexNet:
	Missing key(s) in state_dict: "features.0.weight", "features.0.bias", "features.3.weight", "features.3.bias", "features.6.weight", "features.6.bias", "features.8.weight", "features.8.bias", "features.10.weight", "features.10.bias", "classifier.1.weight", "classifier.1.bias", "classifier.4.weight", "classifier.4.bias", "classifier.6.weight", "classifier.6.bias". 
	Unexpected key(s) in state_dict: "convolution_layer.weight", "convolution_layer.bias", "fc.weight", "fc.bias". 

可以看到，把 TinnyCNN 的 state_dict 传给 alexnet 这个模型，会得到两类报错：
* 第一类（Missing keys）是 alexnet 需要、但传进来的字典里没有的键，例如 "features.0.weight"、"features.0.bias" 等等；
* 第二类（Unexpected keys）是传进来的字典里有、但 alexnet 并不需要的键，分别是 "convolution_layer.weight"、"convolution_layer.bias"、"fc.weight"、"fc.bias"。

## Module的模块、参数管理

### parameters、named_parameters

In [10]:
class TinnyCNN(nn.Module):
    """Demo model used by the parameter / sub-module inspection cells.

    It holds exactly two sub-modules, registered in this order:
    ``convolution_layer`` (3x3 Conv2d, 1 -> 1 channel) and ``fc``
    (Linear, 36 -> ``cls_num``).
    """

    def __init__(self, cls_num=2):
        nn.Module.__init__(self)
        conv = nn.Conv2d(1, 1, kernel_size=(3, 3))
        self.convolution_layer = conv
        classifier = nn.Linear(36, cls_num)
        self.fc = classifier

    def forward(self, x):
        """Apply the convolution, flatten each sample, apply the linear layer."""
        conv_out = self.convolution_layer(x)
        batch_dim = conv_out.size(0)
        return self.fc(conv_out.view(batch_dim, -1))


model = TinnyCNN(cls_num=2)

In [11]:
# parameters() yields the Parameter objects only (no names):
# show each one's type and size, then its values.
for weight in model.parameters():
    print("{} {}".format(type(weight), weight.size()))
    print(weight)
    print()

<class 'torch.nn.parameter.Parameter'> torch.Size([1, 1, 3, 3])
Parameter containing:
tensor([[[[0.2743, 0.3003, 0.1551],
          [0.0062, 0.1478, 0.2129],
          [0.2620, 0.3257, 0.0935]]]], requires_grad=True)

<class 'torch.nn.parameter.Parameter'> torch.Size([1])
Parameter containing:
tensor([-0.1651], requires_grad=True)

<class 'torch.nn.parameter.Parameter'> torch.Size([2, 36])
Parameter containing:
tensor([[ 0.0365,  0.0114, -0.0199,  0.1205,  0.0405, -0.0478,  0.0733,  0.1176,
         -0.0141, -0.0978,  0.0585, -0.0932, -0.0604,  0.1412, -0.0621,  0.1123,
          0.1599,  0.0215, -0.1048, -0.0492,  0.1098, -0.1549, -0.0063,  0.0401,
         -0.0453,  0.1427, -0.1011, -0.1318, -0.0947,  0.1074,  0.0394,  0.0013,
          0.0825, -0.0538,  0.1628, -0.0419],
        [ 0.0164,  0.1466, -0.1341,  0.0147, -0.0629,  0.0910, -0.0372, -0.0124,
         -0.0979, -0.1448, -0.0624,  0.0312,  0.1050, -0.1208,  0.0260, -0.0513,
          0.0158, -0.1169,  0.0172, -0.0266,  0.1531,

In [12]:
# named_parameters() yields (name, Parameter) pairs.
for param_name, weight in model.named_parameters():
    print(param_name, weight, sep="\n", end="\n\n")

convolution_layer.weight
Parameter containing:
tensor([[[[0.2743, 0.3003, 0.1551],
          [0.0062, 0.1478, 0.2129],
          [0.2620, 0.3257, 0.0935]]]], requires_grad=True)

convolution_layer.bias
Parameter containing:
tensor([-0.1651], requires_grad=True)

fc.weight
Parameter containing:
tensor([[ 0.0365,  0.0114, -0.0199,  0.1205,  0.0405, -0.0478,  0.0733,  0.1176,
         -0.0141, -0.0978,  0.0585, -0.0932, -0.0604,  0.1412, -0.0621,  0.1123,
          0.1599,  0.0215, -0.1048, -0.0492,  0.1098, -0.1549, -0.0063,  0.0401,
         -0.0453,  0.1427, -0.1011, -0.1318, -0.0947,  0.1074,  0.0394,  0.0013,
          0.0825, -0.0538,  0.1628, -0.0419],
        [ 0.0164,  0.1466, -0.1341,  0.0147, -0.0629,  0.0910, -0.0372, -0.0124,
         -0.0979, -0.1448, -0.0624,  0.0312,  0.1050, -0.1208,  0.0260, -0.0513,
          0.0158, -0.1169,  0.0172, -0.0266,  0.1531, -0.0754,  0.1243, -0.0908,
         -0.0582,  0.0523, -0.0033, -0.1088,  0.0612,  0.0942, -0.0772,  0.0603,
         -0

### modules、named_modules

In [13]:
# modules() yields the model itself first, then its sub-modules.
for module in model.modules():
    print(module)
    print()

TinnyCNN(
  (convolution_layer): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
  (fc): Linear(in_features=36, out_features=2, bias=True)
)

Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

Linear(in_features=36, out_features=2, bias=True)



In [14]:
# named_modules() yields (name, module) pairs; the model itself comes first
# under the empty name "", which is why the output starts with a blank line.
for module_name, module in model.named_modules():
    print(module_name, module, sep="\n", end="\n\n")


TinnyCNN(
  (convolution_layer): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
  (fc): Linear(in_features=36, out_features=2, bias=True)
)

convolution_layer
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

fc
Linear(in_features=36, out_features=2, bias=True)



### children、named_children
（与 modules、named_modules 类似，但只返回直接子模块：既不包含 Module 自身，也不会递归进入更深层的子模块。）

In [17]:
# children() yields only the direct sub-modules, without the model itself.
for child in model.children():
    print(child)
    print()

Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

Linear(in_features=36, out_features=2, bias=True)



In [18]:
# named_children() yields (name, module) pairs for the direct sub-modules.
for child_name, child in model.named_children():
    print(child_name, child, sep="\n", end="\n\n")

convolution_layer
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

fc
Linear(in_features=36, out_features=2, bias=True)



### get_parameter、get_submodule

In [19]:
# Look up a parameter by its dotted name, and a sub-module by its name.
fc_bias = model.get_parameter("fc.bias")
conv_layer = model.get_submodule("convolution_layer")

print(fc_bias)
print(conv_layer)
# The returned sub-module is itself a Module, so get_parameter can be called on it.
print(conv_layer.get_parameter("bias"))

Parameter containing:
tensor([-0.1134,  0.0858], requires_grad=True)
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
Parameter containing:
tensor([-0.1651], requires_grad=True)


## 设置模型的参数精度，可选半精度、单精度、双精度等

In [20]:
# A freshly built model: print the dtype of each parameter.
model = TinnyCNN(cls_num=2)
for weight in model.parameters():
    print(weight.dtype)

torch.float32
torch.float32
torch.float32
torch.float32


In [21]:
# half() converts the parameters in place and returns the module itself,
# so the loop can iterate over its return value directly.
for _, weight in model.half().named_parameters():
    print(weight.dtype)

torch.float16
torch.float16
torch.float16
torch.float16


In [22]:
# Back to single precision.
model.float()
for weight in model.parameters():
    print(weight.dtype)

torch.float32
torch.float32
torch.float32
torch.float32


In [23]:
# Double precision.
model.double()
for param_name, weight in model.named_parameters():
    dtype_now = weight.dtype
    print(dtype_now)

torch.float64
torch.float64
torch.float64
torch.float64


In [24]:
# bfloat16; like the other casts, bfloat16() returns the module itself.
for weight in model.bfloat16().parameters():
    print(weight.dtype)

torch.bfloat16
torch.bfloat16
torch.bfloat16
torch.bfloat16


## 对子模块执行特定功能

zero_grad：将所有参数的梯度设置为0，或者None

apply：对所有子Module（以及Module自身）递归执行指定的fn(函数)，常见于参数初始化。

In [25]:
@torch.no_grad()
def init_weights(m):
    """Fill the weight of a linear layer with 1.0; meant for ``Module.apply``.

    Runs under ``torch.no_grad()`` so the in-place fill is not tracked by
    autograd. Only ``weight`` is touched; the bias and all modules that are
    not linear layers are left as they are.

    Args:
        m: The module handed in by ``apply``.
    """
    # isinstance (rather than an exact type comparison) also initialises
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        m.weight.fill_(1.0)
        
# Two stacked linear layers: print the parameters before initialisation ...
net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))

for initial_param in net.parameters():
    print(initial_param)
    print()

# ... run init_weights on the container and on each layer ...
net.apply(init_weights)

# ... and print them again, this time together with their names.
print("执行apply之后:")
for param_name, updated_param in net.named_parameters():
    print(param_name, updated_param, sep="\n", end="\n\n")

Parameter containing:
tensor([[-0.6685, -0.5494],
        [-0.4601, -0.6651]], requires_grad=True)

Parameter containing:
tensor([0.6618, 0.1359], requires_grad=True)

Parameter containing:
tensor([[-0.6186,  0.5946],
        [ 0.3255, -0.0171]], requires_grad=True)

Parameter containing:
tensor([-0.4386, -0.5716], requires_grad=True)

执行apply之后:
0.weight
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

0.bias
Parameter containing:
tensor([0.6618, 0.1359], requires_grad=True)

1.weight
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

1.bias
Parameter containing:
tensor([-0.4386, -0.5716], requires_grad=True)

