In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F # nn.functional.py中存放激活函数等的实现
 
@torch.no_grad()  # the in-place fill below must not be tracked by autograd
def init_weights(m):
    """Fill the weight matrix of a linear layer with 1.0.

    Written to be passed to ``nn.Module.apply``, which calls it on every
    submodule and on the module itself. Every visited module is printed for
    tracing; only ``nn.Linear`` instances are modified, and only their
    ``weight`` (the bias is left untouched).

    Args:
        m: The module currently being visited.
    """
    print("xxxx:", m)
    # isinstance instead of an exact type comparison, so subclasses of
    # nn.Linear are initialised as well.
    if isinstance(m, nn.Linear):
        m.weight.fill_(1.0)
        print("yyyy:", m.weight)
 
class Model(nn.Module):
    """Demo module showing the ways state can be registered on an ``nn.Module``.

    It registers three convolution submodules, one buffer, two scalar
    parameters and a small ``nn.Sequential`` of linear layers. Only ``conv1``
    takes part in ``forward``; the remaining members exist so the notebook
    can inspect them (``state_dict``, ``named_buffers``, ``children``, ...).
    """

    def __init__(self):
        # nn.Module.__init__ must run before any submodule, parameter or
        # buffer is assigned, otherwise registration does not work.
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)  # submodule (child module)
        self.conv2 = nn.Conv2d(20, 20, 5)
        # add_module registers a submodule by name; equivalent to
        # `self.conv3 = nn.Conv2d(10, 40, 5)`.
        # NOTE(review): conv3 expects 10 input channels while conv2 produces
        # 20, so the two cannot be chained as-is; neither is used in forward.
        self.add_module("conv3", nn.Conv2d(10, 40, 5))

        # Register a persistent buffer: it is part of state_dict but is not
        # a parameter.
        self.register_buffer("buffer", torch.randn([2, 3]))

        # Two ways of adding a parameter: attribute assignment ...
        self.param1 = nn.Parameter(torch.rand([1]))
        # ... and explicit registration under a name.
        self.register_parameter("param2", nn.Parameter(torch.rand([1])))

        # nn.Sequential: an ordered container; modules are added in the order
        # they are passed in, and the container can be used as one module.
        self.feature = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
        # apply() runs the function on every submodule and on the container
        # itself; the typical use is parameter initialisation.
        self.feature.apply(init_weights)
        self.feature.to(torch.double)  # cast the parameters to double
        cpu = torch.device("cpu")
        self.feature.to(cpu)  # move the parameters to the CPU device

    def forward(self, x):
        """Apply ``conv1`` followed by ReLU and return the activation.

        Args:
            x: Input tensor accepted by ``conv1`` (one input channel).

        Returns:
            The ReLU-activated output of ``conv1``.
        """
        # The original computed this value but never returned it, so calling
        # the model produced None.
        return F.relu(self.conv1(x))

In [2]:
# Build the model. Its constructor applies init_weights to self.feature,
# which is what emits the "xxxx:" / "yyyy:" trace lines.
model = Model()
# Printing a module lists its registered submodules.
print("## Model:", model)

xxxx: Linear(in_features=2, out_features=2, bias=True)
yyyy: Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
xxxx: Linear(in_features=2, out_features=2, bias=True)
yyyy: Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
xxxx: Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)
## Model: Model(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
  (feature): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Linear(in_features=2, out_features=2, bias=True)
  )
)


In [5]:
# Move all model parameters and buffers to the CPU.
model.cpu()
# Inspect the first parameter to report which device the model lives on.
first_param = next(model.parameters())
print("Model device:", first_param.device)

Model device: cpu


In [6]:
# Cast the model's floating-point parameters to the float data type.
model.float()
# Report the data type of the first parameter.
first_param = next(model.parameters())
print("Parameter dtype:", first_param.dtype)

Parameter dtype: torch.float32


In [10]:
# Reset the gradients of all model parameters.
# NOTE(review): depending on the PyTorch version / `set_to_none` argument the
# gradients are either zeroed or set to None - confirm for the version in use.
model.zero_grad() # clear the gradients of all model parameters


In [11]:
# state_dict() returns a dictionary holding the module's state: parameters
# and persistent buffers are both included, keyed by their names.
state_keys = model.state_dict().keys()
print("## state_dict:", state_keys)

## state_dict: odict_keys(['param1', 'param2', 'buffer', 'conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'conv3.weight', 'conv3.bias', 'feature.0.weight', 'feature.0.bias', 'feature.1.weight', 'feature.1.bias'])


In [12]:
# named_buffers() returns an iterator over the module's buffers, yielding
# each buffer's name together with the buffer itself.
for buf_name, buf in model.named_buffers():
    print(f"## named_buffers: name: {buf_name}; buffers size: {buf.size()}")


## named_buffers: name: buffer; buffers size: torch.Size([2, 3])


In [13]:
# children() iterates over the direct submodules of the model.
for child in model.children():
    print("## children:", child)

## children: Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
## children: Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
## children: Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
## children: Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)


In [14]:
# named_children() yields each direct submodule together with its name.
for child_name, child in model.named_children():
    print("## named_children: name:", child_name, "children:", child)

## named_children: name: conv1 children: Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
## named_children: name: conv2 children: Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
## named_children: name: conv3 children: Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
## named_children: name: feature children: Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)


In [15]:
# modules() walks the whole module tree: the model itself first, then every
# nested submodule (including the layers inside the Sequential container).
for module in model.modules():
    print("## modules:", module)

## modules: Model(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
  (feature): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Linear(in_features=2, out_features=2, bias=True)
  )
)
## modules: Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
## modules: Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
## modules: Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
## modules: Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)
## modules: Linear(in_features=2, out_features=2, bias=True)
## modules: Linear(in_features=2, out_features=2, bias=True)


In [16]:
# named_modules() walks the same tree as modules() but also yields each
# module's dotted name; the root model has an empty name.
for module_name, module in model.named_modules():
    print("## named_modules: name:", module_name, "modules:", module)

## named_modules: name:  modules: Model(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
  (feature): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Linear(in_features=2, out_features=2, bias=True)
  )
)
## named_modules: name: conv1 modules: Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
## named_modules: name: conv2 modules: Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
## named_modules: name: conv3 modules: Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
## named_modules: name: feature modules: Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)
## named_modules: name: feature.0 modules: Linear(in_features=2, out_features=2, bias=True)
## named_modules: name: feature.1 modules: Linear(in_features=2, out_features=2, bias=True)


In [17]:
# The call is the cell's last expression, so its return value (the model)
# is displayed as the cell output.
model.train() # put the model into training mode

Model(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
  (feature): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Linear(in_features=2, out_features=2, bias=True)
  )
)

In [18]:
# The call is the cell's last expression, so its return value (the model)
# is displayed as the cell output.
model.eval() # put the model into evaluation mode

Model(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(10, 40, kernel_size=(5, 5), stride=(1, 1))
  (feature): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Linear(in_features=2, out_features=2, bias=True)
  )
)