In [3]:
import torch
from torch import nn

In [4]:
# Define the model
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The modules passed to the constructor are registered as children and
    applied to the input one after another, in the order they were given.
    """

    def __init__(self, *args):
        """Register every module in ``args`` as an ordered child.

        Args:
            *args: Modules to chain; each is stored under its positional
                index rendered as a string ("0", "1", ...).
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Child names must be strings: named_parameters() and state_dict()
            # concatenate them into dotted prefixes such as "0.weight".
            # Keying by the module object itself makes those calls raise
            # TypeError, and would also collapse a module passed twice into
            # a single entry.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through the registered children in registration order.

        Args:
            X: Input accepted by the first child module.

        Returns:
            The output of the last child module (``X`` unchanged if there are
            no children).
        """
        for block in self._modules.values():
            X = block(X)
        return X

In [5]:
# Parameter management
# Small MLP used by the following cells: Linear(4 -> 8), ReLU, Linear(8 -> 1).
net = nn.Sequential(nn.Linear(4,8), nn.ReLU(), nn.Linear(8,1))
# Random batch of 2 samples with 4 features each, matching the first layer's input width.
X = torch.rand(size=(2,4))
# Bare last expression so the notebook displays the forward-pass result.
net(X)

tensor([[ 0.0018],
        [-0.0171]], grad_fn=<AddmmBackward0>)

In [7]:
# net[2] is the last child of `net`, the nn.Linear(8, 1) layer; its state_dict
# maps 'weight' and 'bias' to the layer's tensors.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.3053, -0.0993, -0.0697,  0.2957,  0.3099,  0.0227,  0.0925, -0.3512]])), ('bias', tensor([-0.1894]))])


In [8]:
# Targeted parameters
# The bias of the last layer is an nn.Parameter, not a plain tensor.
print(type(net[2].bias))
# Printing the Parameter shows its value together with requires_grad.
print(net[2].bias)
# .data exposes the underlying tensor value (printed without requires_grad).
print(net[2].bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.1894], requires_grad=True)
tensor([-0.1894])


In [10]:
# Access all parameters at once
# (name, shape) pairs for the first layer only.
print(*[(name, param.shape) for name , param in net[0].named_parameters()])
# (name, shape) pairs for the whole network; names are prefixed with the
# child's index, and the ReLU at index 1 contributes no parameters.
print(*[(name, param.shape) for name , param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [13]:
# Look up tensors in the network's state_dict by their dotted names.
# NOTE(review): a notebook cell only displays its last bare expression, so the
# bias lookup on the next line is evaluated but never shown. Wrap it in
# print()/display() or move it to its own cell if it should be visible.
net.state_dict()['2.bias'].data
net.state_dict()['2.weight'].data

tensor([[ 0.3053, -0.0993, -0.0697,  0.2957,  0.3099,  0.0227,  0.0925, -0.3512]])

In [None]:
# Collect parameters from nested blocks
def block1():
    """Build one MLP block: Linear(4, 8) -> ReLU -> Linear(8, 4) -> ReLU.

    Returns:
        A new ``nn.Sequential`` whose input and output widths are both 4, so
        several copies can be chained.
    """
    return nn.Sequential(nn.Linear(4,8), nn.ReLU(), nn.Linear(8,4), nn.ReLU())


def block2():
    """Stack four independent ``block1`` instances.

    Returns:
        An ``nn.Sequential`` whose children are named ``block0`` .. ``block3``.
    """
    net = nn.Sequential()
    for i in range(4):
        # add_module takes the name and the module as two separate arguments.
        # The quote must close right after the name; otherwise ", block1()"
        # becomes part of the string and the module argument is missing.
        net.add_module(f'block{i}', block1())
    return net
# Nested network: the stacked blocks from block2() followed by a Linear(4 -> 1) layer.
rgnet = nn.Sequential(block2(), nn.Linear(4,1))
# X is the batch created in an earlier cell; the bare expression displays the output.
rgnet(X)


In [None]:
# 内置初始化
def init_normal(m)