### 模型构造

In [4]:
import torch
from torch import nn
from torch.nn import functional as F

# Two-layer perceptron: 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Random mini-batch: the leading 2 is the batch size, 20 is the feature count.
x = torch.rand((2, 20))
net(x)

tensor([[-0.0203,  0.0410, -0.2032,  0.0953, -0.1609, -0.1441, -0.0050,  0.0776,
          0.0980,  0.1293],
        [ 0.0806, -0.0434, -0.2489,  0.0219, -0.1752, -0.0655,  0.0977,  0.1105,
          0.1724,  0.0391]], grad_fn=<AddmmBackward0>)

In [5]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        super().__init__()
        # Hidden layer followed by the output layer; the ReLU in between is
        # applied functionally inside ``forward``.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output-layer result for the batch ``X``.

        ``X`` passes through the hidden layer, a ReLU, then the output layer.
        """
        hidden_pre_activation = self.hidden(X)
        hidden_activation = F.relu(hidden_pre_activation)
        return self.out(hidden_activation)
        

In [6]:
# Instantiate the custom MLP block and apply it to the batch `x` created in an
# earlier cell; the bare last expression displays the resulting tensor.
net = MLP()
net(x)

tensor([[ 0.0330, -0.0497,  0.1067, -0.0009,  0.1323,  0.1083,  0.1006, -0.1845,
          0.1732,  0.0159],
        [ 0.0327, -0.1661,  0.0742, -0.0575,  0.1296,  0.0267,  0.1361, -0.1510,
          0.1024, -0.0115]], grad_fn=<AddmmBackward0>)

In [12]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The given blocks are registered as children in the order they are passed
    and applied one after another in ``forward``.

    Args:
        *args: Blocks to chain; each is called on the output of the previous one.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Keys of ``_modules`` must be strings: nn.Module builds parameter
            # and state_dict names by concatenating them (e.g. "0.weight").
            # Keying by the module object itself makes state_dict(),
            # named_parameters() and repr() raise TypeError.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through every registered block, in registration order."""
        # ``_modules`` preserves insertion order, so blocks run as they were passed in.
        for block in self._modules.values():
            X = block(X)
        return X
        
# Wrap a single 20 -> 10 linear layer in MySequential and run the batch `x`
# (defined in an earlier cell) through it.
net = MySequential(nn.Linear(20, 10))
net(x)

tensor([[-0.2693,  0.3937,  0.0385, -0.6000,  0.1997, -0.2415, -0.3407,  0.3382,
          0.0789,  0.4627],
        [-0.0206,  0.4341,  0.1335, -0.6900,  0.3566, -0.2355, -0.2501,  0.4289,
          0.0290,  0.1454]], grad_fn=<AddmmBackward0>)

定义更加灵活的块

In [14]:
class FixedHiddenMLP(nn.Module):
    """Block mixing a trainable linear layer with a constant random weight.

    The same ``Linear`` layer is applied twice (its parameters are shared),
    with a multiplication by a fixed random matrix in between.
    """

    def __init__(self):
        super().__init__()
        # Constant random weight that is never trained. Registering it as a
        # non-persistent buffer (rather than a plain tensor attribute) makes it
        # follow the module through .to(device) / .double() / .cuda(), while
        # keeping it out of state_dict() so the saved keys stay the same.
        self.register_buffer(
            "rand_weight",
            torch.rand((10, 20), requires_grad=False),
            persistent=False,
        )
        self.Linear = nn.Linear(20, 10)

    def forward(self, X):
        """Apply linear -> fixed random projection -> the same linear layer.

        Args:
            X: 2-D batch with 20 features per row (``torch.mm`` requires 2-D input).

        Returns:
            Tensor with 10 features per row.
        """
        X = self.Linear(X)                 # (batch, 20) -> (batch, 10)
        X = torch.mm(X, self.rand_weight)  # (batch, 10) -> (batch, 20)
        X = self.Linear(X)                 # shared layer again: (batch, 20) -> (batch, 10)
        return X
    
# Build the block and evaluate it on the batch `x` from an earlier cell; the
# bare last expression displays the resulting tensor.
net = FixedHiddenMLP()
net(x)

tensor([[-0.2837,  0.0798,  0.2018,  0.1719, -0.2417,  0.2890,  0.1608,  0.1571,
          0.1869, -0.1019],
        [-0.3181,  0.0933,  0.1251,  0.2260, -0.1404,  0.3540,  0.1452, -0.0476,
          0.3948, -0.1434]], grad_fn=<AddmmBackward0>)

### 参数访问

In [16]:
import torch
from torch import nn

# Small 4 -> 8 -> 4 perceptron used to demonstrate parameter access.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 4),
)
# Random mini-batch of 2 samples with 4 features each.
x = torch.rand((2, 4))
net(x)

tensor([[ 0.1544,  0.0381, -0.3023,  0.0227],
        [ 0.1884,  0.1420, -0.2369,  0.0068]], grad_fn=<AddmmBackward0>)

In [21]:
# net[2] is the output layer nn.Linear(8, 4): its state_dict holds 'weight' and 'bias'.
print(net[2].state_dict())
# net[1] is the nn.ReLU, which has no parameters, so its state_dict is empty.
print(net[1].state_dict())
# Attribute access returns the nn.Parameter itself (values plus the requires_grad flag).
print(net[2].bias)

OrderedDict([('weight', tensor([[-0.3046,  0.1477,  0.1656,  0.1473, -0.0522, -0.1839, -0.2142, -0.1239],
        [-0.2866,  0.2416, -0.1178, -0.1498,  0.0545, -0.2755, -0.2097, -0.0272],
        [ 0.0259, -0.0271, -0.3413, -0.2285,  0.1450,  0.1057,  0.3014,  0.2714],
        [-0.0348,  0.0826,  0.1509,  0.2702, -0.2577,  0.0259,  0.1250,  0.2295]])), ('bias', tensor([ 0.3504,  0.3286, -0.1921, -0.0061]))])
OrderedDict()
Parameter containing:
tensor([ 0.3504,  0.3286, -0.1921, -0.0061], requires_grad=True)


In [22]:
# The gradient stays None until a backward pass populates it. Use the identity
# test `is None`: it does not depend on how the tensor type implements `==`.
net[2].weight.grad is None

True

In [23]:
# Collect (name, shape) for every parameter of the first layer; the list is
# unpacked with * so each pair is printed as a separate argument.
print(*[(name, param.shape) for name, param in net[0].named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))


### 从嵌套块收集参数