In [4]:
import torch
from torch import nn

# Small MLP: 4 input features -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(in_features=4, out_features=8),
    nn.ReLU(),
    nn.Linear(in_features=8, out_features=1),
)
# Random mini-batch: 2 samples with 4 features each.
X = torch.rand(size=(2, 4))
net(X)

tensor([[-0.4725],
        [-0.4192]], grad_fn=<AddmmBackward0>)

In [6]:
# state_dict() maps each parameter name of the module to its tensor.
print(net[2].state_dict())  # parameters of the last layer

OrderedDict([('weight', tensor([[-0.3160, -0.1669, -0.3527,  0.1377,  0.0631,  0.0388, -0.0791, -0.0927]])), ('bias', tensor([-0.2851]))])


In [12]:
# Inspect the last layer's bias: first its type, then the raw tensor behind it.
# The third item is the state dict of net[1]; it prints as empty when that layer has no parameters.
print(type(net[2].bias), net[2].bias.data, net[1].state_dict(), sep="\n")

<class 'torch.nn.parameter.Parameter'>
tensor([-0.2851])
OrderedDict()


In [13]:
# Access all parameters at once: (name, shape) pairs for the first layer,
# then for the whole network (names there are prefixed with the layer index).
print(*((name, param.shape) for name, param in net[0].named_parameters()))
print(*((name, param.shape) for name, param in net.named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [14]:
# Look a single parameter up by its dotted name in the whole-network state dict.
net.state_dict()['2.bias'].data

tensor([-0.2851])

In [15]:
# Collecting parameters from nested blocks
def block1():
    """Build one basic block: Linear(4, 8) -> ReLU -> Linear(8, 4) -> ReLU.

    The block takes 4 features and returns 4 features, so several copies
    can be chained one after another.

    Returns:
        nn.Sequential: a freshly constructed block with its own parameters.
    """
    layers = [nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU()]
    return nn.Sequential(*layers)

def block2():
    """Chain four independent ``block1()`` modules into one ``nn.Sequential``.

    Each sub-block is registered under the explicit name ``'block <i>'``
    rather than a numeric index.

    Returns:
        nn.Sequential: the container holding the four named sub-blocks.
    """
    stacked = nn.Sequential()
    for i in range(4):
        sub_block = block1()
        stacked.add_module(f'block {i}', sub_block)
    return stacked

# Nest the stack of blocks inside another Sequential and add a 1-unit output layer.
rgnet = nn.Sequential(
    block2(),
    nn.Linear(in_features=4, out_features=1),
)
rgnet(X)

tensor([[-0.3280],
        [-0.3276]], grad_fn=<AddmmBackward0>)

In [16]:
# Print the nested module hierarchy of rgnet.
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [18]:
def init_normal(m):
    """Initialize an ``nn.Linear`` layer: weights ~ N(0, 0.01^2), bias = 0.

    Intended to be passed to ``Module.apply``. Modules whose type is not
    exactly ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) initialize.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer created with bias=False has m.bias set to None; skip it instead of crashing.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# Module.apply calls init_normal on every submodule of net (and on net itself).
net.apply(init_normal)
# Display the first weight row and the first bias entry of layer 0 to check the result.
net[0].weight.data[0],net[0].bias.data[0]

(tensor([-0.0004, -0.0060, -0.0011, -0.0017]), tensor(0.))

In [20]:
def init_normal(m):
    """Initialize an ``nn.Linear`` layer with constant weights (1) and zero bias.

    NOTE(review): this redefinition reuses the name ``init_normal`` from the
    earlier cell even though it performs *constant* initialization. The name
    is kept so existing references keep working; consider renaming it
    (e.g. ``init_constant``) together with its call sites.

    Modules whose type is not exactly ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) initialize.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 1)
    # A layer created with bias=False has m.bias set to None; skip it instead of crashing.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# Re-initialize every submodule of net with the init_normal defined in this cell.
net.apply(init_normal)
# Display the first weight row and the first bias entry of layer 0 to check the result.
net[0].weight.data[0],net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

In [25]:
# Apply different initializers to different blocks: each layer gets its own init function
def xavier(m):
    """Xavier-uniform initialize the weight of an ``nn.Linear`` layer.

    The bias is not modified; modules whose type is not exactly
    ``nn.Linear`` are ignored.

    Args:
        m: the module to (possibly) initialize.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Set every weight of an ``nn.Linear`` layer to the constant 42.

    The bias is not modified; modules whose type is not exactly
    ``nn.Linear`` are ignored.

    Args:
        m: the module to (possibly) initialize.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 42)
# Xavier-uniform for the first layer only, constant 42 for the last layer only.
net[0].apply(xavier)
net[2].apply(init_42)
# Show the first weight row of layer 0, then the full weight of layer 2.
print(net[0].weight.data[0], net[2].weight.data, sep="\n")

tensor([-0.0282, -0.5754, -0.3810, -0.6159])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [29]:
# Custom initialization
def my_init(m):
    """Custom initializer for ``nn.Linear`` layers.

    Prints the (name, shape) of each parameter of the layer, draws the
    weights uniformly from [-10, 10], then zeroes every weight whose
    absolute value is below 5. The bias is not modified. Modules whose
    type is not exactly ``nn.Linear`` are ignored.

    Args:
        m: the module to (possibly) initialize.
    """
    if type(m) is not nn.Linear:
        return
    shapes = [(name, param.shape) for name, param in m.named_parameters()]
    print("Init", *shapes)
    nn.init.uniform_(m.weight, -10, 10)
    # Boolean mask: True where the weight is kept, False where it is zeroed.
    keep = m.weight.data.abs() >= 5
    m.weight.data *= keep
# Run my_init on every submodule of net; it prints one "Init" line per Linear layer.
net.apply(my_init)
# Display the first two weight rows of layer 0 to check the result.
net[0].weight[:2]

Init ('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
Init ('weight', torch.Size([1, 8])) ('bias', torch.Size([1]))


tensor([[ 0.0000,  9.4278, -0.0000, -6.4314],
        [ 6.1521,  0.0000,  7.4984,  0.0000]], grad_fn=<SliceBackward0>)

In [31]:
# Parameter tying: the same Linear instance is placed at two positions of the network
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)
# Compare the first weight row at both positions of the shared layer.
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])


In [34]:
# Custom layers: build a custom layer that has no parameters
import torch
import torch.nn.functional as  F
from torch import nn

class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of its input from the input."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` shifted by its mean.

        ``X.mean()`` is called without a ``dim`` argument, so a single
        mean over all elements of ``X`` is subtracted.
        """
        center = X.mean()
        return X - center

# Instantiate the layer and apply it to a small float tensor.
layer=CenteredLayer()
layer(torch.FloatTensor([1,2,3,4,5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [35]:
# Use the custom layer as a component of a larger model
net = nn.Sequential(
    nn.Linear(in_features=8, out_features=128),
    CenteredLayer(),
)

Y = net(torch.rand(4, 8))
# Mean of the centered output; expected to be ~0 up to floating-point error.
Y.mean()

tensor(2.7940e-09, grad_fn=<MeanBackward0>)

In [37]:
# A custom layer with its own parameters: a fully connected layer followed by ReLU
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU activation.

    Args:
        in_units: number of input features.
        units: number of output features.

    Attributes:
        weight: ``nn.Parameter`` of shape ``(in_units, units)``, drawn from N(0, 1).
        bias: ``nn.Parameter`` of shape ``(units,)``, drawn from N(0, 1).
    """
    def __init__(self,in_units,units):
        super().__init__()
        self.weight=nn.Parameter(torch.randn(in_units,units))
        self.bias=nn.Parameter(torch.randn(units,))
    def forward(self,X):
        """Return ``relu(X @ weight + bias)``."""
        # Use the Parameters themselves rather than ``.data``: ``.data`` bypasses
        # autograd, so no gradient would reach weight/bias and the layer could
        # never be trained.
        linear=torch.matmul(X,self.weight)+self.bias
        return F.relu(linear)
# Instantiate the custom layer (5 inputs, 3 outputs) and display its weight parameter.
dense=MyLinear(5,3)
dense.weight

Parameter containing:
tensor([[ 0.1821, -0.3363,  1.2713],
        [ 1.1114, -0.8295, -1.2814],
        [ 1.8803,  1.6465, -0.3398],
        [ 0.0374, -0.9628,  2.5953],
        [ 1.1263,  0.2478, -0.5891]], requires_grad=True)

In [38]:
# Reading and writing files: saving a trained model
import torch
from torch import nn
from torch.nn import functional as  F

# Round-trip a single tensor through a file on disk.
x = torch.arange(4)
torch.save(obj=x, f='x-file')
x2 = torch.load(f="x-file")
x2

tensor([0, 1, 2, 3])

In [46]:
# Saving and loading model parameters
class MLP(nn.Module):
    """Two-layer perceptron: Linear(20, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the output layer."""
        activated = F.relu(self.hidden(x))
        return self.output(activated)
net = MLP()
X = torch.randn(size=(2, 20))
# Keep the output so it can be compared with a model restored from disk.
Y = net(X)
# Persist only the parameters (the state dict), not the model object itself.
torch.save(net.state_dict(), 'mlp.params')
print(net.state_dict(), type(net.state_dict()), sep="\n")

OrderedDict([('hidden.weight', tensor([[-0.0311,  0.0082,  0.0464,  ..., -0.1226,  0.1595,  0.1827],
        [ 0.0442,  0.1065, -0.1639,  ...,  0.0082,  0.0331, -0.1212],
        [ 0.1646,  0.0586, -0.0820,  ...,  0.0434, -0.1117,  0.0766],
        ...,
        [ 0.0465, -0.0363,  0.1441,  ...,  0.1482,  0.0459, -0.0894],
        [ 0.0827,  0.0995, -0.0736,  ..., -0.0425,  0.0896,  0.0827],
        [ 0.0088,  0.2214, -0.1845,  ...,  0.0983,  0.2188, -0.1782]])), ('hidden.bias', tensor([ 0.1971,  0.1022, -0.1907, -0.1759, -0.1231, -0.2226,  0.0671, -0.0821,
         0.0201,  0.0263, -0.0987,  0.1530,  0.1816,  0.1677,  0.0724, -0.0162,
         0.1157,  0.2058,  0.0453,  0.0908,  0.0809, -0.0442,  0.1647, -0.0873,
        -0.0636,  0.0684,  0.1343,  0.0587,  0.1921, -0.0102, -0.0255, -0.2144,
         0.1971,  0.0031,  0.1754,  0.0208,  0.0699,  0.0284,  0.0735, -0.0973,
        -0.1581,  0.2067,  0.1885, -0.0691,  0.1343, -0.1236, -0.1698, -0.0325,
        -0.1686,  0.0061, -0.0471,  0

In [47]:
# Rebuild the architecture, then restore the saved parameters into it.
clone=MLP()
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
clone.load_state_dict(torch.load("mlp.params"))
# eval() switches the module to evaluation mode and returns it, so the cell displays the model.
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [48]:
# Run the same input through the restored model and compare element-wise with the original output.
Y_clone=clone(X)
Y_clone==Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])