In [8]:
import torch
import torch.nn as nn

class Linear(nn.Module):
    """Minimal custom module wrapping a single fully connected layer.

    Args:
        ch_in: Number of input features.
        ch_out: Number of output features.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        # The attribute is called ``conv`` although it holds an nn.Linear;
        # the name is kept because it determines the parameter keys
        # (e.g. ``conv.weight``) reported by named_parameters()/state_dict().
        self.conv = nn.Linear(in_features=ch_in, out_features=ch_out)

    def forward(self, x):
        """Apply the wrapped fully connected layer to ``x`` and return the result."""
        projected = self.conv(x)
        return projected
        
        
class Net(nn.Module):
    """Two-stage linear network: a nested ``Linear`` wrapper followed by ``nn.Linear``.

    Args:
        ch_in: Number of input features; the inner layer maps ch_in -> ch_in.
        ch_out: Number of output features produced by the outer layer.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        # Nest an instance of the custom Linear wrapper; nesting modules like
        # this is a common situation when defining networks.
        self.linear_inner = Linear(ch_in, ch_in)
        self.linear_outer = nn.Linear(ch_in, ch_out)

    def forward(self, x):
        """Run ``x`` through the inner layer, then the outer layer.

        Returns:
            The output of ``linear_outer`` applied to the inner layer's result.
        """
        out = self.linear_inner(x)
        # Chain the layers: the outer layer must consume the inner layer's
        # output, not the raw input, otherwise linear_inner has no effect.
        out = self.linear_outer(out)
        return out

    def init_weights(self):
        """Set every ``nn.Linear`` weight to 1 and every bias to 0.

        ``self.modules()`` recursively yields this module and all descendant
        modules, so ``nn.Linear`` layers nested inside custom sub-modules are
        initialized as well.
        """
        for op in self.modules():
            # Choose the initialization scheme by operation type; other layer
            # types (e.g. Conv) could be handled with additional branches.
            if isinstance(op, nn.Linear):
                nn.init.constant_(op.weight, val=1)
                # Layers created with bias=False have no bias tensor to fill.
                if op.bias is not None:
                    nn.init.constant_(op.bias, val=0)

In [9]:
# Build the network with one input feature and one output feature.
net = Net(ch_in=1, ch_out=1)
# Apply the constant weight/bias initialization.
net.init_weights()
# Forward pass on an all-ones input to obtain the network output.
x = torch.ones(1, 1)
out = net(x)
print(out)

Help on method modules in module torch.nn.modules.module:

modules() method of __main__.Net instance
    Returns an iterator over all modules in the network.
    
    Yields:
        Module: a module in the network
    
    Note:
        Duplicate modules are returned only once. In the following
        example, ``l`` will be returned only once.
    
    Example::
    
        >>> l = nn.Linear(2, 2)
        >>> net = nn.Sequential(l, l)
        >>> for idx, m in enumerate(net.modules()):
                print(idx, '->', m)
    
        0 -> Sequential(
          (0): Linear(in_features=2, out_features=2, bias=True)
          (1): Linear(in_features=2, out_features=2, bias=True)
        )
        1 -> Linear(in_features=2, out_features=2, bias=True)

None
tensor([[1.]], grad_fn=<AddmmBackward>)


In [6]:
# Walk over the network's (name, parameter) pairs to inspect the initialization result.
for param_name, param_value in net.named_parameters():
    print((param_name, param_value))

('linear_inner.conv.weight', Parameter containing:
tensor([[1.]], requires_grad=True))
('linear_inner.conv.bias', Parameter containing:
tensor([0.], requires_grad=True))
('linear_outer.weight', Parameter containing:
tensor([[1.]], requires_grad=True))
('linear_outer.bias', Parameter containing:
tensor([0.], requires_grad=True))


In [7]:
import torch.optim
# help() writes the documentation itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
help(torch.optim.SGD)

Help on class SGD in module torch.optim.sgd:

class SGD(torch.optim.optimizer.Optimizer)
 |  Implements stochastic gradient descent (optionally with momentum).
 |  
 |  Nesterov momentum is based on the formula from
 |  `On the importance of initialization and momentum in deep learning`__.
 |  
 |  Args:
 |      params (iterable): iterable of parameters to optimize or dicts defining
 |          parameter groups
 |      lr (float): learning rate
 |      momentum (float, optional): momentum factor (default: 0)
 |      weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
 |      dampening (float, optional): dampening for momentum (default: 0)
 |      nesterov (bool, optional): enables Nesterov momentum (default: False)
 |  
 |  Example:
 |      >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
 |      >>> optimizer.zero_grad()
 |      >>> loss_fn(model(input), target).backward()
 |      >>> optimizer.step()
 |  
 |  __ http://www.cs.toronto.edu/%7