In [None]:
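'''
The ideas here are all easy to follow -- the details are what need attention!
Approach 1: subclass nn.Module directly and customise it, forwarding **kwargs
            to the parent constructor.
Approach 2: use the container classes.
  1. Sequential, built on an OrderedDict and self.add_module();
     a few details of how it is written need care.
  2. ModuleList
  3. ModuleDict
Approach 3: design something more complex... one layer's parameters can be
            shared (reused), and constant parameters (not updated during
            training) can be specified.
'''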

In [1]:
import torch
from torch import nn

In [11]:
class MLP(nn.Module):
    """Multilayer perceptron: Linear(784, 256) -> ReLU -> Linear(256, 10).

    Keyword arguments given to the constructor are forwarded unchanged to
    ``nn.Module.__init__``; ``MLP()`` with no arguments is the normal usage.
    """

    def __init__(self, **kwargs):
        # Initialise the parent class first so that the layers assigned below
        # are registered as sub-modules of this instance.
        super().__init__(**kwargs)
        self.linear = nn.Linear(784, 256)   # hidden layer
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)    # output layer

    def forward(self, x):
        """Return the output-layer result for input ``x``."""
        hidden = self.linear(x)
        activated = self.act(hidden)
        return self.output(activated)

In [12]:
# Random batch: 2 samples with 784 features each, matching the input width
# of MLP's first Linear layer.
x = torch.rand(2,784)
# Instantiate without any keyword arguments and show the registered sub-modules.
net = MLP()
print(net)

MLP(
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


In [13]:
# Forward pass: calling the module runs forward(); the 2 input rows give a
# (2, 10) tensor (see the output below).
print(net(x))

tensor([[-0.1177, -0.0111,  0.0996,  0.0075, -0.1078,  0.1201, -0.1106,  0.0028,
         -0.0619,  0.0077],
        [-0.0937, -0.0503,  0.0897,  0.0755, -0.0514,  0.1972, -0.0423, -0.0307,
          0.1203,  0.0687]], grad_fn=<AddmmBackward>)


In [17]:
# Print the shape of every learnable tensor of the model: a weight and a
# bias for each of the two Linear layers (see the output below).
for param in net.parameters():
    print(param.shape)

torch.Size([256, 784])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])


In [20]:
from collections import OrderedDict
class MySequential(nn.Module):
    """Minimal re-implementation of a sequential container.

    Accepts either

    * a single ``dict`` (including ``OrderedDict``) mapping names to modules,
      which are registered under those names in the dict's iteration order, or
    * any number of modules as positional arguments, which are registered
      under the names ``"0"``, ``"1"``, ... in the order given.

    Plain ``dict`` objects are accepted as well as ``OrderedDict`` because
    dictionaries preserve insertion order in Python 3.7+.
    """

    def __init__(self, *args):
        super(MySequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], dict):
            # A single mapping was passed: keep the caller's names.
            named_modules = args[0].items()
        else:
            # Modules were passed positionally: name them by their index.
            named_modules = ((str(idx), module) for idx, module in enumerate(args))
        for name, module in named_modules:
            # add_module() stores the module in self._modules (an ordered mapping).
            self.add_module(name, module)

    def forward(self, input):
        """Feed ``input`` through every registered module in turn."""
        # self._modules is iterated in the order the members were added.
        for module in self._modules.values():
            input = module(input)
        return input

In [21]:
# Build the same 784 -> 256 -> 10 network from positional modules; they are
# registered under the names "0", "1", "2" (see the printed structure).
net = MySequential(
nn.Linear(784,256),
nn.ReLU(),
nn.Linear(256,10))
print(net)
# Last expression of the cell: the forward result on x is shown as the cell output.
net(x)

MySequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[ 0.0288,  0.0495, -0.2580, -0.0660, -0.0081, -0.0547,  0.0561,  0.0418,
         -0.1407, -0.1555],
        [-0.0120,  0.0075, -0.1012,  0.0717, -0.0197, -0.0095,  0.1904,  0.0364,
         -0.1729, -0.1627]], grad_fn=<AddmmBackward>)

In [22]:
# nn.ModuleList behaves like a Python list of modules: it can be appended to
# and indexed. append() returns the list itself, so the calls can be chained.
net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()]).append(nn.Linear(256, 10))
print(net[len(net) - 1])  # the last module, i.e. the one just appended
print(net)

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [24]:
# net(torch.zeros(1, 784))  # raises NotImplementedError: nn.ModuleList is only a container and defines no forward()

In [28]:
class MyModule(nn.Module):
    """Ten Linear(10, 10) layers kept in an ``nn.ModuleList``.

    ``forward`` shows that the list can be both iterated and indexed: at
    step ``i`` the input is passed through layer ``i // 2`` and through
    layer ``i``, and the two results are added.
    """

    def __init__(self):
        super().__init__()
        layers = [nn.Linear(10, 10) for _ in range(10)]
        self.linear = nn.ModuleList(layers)

    def forward(self, x):
        for idx, layer in enumerate(self.linear):
            print(idx, layer)  # trace which layer is being applied
            paired_layer = self.linear[idx // 2]
            x = paired_layer(x) + layer(x)
        return x

In [29]:
# Instantiate MyModule and print its structure: the ModuleList and its ten
# Linear layers appear as registered sub-modules (see the output below).
net=MyModule()
print(net)

MyModule(
  (linear): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): Linear(in_features=10, out_features=10, bias=True)
    (4): Linear(in_features=10, out_features=10, bias=True)
    (5): Linear(in_features=10, out_features=10, bias=True)
    (6): Linear(in_features=10, out_features=10, bias=True)
    (7): Linear(in_features=10, out_features=10, bias=True)
    (8): Linear(in_features=10, out_features=10, bias=True)
    (9): Linear(in_features=10, out_features=10, bias=True)
  )
)


In [37]:
class Module_ModuleList(nn.Module):
    """Holds a single Linear(10, 10) layer inside an ``nn.ModuleList``.

    Because the container is an ``nn.ModuleList``, the layer's weight and
    bias are reported by ``parameters()``.
    """

    def __init__(self):
        super().__init__()
        layers = [nn.Linear(10, 10)]
        self.linear = nn.ModuleList(layers)
class Module_List(nn.Module):
    """Holds a single Linear(10, 10) layer inside a plain Python ``list``.

    Counter-example to ``nn.ModuleList``: the layer is stored in an ordinary
    list attribute, and ``parameters()`` on this module yields nothing.
    """

    def __init__(self):
        super().__init__()
        layers = [nn.Linear(10, 10)]
        self.linear = layers  # deliberately a plain list, not nn.ModuleList
# Compare what parameters() reports for the two containers.
net1 = Module_ModuleList()
net2 = Module_List()

print('----net1----')
# The Linear layer sits in an nn.ModuleList, so its weight and bias are listed.
for p in net1.parameters():
    #print(p)
    print(p.size())
print('---net2---')
# This loop prints nothing: net2.parameters() yields no tensors at all.
for p in net2.parameters():
    print(p)
# Prints [] -- this answers the original "what is going on here?" question:
# the Linear layer is held in a plain Python list, which nn.Module does not
# register as a sub-module, so its weight and bias are invisible to parameters().
print(list(net2.parameters()))

----net1----
torch.Size([10, 10])
torch.Size([10])
---net2---
[]


In [38]:
# nn.ModuleDict behaves like a dictionary of modules: entries can be added
# after construction and looked up either by key or as attributes.
net = nn.ModuleDict(dict(linear=nn.Linear(784, 256), act=nn.ReLU()))
net['output'] = nn.Linear(256, 10)  # add an entry after construction
print(net['linear'])  # access by key
print(net.output)     # access as an attribute
print(net)

Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
  (act): ReLU()
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)


In [46]:
class fancymlp(nn.Module):
    """Module with a shared layer, a constant weight and data-dependent control flow.

    * ``rand_weight`` is a fixed random (20, 20) matrix that is not trained.
      It is registered as a buffer, so it follows the module through
      ``.to()`` / ``.double()`` / ``.cuda()`` and is included in
      ``state_dict()``, while staying out of ``parameters()``.
    * ``linear`` is applied twice in ``forward``, so both applications share
      the same weight and bias.

    Keyword arguments are forwarded unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, **kwargs):
        super(fancymlp, self).__init__(**kwargs)
        # Constant, non-trainable weight. A plain tensor attribute would be
        # ignored by dtype/device conversions of the module, which makes the
        # torch.mm call in forward() fail after e.g. net.double().
        self.register_buffer('rand_weight', torch.rand((20, 20), requires_grad=False))
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return a scalar tensor computed from ``x`` (last dimension 20)."""
        x = self.linear(x)
        # Multiply by the constant weight, add 1 and apply ReLU.
        x = nn.functional.relu(torch.mm(x, self.rand_weight) + 1)
        # Reuse the same Linear layer: its parameters are shared with the first call.
        x = self.linear(x)
        # Control flow that depends on the data: halve until the norm is at most 1 ...
        while x.norm().item() > 1:
            x = x / 2
        # ... then scale up by 10 if the norm ended below 0.8.
        if x.norm().item() < 0.8:
            x = x * 10
        return x.sum()

In [48]:
# Random batch of 2 samples with 20 features, matching fancymlp's Linear(20, 20).
x = torch.rand(2,20)
net = fancymlp()
# Only `linear` is listed as a sub-module in the printed structure below.
print(net)
# forward() ends with .sum(), so the result is a single scalar tensor.
print(net(x))

fancymlp(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)
tensor(-0.3788, grad_fn=<SumBackward0>)


In [49]:
class netmlp(nn.Module):
    """Wraps ``nn.Sequential(Linear(40, 30), ReLU())`` as a single sub-module.

    The constructor accepts arbitrary keyword arguments but does not use
    them; they are not forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, **kwargs):
        super().__init__()
        hidden = nn.Linear(40, 30)
        self.net = nn.Sequential(hidden, nn.ReLU())

    def forward(self, x):
        """Return the result of the wrapped Sequential on ``x``."""
        out = self.net(x)
        return out
# Instantiate netmlp and print it: the inner Sequential shows up nested
# under the attribute name `net` (see the output below).
net = netmlp()
print(net)

netmlp(
  (net): Sequential(
    (0): Linear(in_features=40, out_features=30, bias=True)
    (1): ReLU()
  )
)


In [52]:
# Custom modules compose like built-in layers: netmlp (40 -> 30), then
# Linear(30, 20), then fancymlp, which takes 20 features.
net = nn.Sequential(netmlp(),nn.Linear(30,20),fancymlp())
print(net)

Sequential(
  (0): netmlp(
    (net): Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ReLU()
    )
  )
  (1): Linear(in_features=30, out_features=20, bias=True)
  (2): fancymlp(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)


In [53]:
# Random batch of 2 samples with 40 features, matching netmlp's Linear(40, 30).
x = torch.rand(2,40)
# The last stage (fancymlp) returns x.sum(), so a single scalar tensor is printed.
print(net(x))

tensor(-7.6870, grad_fn=<SumBackward0>)
