In [1]:
from mxnet import nd
from mxnet.gluon import nn

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(256, activation='relu')) #输出256
    net.add(nn.Dense(10)) #输出10

In [2]:
net.initialize()
x = nd.random.uniform(shape=(2, 20))
print(net(x))
print('hidden layer:', net[0])
print('output layer:', net[1])


[[ 0.09543003  0.04614332 -0.00286653 -0.07790346 -0.05130243  0.02942039
   0.08696645 -0.0190793  -0.04122177  0.05088576]
 [ 0.0769287   0.03099705  0.00856576 -0.04467198 -0.0692684   0.09132432
   0.06786594 -0.06187843 -0.03436674  0.04234695]]
<NDArray 2x10 @cpu(0)>
hidden layer: Dense(20 -> 256, Activation(relu))
output layer: Dense(256 -> 10, linear)


### 使用nn.Block构造模型 

In [3]:
#构造多层感知机
class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs) #调用MLP父类nn.Block的构造函数__init__。
        with self.name_scope(): #指定模型参数的名字的前缀。
            self.hidden = nn.Dense(256, activation='relu')
            self.output = nn.Dense(10)
            
    def forward(self, x):
        return self.output(self.hidden(x))

任意一个nn.Block的子类至少实现以下两个函数：

1. __init__: 创建模型的参数

2. forward: 定义模型的计算

In [5]:
net = MLP()
net.initialize()
print(net(x))
print('hidden layer name with default prefix:', net.hidden.name)
print('output layer name with default prefix:', net.output.name)


[[ 0.07787763  0.00216402  0.016822    0.0305988  -0.00702019  0.01668715
   0.04822846  0.0039432  -0.09300035 -0.04494302]
 [ 0.08891079 -0.00625484 -0.01619132  0.03807179 -0.01451489  0.02006173
   0.0303478   0.02463485 -0.07605447 -0.04389168]]
<NDArray 2x10 @cpu(0)>
hidden layer name with default prefix: mlp1_dense0
output layer name with default prefix: mlp1_dense1


In [7]:
net = MLP(prefix = 'my_mlp_')
net.initialize()
print(net(x))
print('hidden layer name with default prefix:', net.hidden.name)
print('output layer name with default prefix:', net.output.name)


[[-0.08123872  0.0231976   0.06555708  0.00410997 -0.00114774  0.05492052
  -0.03296161  0.02032991  0.01187696  0.09007928]
 [-0.11413839 -0.01479331  0.00744748  0.01141047 -0.019035    0.03110916
  -0.01071917 -0.0104462   0.01164494  0.04971804]]
<NDArray 2x10 @cpu(0)>
hidden layer name with default prefix: my_mlp_dense0
output layer name with default prefix: my_mlp_dense1


In [10]:
class MLP_NO_NAMESCOPE(nn.Block):
    def __init__(self, **kwargs):
        super(MLP_NO_NAMESCOPE, self).__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)
        
    def forward(self, x):
        return self.output(self.hidden(x))
net = MLP_NO_NAMESCOPE(prefix = 'my_mlp_')
print('hidden layer name without prefix:', net.hidden.name)
print('output layer name without prefix:', net.output.name)

hidden layer name without prefix: dense4
output layer name without prefix: dense5


nn.Sequential 是特殊的nn.Block，我们可以使用nn.Block来自定义一个简单版的Sequential

In [11]:
class MySequential(nn.Block):
    def __init__(self, **kwargs):
        super(MySequential, self).__init__(**kwargs)
        
    def add(self, block):
        self._children.append(block)
        
    def forward(self, x):
        for block in self._children:
            x = block(x)
        return x

In [12]:
net = MySequential()
with net.name_scope():
    net.add(nn.Dense(256, activation='relu'))
    net.add(nn.Dense(10))
net.initialize()
net(x)


[[ 0.00362339  0.00655489 -0.01794356 -0.00904963  0.04215319 -0.09644581
   0.10013397 -0.0059631   0.07653392 -0.03904235]
 [ 0.01506376  0.06130073 -0.00461883 -0.01592598  0.03681139 -0.11513808
   0.07021289  0.02783955  0.00778558 -0.06870455]]
<NDArray 2x10 @cpu(0)>

一些其他的例子

In [19]:
class FancyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        self.rand_weight = nd.random_uniform(shape=(10, 20))
        with self.name_scope():
            self.dense = nn.Dense(10, activation='relu')
    
    def forward(self, x):
        x = self.dense(x)
        x = nd.relu(nd.dot(x, self.rand_weight) + 1)
        x = self.dense(x)
        return x

In [21]:
net = FancyMLP()
net.initialize()
net(x)


[[ 0.          0.          0.34971458  0.          0.28557241  0.          0.
   0.35742062  0.27483869  0.        ]
 [ 0.          0.          0.3475008   0.          0.28735578  0.          0.
   0.35511348  0.27730089  0.        ]]
<NDArray 2x10 @cpu(0)>

In [22]:
print(net)

FancyMLP(
  (dense): Dense(20 -> 10, Activation(relu))
)


In [15]:
# 嵌套使用 nn.Sequential 和 nn.Block
class NestMLP(nn.Block):
    def __init__(self, **kwargs):
        super(NestMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        with self.name_scope():
            self.net.add(nn.Dense(64, activation='relu'))
            self.net.add(nn.Dense(32, activation='relu'))
            self.dense = nn.Dense(16, activation='relu')
            
    def forward(self, x):
        return self.dense(self.net(x))
    
net = nn.Sequential()
net.add(NestMLP())
net.add(nn.Dense(10))
net.initialize()
print(net(x))


[[  1.85909070e-04   7.94102612e-04  -4.68846178e-04   5.55210223e-04
    6.32922747e-05  -2.69089389e-04  -3.28947848e-04  -2.83379806e-04
   -3.36894940e-04  -3.11668700e-04]
 [  1.44344784e-04   6.62787235e-04  -4.57421003e-04   3.75456526e-04
    2.12090905e-04  -5.47116040e-04  -1.03839884e-04  -6.24075357e-04
   -4.92089195e-04  -2.48089316e-04]]
<NDArray 2x10 @cpu(0)>


In [18]:
# 嵌套使用 nn.Sequential 和 nn.Block
class NestMLP(nn.Block):
    def __init__(self, **kwargs):
        super(NestMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        with self.name_scope():
            self.denses = [nn.Dense(64, activation='relu'), nn.Dense(32, activation='relu'), nn.Dense(16)]
            
    def forward(self, x):
        for dense in self.denses:
            x = dense(x)
        return x
    
net = nn.Sequential()
net.add(NestMLP())
net.add(nn.Dense(10))
net.initialize()
print(net(x))

  .format(name=self.__class__.__name__ + "." + k))


RuntimeError: Parameter nestmlp3_dense0_weight has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks