In [3]:
# 使用nn.Sequential 定义一个多层感知机

import sys
from mxnet import init, gluon, nd
from mxnet.gluon import nn

class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.hidden = nn.Dense(4)
            self.output = nn.Dense(2)
    
    def forward(self, x):
        return self.output(nd.relu(self.hidden(x)))

In [5]:
x = nd.random.uniform(shape=(3, 5))
try:
    net = MLP()
    net.initialize()
    net(x)
except RuntimeError as err:
    sys.stderr.write(str(err))

#### 访问模型参数

In [7]:
my_param = gluon.Parameter('good_param', shape=(2, 3)) # 使用 Parameter 来定义参数
my_param.initialize()
print('data:', my_param.data(), '\n grad:', my_param.grad(), '\n name:', my_param.name)

data: 
[[-0.007007   -0.0196689   0.01582889]
 [-0.00881553  0.0563288   0.02766836]]
<NDArray 2x3 @cpu(0)> 
 grad: 
[[ 0.  0.  0.]
 [ 0.  0.  0.]]
<NDArray 2x3 @cpu(0)> 
 name: good_param


In [10]:
#访问开头定义的多层感知机的各项参数：权重weight, 偏差bias
w = net.hidden.weight
b = net.hidden.bias
print('hidden layer name:', net.hidden.name, '\nweight: ', w,'\nbias: ', b)

hidden layer name: mlp2_dense0 
weight:  Parameter mlp2_dense0_weight (shape=(4, 5), dtype=<class 'numpy.float32'>) 
bias:  Parameter mlp2_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)


In [11]:
#参数的值和梯度
print('weight:', w.data(), '\nweight grad:', w.grad(), '\nbias:',b.data(), '\nbais grad:', b.grad())

weight: 
[[ 0.00286685  0.03927409  0.02504314 -0.05344158  0.03088857]
 [ 0.01958894  0.01148278 -0.04993054  0.00523225  0.06225365]
 [ 0.03620619  0.00305876 -0.05517294 -0.01194733 -0.00369594]
 [-0.03296221 -0.04391347  0.03839272  0.03316854 -0.00613896]]
<NDArray 4x5 @cpu(0)> 
weight grad: 
[[ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]]
<NDArray 4x5 @cpu(0)> 
bias: 
[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)> 
bais grad: 
[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)>


In [14]:
# 通过collect_params 来访问
params = net.collect_params()
print(params)
print(params['mlp2_dense0_bias'].data())
print(params.get('dense0_bias').data())

mlp2_ (
  Parameter mlp2_dense0_weight (shape=(4, 5), dtype=<class 'numpy.float32'>)
  Parameter mlp2_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter mlp2_dense1_weight (shape=(2, 4), dtype=<class 'numpy.float32'>)
  Parameter mlp2_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)>

[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)>


#### 初始化模型参数

在gluon中，模型的参数总是默认初始化为0。当我们对整个模型所有参数做初始化时，模型下权重参数的所有元素为[-0.07, 0.07]之间均匀分布的随机数。我们也可以使用其他初始化方法。以下例子使用了均值为0，标准差为0.02的正太分布来随机初始化模型中所有层的权重参数。

In [15]:
params = net.collect_params()
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print('hidden weight:', net.hidden.weight.data(), '\nhidden bias:', net.hidden.bias.data(), '\noutput weight:', 
      net.output.weight.data(), '\noutput bias:', net.output.bias.data())

hidden weight: 
[[ 0.00204577  0.02359052 -0.01088966 -0.00684989  0.02217279]
 [ 0.02551372 -0.03666487 -0.01057974  0.01203553  0.00832525]
 [ 0.01018762 -0.048427   -0.00321659  0.00771733 -0.00632596]
 [ 0.02594279 -0.00826195  0.01191583  0.0098195   0.01036525]]
<NDArray 4x5 @cpu(0)> 
hidden bias: 
[ 0.  0.  0.  0.]
<NDArray 4 @cpu(0)> 
output weight: 
[[ 0.00784631  0.00566644  0.02093405  0.00017574]
 [-0.01174674 -0.0067725  -0.00926798 -0.01903343]]
<NDArray 2x4 @cpu(0)> 
output bias: 
[ 0.  0.]
<NDArray 2 @cpu(0)>


也可以把模型中任意层任意参数初始化，例如把上面模型中隐含层的偏差参数初始化为1

In [16]:
net.hidden.bias.initialize(init=init.One(), force_reinit=True)
print(net.hidden.bias.data())


[ 1.  1.  1.  1.]
<NDArray 4 @cpu(0)>


#### 自定义初始化方法
通过重载_init_weight来实现自定义的初始化方法。

In [17]:
class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit, self).__init__()
        self._verbose = True
    def _init_weight(self, _, arr):
        #初始化权重，使用out=arr后我们不需指定形状
        nd.random.uniform(low=10, high=20, out=arr)

net = MLP()
net.initialize(MyInit())
net(x)
net.hidden.weight.data()


[[ 14.80893517  11.18727684  16.88661194  13.1798315   18.80475998]
 [ 14.14262962  19.18235397  10.64147472  12.16822147  16.92472076]
 [ 15.65188885  15.66601467  18.65102577  12.65389442  15.08968925]
 [ 15.232481    19.1672287   10.93940544  19.21157646  15.75946522]]
<NDArray 4x5 @cpu(0)>

我们还可以通过Parameter.set_data来直接写模型参数

In [18]:
net = MLP()
net.initialize()
net(x)
print('output layer default weight:', net.output.weight.data())

w = net.output.weight
w.set_data(nd.ones(w.shape))
print('output layer modified weight:', net.output.weight.data())

output layer default weight: 
[[-0.0296133   0.06470639 -0.00933967 -0.03517456]
 [ 0.03585494  0.01066203 -0.01454624  0.01288587]]
<NDArray 2x4 @cpu(0)>
output layer modified weight: 
[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]
<NDArray 2x4 @cpu(0)>


延后初始化

In [27]:
net = MLP()
net.collect_params()

mlp6_ (
  Parameter mlp6_dense0_weight (shape=(4, 0), dtype=<class 'numpy.float32'>)
  Parameter mlp6_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter mlp6_dense1_weight (shape=(2, 0), dtype=<class 'numpy.float32'>)
  Parameter mlp6_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

In [28]:
net.initialize() #调用了initialize后模型参数未初始化
net.collect_params()

mlp6_ (
  Parameter mlp6_dense0_weight (shape=(4, 0), dtype=<class 'numpy.float32'>)
  Parameter mlp6_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter mlp6_dense1_weight (shape=(2, 0), dtype=<class 'numpy.float32'>)
  Parameter mlp6_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

In [30]:
print(x)
net(x) #输入数据x后，参数初始化了
net.collect_params()
net.params


[[ 0.08712929  0.6481719   0.0202184   0.36824155  0.83261985]
 [ 0.95715517  0.77815676  0.14035077  0.87001216  0.87008727]
 [ 0.97861832  0.47360805  0.79915857  0.80091077  0.46147937]]
<NDArray 3x5 @cpu(0)>


mlp6_ (

)

共享模型参数

当我们希望在模型的多个层之间共享模型参数时，可以通过nn.Block的params来指定模型参数。

In [22]:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(4, activation='relu'))
    net.add(nn.Dense(4, activation='relu'))
    #通过params指定需要共享的模型参数
    net.add(nn.Dense(4, activation='relu', params=net[1].params))
    net.add(nn.Dense(4))

net.initialize()
net(x)
print(net[1].weight.data())
print(net[2].weight.data())


[[ 0.03488377 -0.00997238  0.05298331 -0.05103363]
 [-0.05559913 -0.02824048 -0.05706766  0.00979508]
 [-0.02043347  0.01272219  0.00725428  0.01040554]
 [-0.06529249  0.02144811  0.06565464  0.02129445]]
<NDArray 4x4 @cpu(0)>

[[ 0.03488377 -0.00997238  0.05298331 -0.05103363]
 [-0.05559913 -0.02824048 -0.05706766  0.00979508]
 [-0.02043347  0.01272219  0.00725428  0.01040554]
 [-0.06529249  0.02144811  0.06565464  0.02129445]]
<NDArray 4x4 @cpu(0)>


我们也可以在使用nn.Block构造的多层感知机中，让模型的第二隐含层hidden2和第三层hidden3共享模型参数

In [24]:
class MLP_SHARE(nn.Block):
    def __init__(self, **kwargs):
        super(MLP_SHARE, self).__init__(**kwargs)
        with self.name_scope():
            self.hidden1 = nn.Dense(4, activation='relu')
            self.hidden2 = nn.Dense(4, activation='relu')
            #通过params指定需要共享的模型参数
            self.hidden3 = nn.Dense(4, activation='relu', params=self.hidden2.params)
            self.output = nn.Dense(2)
    
    def forward(self, x):
        return self.output(self.hidden3(self.hidden2(self.hidden1(x))))
net = MLP_SHARE()
net.initialize()
net(x)
print(net.hidden2.weight.data())
print(net.hidden3.weight.data())


[[  1.35317594e-02  -6.55425489e-02   3.97101939e-02  -4.69428152e-02]
 [  3.68058681e-06   1.70069709e-02  -6.29481897e-02   1.08120069e-02]
 [  2.78737247e-02  -3.66950035e-02   6.89354911e-02   6.07899651e-02]
 [ -3.25832441e-02   1.59552321e-02   2.50726864e-02   4.98858839e-03]]
<NDArray 4x4 @cpu(0)>

[[  1.35317594e-02  -6.55425489e-02   3.97101939e-02  -4.69428152e-02]
 [  3.68058681e-06   1.70069709e-02  -6.29481897e-02   1.08120069e-02]
 [  2.78737247e-02  -3.66950035e-02   6.89354911e-02   6.07899651e-02]
 [ -3.25832441e-02   1.59552321e-02   2.50726864e-02   4.98858839e-03]]
<NDArray 4x4 @cpu(0)>
