In [1]:
# The init package provides a variety of model initialization methods.
from mxnet import init, nd
from mxnet.gluon import nn

In [2]:
# Stack a 256-unit ReLU hidden layer and a 10-unit output layer.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dense(10))
net.initialize()  # use the default initialization scheme

# Forward computation on a random batch of shape (2, 20).
x = nd.random.uniform(shape=(2, 20))
y = net(x)

In [3]:
# Access the model parameters of the first layer, together with the type of
# the container that holds them.
net[0].params, type(net[0].params)

(dense0_ (
   Parameter dense0_weight (shape=(256, 20), dtype=float32)
   Parameter dense0_bias (shape=(256,), dtype=float32)
 ), mxnet.gluon.parameter.ParameterDict)

In [5]:
# Fetch the first layer's weight two ways: by its name in the parameter
# container and through the layer's `weight` attribute.
net[0].params['dense0_weight'], net[0].weight

(Parameter dense0_weight (shape=(256, 20), dtype=float32),
 Parameter dense0_weight (shape=(256, 20), dtype=float32))

In [6]:
# In Gluon a parameter is an instance of the Parameter class, which holds both
# the parameter values and their gradients; the two are accessed through the
# data() and grad() functions respectively.
net[0].weight.data(), net[0].weight.grad()

(
 [[ 0.06700657 -0.00369488  0.0418822  ..., -0.05517294 -0.01194733
   -0.00369594]
  [-0.03296221 -0.04391347  0.03839272 ...,  0.05636378  0.02545484
   -0.007007  ]
  [-0.0196689   0.01582889 -0.00881553 ...,  0.01509629 -0.01908049
   -0.02449339]
  ..., 
  [ 0.00010955  0.0439323  -0.04911506 ...,  0.06975312  0.0449558
   -0.03283203]
  [ 0.04106557  0.05671307 -0.00066976 ...,  0.06387014 -0.01292654
    0.00974177]
  [ 0.00297424 -0.0281784  -0.06881659 ..., -0.04047417  0.00457048
    0.05696651]]
 <NDArray 256x20 @cpu(0)>, 
 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]]
 <NDArray 256x20 @cpu(0)>)

In [8]:
# Read the bias values of the second (output) layer.
net[1].bias.data()


[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
<NDArray 10 @cpu(0)>

In [13]:
# collect_params returns all parameters held by every layer nested inside net.
net.collect_params()

sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense0_bias (shape=(256,), dtype=float32)
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
  Parameter dense1_bias (shape=(10,), dtype=float32)
)

In [10]:
# Parameter names can also be selected with a regular expression.
net.collect_params('.*weight')

sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
)

In [14]:
# Initialize the model parameters.
# Default initialization for most models: weight elements are random numbers
# drawn uniformly from [-0.07, 0.07], and bias parameters are all zeros.
# Initializing a model that has already been initialized requires
# force_reinit=True.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]


[ 0.01074176  0.00066428  0.00848699 -0.0080038  -0.00168822  0.00936328
  0.00357444  0.00779328 -0.01010307 -0.00391573  0.01316619 -0.00432926
  0.0071536   0.00925416 -0.00904951 -0.00074684  0.0082254  -0.01878511
  0.00885884  0.01911872]
<NDArray 20 @cpu(0)>

In [15]:
# Initialize the weight parameters with a constant value.
net.initialize(init=init.Constant(1), force_reinit=True)
net[0].weight.data()[0]


[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.]
<NDArray 20 @cpu(0)>

In [16]:
# Initialize one specific parameter only: the first layer's weight is
# re-initialized here, leaving the other parameters untouched by this call.
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
net[0].weight.data()[0]


[ 0.00512482 -0.06579044 -0.10849719 -0.09586414  0.06394844  0.06029618
 -0.03065033 -0.01086642  0.01929168  0.1003869  -0.09339568 -0.08703034
 -0.10472868 -0.09879824 -0.00352201 -0.11063069 -0.04257748  0.06548801
  0.12987629 -0.13846186]
<NDArray 20 @cpu(0)>

In [17]:
# Custom initialization method.
# Each weight is set to 0 with probability one half; otherwise it is drawn
# uniformly from the two intervals [-10, -5] and [5, 10].
class MyInit(init.Initializer):
    """Initializer producing weights that are either 0 or have magnitude in [5, 10]."""

    def _init_weight(self, name, data):
        """Overwrite `data` in place and log the parameter being initialized.

        name: name of the parameter, printed for tracing.
        data: array to fill; its shape decides how many values are sampled.
        """
        print('Init', name, data.shape)
        # Sample uniformly over [-10, 10] and store the draw in the parameter.
        samples = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data[:] = samples
        # Entries with magnitude below 5 are multiplied by 0, the rest by 1.
        keep_mask = data.abs() >= 5
        data *= keep_mask
# Re-initialize the network with the custom initializer, then display the
# first row of the first layer's weight.
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]

Init dense0_weight (256, 20)
Init dense1_weight (10, 256)



[-5.36596727  7.57739449  8.98637581 -0.          8.8275547   0.
  5.98405075 -0.          0.          0.          7.48575974 -0.         -0.
  6.89100075  6.97887039 -6.11315536  0.          5.46652031 -9.73526287
  9.48517227]
<NDArray 20 @cpu(0)>

In [18]:
# Overwrite model parameters directly with the set_data function of the
# Parameter class; here every element of the first layer's weight is
# increased by 1.
net[0].weight.set_data(net[0].weight.data() + 1)
net[0].weight.data()[0]


[ -4.36596727   8.57739449   9.98637581   1.           9.8275547    1.
   6.98405075   1.           1.           1.           8.48575974   1.           1.
   7.89100075   7.97887039  -5.11315536   1.           6.46652031
  -8.73526287  10.48517227]
<NDArray 20 @cpu(0)>

In [19]:
# Share model parameters between layers.
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
# The third layer is constructed with `params=shared.params`, i.e. it is
# handed the second layer's parameters.
net.add(nn.Dense(8, activation='relu'))
net.add(shared)
net.add(nn.Dense(8, activation='relu', params=shared.params))
net.add(nn.Dense(10))
net.initialize()

x = nd.random.uniform(shape=(2, 20))
net(x)  # forward computation
# Element-wise comparison of the first weight row of the second and third layers.
net[1].weight.data()[0] == net[2].weight.data()[0]


[ 1.  1.  1.  1.  1.  1.  1.  1.]
<NDArray 8 @cpu(0)>