In [1]:
from mxnet import init, nd
from mxnet.gluon import nn

In [2]:
# Build a small two-layer network that the following cells use to demonstrate
# how parameters can be inspected.
net = nn.Sequential()
net.add(
    nn.Dense(256, activation='relu'),
    nn.Dense(10),
)
net.initialize()

# Push one random batch (2 samples x 20 features) through the network.
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

In [3]:
# Show the first layer's parameters (shapes and dtypes) together with the
# Python type of the container that holds them.
net[0].params, type(net[0].params)

(dense0_ (
   Parameter dense0_weight (shape=(256, 20), dtype=float32)
   Parameter dense0_bias (shape=(256,), dtype=float32)
 ),
 mxnet.gluon.parameter.ParameterDict)

In [19]:
# Show the first layer's weight parameter object (name, shape, dtype).
net[0].weight

Parameter dense0_weight (shape=(256, 20), dtype=float32)

In [5]:
# Show the numeric values of the first layer's weight.
net[0].weight.data()


[[ 0.06700657 -0.00369488  0.0418822  ... -0.05517294 -0.01194733
  -0.00369594]
 [-0.03296221 -0.04391347  0.03839272 ...  0.05636378  0.02545484
  -0.007007  ]
 [-0.0196689   0.01582889 -0.00881553 ...  0.01509629 -0.01908049
  -0.02449339]
 ...
 [ 0.00010955  0.0439323  -0.04911506 ...  0.06975312  0.0449558
  -0.03283203]
 [ 0.04106557  0.05671307 -0.00066976 ...  0.06387014 -0.01292654
   0.00974177]
 [ 0.00297424 -0.0281784  -0.06881659 ... -0.04047417  0.00457048
   0.05696651]]
<NDArray 256x20 @cpu(0)>

In [6]:
# Show the gradient of the first layer's weight. No backward pass appears in
# the cells above, and the recorded output is all zeros.
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [7]:
# Show the values of the second layer's bias parameter.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [8]:
# Collect all parameters of the network (weights and biases of both layers).
net.collect_params()

sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense0_bias (shape=(256,), dtype=float32)
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
  Parameter dense1_bias (shape=(10,), dtype=float32)
)

In [9]:
# Select parameters by a regular expression on their names; '.*weight' keeps
# only the weight parameters.
net.collect_params('.*weight')

sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
)

In [10]:
# Initialize the parameters with an initializer class from `init`. When this
# is not the first initialization, force_reinit must be set to True.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()


[[ 0.00195949 -0.0173764   0.00047347 ...  0.01376901  0.00205885
   0.00994352]
 [-0.00235806  0.00298818 -0.01998208 ...  0.00958589 -0.01497647
   0.00660516]
 [-0.00189036 -0.00273026  0.00918154 ... -0.00410552  0.01284537
   0.0021001 ]
 ...
 [ 0.01049459 -0.0011141   0.01018339 ...  0.00804102  0.00893996
   0.01025588]
 [ 0.02153218 -0.00600865  0.01023765 ... -0.00775244 -0.00293792
   0.00165691]
 [-0.0033118   0.00115615  0.00131705 ... -0.0214244   0.00828725
   0.00413998]]
<NDArray 256x20 @cpu(0)>

In [11]:
# Same as above, this time with a constant initializer; only the first row of
# the first layer's weight is displayed.
net.initialize(init=init.Constant(1), force_reinit=True)
net[0].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 20 @cpu(0)>

In [12]:
# Initialize one specific parameter. Both the network and an individual weight
# expose an initialize() method; the network-level one initializes every
# parameter.
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
net[0].weight.data()[0]


[ 0.00512482 -0.06579044 -0.10849719 -0.09586414  0.06394844  0.06029618
 -0.03065033 -0.01086642  0.01929168  0.1003869  -0.09339568 -0.08703034
 -0.10472868 -0.09879824 -0.00352201 -0.11063069 -0.04257748  0.06548801
  0.12987629 -0.13846186]
<NDArray 20 @cpu(0)>

In [16]:
# Custom initialization: subclass init.Initializer and override _init_weight.
class MyInit(init.Initializer):
    """Initializer that samples uniformly between -10 and 10, then zeroes
    every entry whose absolute value is below 5."""

    def _init_weight(self, name, data):
        """Fill `data` in place.

        name: name of the parameter being initialized (only printed).
        data: array to initialize; it is overwritten in place.
        """
        print('Init', name, data.shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        # Multiplying by the comparison result zeroes the entries with
        # |value| < 5 and leaves the others unchanged.
        keep_mask = data.abs() >= 5
        data *= keep_mask
# Re-initialize the whole network with the custom initializer and display the
# first row of the first layer's weight.
net.initialize(init=MyInit(), force_reinit=True)
net[0].weight.data()[0]

Init dense0_weight (256, 20)
Init dense1_weight (10, 256)



[-5.3659673  7.5773945  8.986376  -0.         8.827555   0.
  5.9840508 -0.         0.         0.         7.4857597 -0.
 -0.         6.8910007  6.9788704 -6.1131554  0.         5.4665203
 -9.735263   9.485172 ]
<NDArray 20 @cpu(0)>

In [18]:
# Values can also be assigned manually through weight.set_data. This reads the
# current weight and adds 1 to it, so re-running the cell shifts the values
# again.
net[0].weight.set_data(net[0].weight.data() + 1)
net[0].weight.data()[0]


[-4.3659673  8.5773945  9.986376   1.         9.827555   1.
  6.9840508  1.         1.         1.         8.48576    1.
  1.         7.8910007  7.9788704 -5.1131554  1.         6.4665203
 -8.735263  10.485172 ]
<NDArray 20 @cpu(0)>

In [21]:
# Parameter sharing: the third layer is constructed with the parameters of
# `shared`, which is itself the second layer of the network.
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'))
net.add(shared)
net.add(nn.Dense(8, activation='relu', params=shared.params))
net.add(nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 20))
net(X)

# Element-wise comparison of the first weight row of layers 1 and 2.
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>

In [30]:
# Inspect parameter shapes before any input has been fed to the network: the
# input dimension of each weight is reported as 0 in the output below.
net = nn.Sequential()
net.add(
    nn.Dense(256, activation='relu'),
    nn.Dense(10),
)
net.initialize()

net.collect_params()

sequential5_ (
  Parameter dense14_weight (shape=(256, 0), dtype=float32)
  Parameter dense14_bias (shape=(256,), dtype=float32)
  Parameter dense15_weight (shape=(10, 0), dtype=float32)
  Parameter dense15_bias (shape=(10,), dtype=float32)
)

In [32]:
# Run one forward pass so the weight shapes are determined from the input,
# then list the parameters again.
# The batch is created in this cell on purpose: the recorded output shows an
# 80-column first-layer weight, while the only X defined in the cells above
# has 20 columns. Defining the 80-feature input here makes the result
# reproducible on a fresh kernel instead of relying on leftover state.
X = nd.random.uniform(shape=(2, 80))
net(X)
net.collect_params()

sequential5_ (
  Parameter dense14_weight (shape=(256, 80), dtype=float32)
  Parameter dense14_bias (shape=(256,), dtype=float32)
  Parameter dense15_weight (shape=(10, 256), dtype=float32)
  Parameter dense15_bias (shape=(10,), dtype=float32)
)

In [33]:
# A network that has already run a forward pass cannot be re-initialized to a
# different input shape: force_reinit=True is requested here, yet the
# parameter shapes listed afterwards are the same as before.
# NOTE(review): X is reassigned but never passed through net in this cell --
# confirm whether a forward call with the new batch was intended.
X = nd.random.uniform(shape=(2,80))
net.initialize(force_reinit=True)
net.collect_params()

sequential5_ (
  Parameter dense14_weight (shape=(256, 80), dtype=float32)
  Parameter dense14_bias (shape=(256,), dtype=float32)
  Parameter dense15_weight (shape=(10, 256), dtype=float32)
  Parameter dense15_bias (shape=(10,), dtype=float32)
)