# 参数管理

我们首先关注具有单隐藏层的多层感知机

In [1]:
# Put the parent directory on the module search path before any project imports.
# NOTE(review): presumably this is what makes the local `d2l` package importable — confirm.
import sys
sys.path.append('..') 

In [2]:
from d2l import mindspore as d2l
from mindspore import nn

# Single-hidden-layer MLP: 4 inputs -> 8 hidden units (ReLU) -> 1 output.
layers = [nn.Dense(4, 8), nn.ReLU(), nn.Dense(8, 1)]
net = nn.SequentialCell(layers)

# Random mini-batch with shape (2, 4): two samples, four features each.
X = d2l.rand((2, 4))
net(X)

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


Tensor(shape=[2, 1], dtype=Float32, value=
[[-5.87999701e-01],
 [-6.28293872e-01]])

参数访问

In [3]:
# Index 2 of the SequentialCell is the final Dense layer; list its parameters.
last_layer = net[2]
print(last_layer.parameters_dict())

OrderedDict([('2.weight', Parameter (name=2.weight, shape=(1, 8), dtype=Float32, requires_grad=True)), ('2.bias', Parameter (name=2.bias, shape=(1,), dtype=Float32, requires_grad=True))])


目标参数

In [4]:
# Inspect the final layer's bias three ways: its Python type, the Parameter
# object itself, and the tensor value it holds.
bias = net[2].bias
print(type(bias))
print(bias)
print(bias.value())

<class 'abc.Parameter'>
Parameter (name=2.bias, shape=(1,), dtype=Float32, requires_grad=True)
[-0.26707265]


一次性访问所有参数

In [5]:
# Show (name, shape) pairs first for the first layer only, then for the whole network.
for cell in (net[0], net):
    print(*[(name, param.shape) for name, param in cell.parameters_dict().items()])

('0.weight', (8, 4)) ('0.bias', (8,))
('0.weight', (8, 4)) ('0.bias', (8,)) ('2.weight', (1, 8)) ('2.bias', (1,))


In [6]:
# Parameters can also be fetched by name from the network-wide dictionary.
all_params = net.parameters_dict()
all_params['2.bias'].value()

Tensor(shape=[1], dtype=Float32, value= [-2.67072648e-01])

从嵌套块收集参数

In [7]:
def block1():
    """Build a small MLP block: Dense(4, 8) -> ReLU -> Dense(8, 4) -> ReLU.

    The block takes 4 features in and produces 4 features out, so several
    copies of it can be chained one after another.
    """
    layers = [
        nn.Dense(4, 8), nn.ReLU(),
        nn.Dense(8, 4), nn.ReLU(),
    ]
    return nn.SequentialCell(layers)

def block2():
    """Return a SequentialCell that nests four fresh `block1()` instances."""
    stacked = nn.SequentialCell()
    for _ in range(4):
        # Nesting happens here: each iteration adds a new sub-block.
        stacked.append(block1())
    return stacked

# Top-level network: the nested block followed by a Dense layer mapping 4 features to 1 output.
top_level_cells = [block2(), nn.Dense(4, 1)]
rgnet = nn.SequentialCell(top_level_cells)
rgnet(X)

Tensor(shape=[2, 1], dtype=Float32, value=
[[-2.32916176e-01],
 [-2.32916176e-01]])

我们已经设计了网络，让我们看看它是如何组织的

In [8]:
# Printing the cell shows the nested layer hierarchy of `rgnet`.
print(rgnet)

SequentialCell(
  (0): SequentialCell(
    (0): SequentialCell(
      (0): Dense(input_channels=4, output_channels=8, has_bias=True)
      (1): ReLU()
      (2): Dense(input_channels=8, output_channels=4, has_bias=True)
      (3): ReLU()
    )
    (1): SequentialCell(
      (0): Dense(input_channels=4, output_channels=8, has_bias=True)
      (1): ReLU()
      (2): Dense(input_channels=8, output_channels=4, has_bias=True)
      (3): ReLU()
    )
    (2): SequentialCell(
      (0): Dense(input_channels=4, output_channels=8, has_bias=True)
      (1): ReLU()
      (2): Dense(input_channels=8, output_channels=4, has_bias=True)
      (3): ReLU()
    )
    (3): SequentialCell(
      (0): Dense(input_channels=4, output_channels=8, has_bias=True)
      (1): ReLU()
      (2): Dense(input_channels=8, output_channels=4, has_bias=True)
      (3): ReLU()
    )
  )
  (1): Dense(input_channels=4, output_channels=1, has_bias=True)
)


In [9]:
# Drill down by index: first child -> its second sub-block -> that sub-block's first Dense layer.
inner_dense = rgnet[0][1][0]
inner_dense.bias.value()

Tensor(shape=[8], dtype=Float32, value= [ 2.89527867e-02,  2.29328331e-02,  3.55436355e-01,  1.04846396e-01, -2.03583434e-01,  3.80997390e-01,  2.77427226e-01, -5.64563945e-02])

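默认的初始化方式取决于MindSpore的版本：在早期版本（1.x）中，`nn.Dense`默认使用Normal初始化权重矩阵，并将偏置参数设置为0；
在2.x版本中，未指定`weight_init`和`bias_init`时，权重默认使用HeUniform初始化，偏置默认使用Uniform初始化（因此上面输出中的偏置并不为0）。
MindSpore的`common.initializer`模块中提供了各种初始化方法。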

内置初始化

In [10]:
# Rebuild the MLP with explicit built-in initializers:
# 'normal' for the weights and 'zero' for the biases.
init_kwargs = dict(weight_init='normal', bias_init='zero')
net = nn.SequentialCell([
    nn.Dense(4, 8, **init_kwargs),
    nn.ReLU(),
    nn.Dense(8, 1, **init_kwargs),
])

# Display the first weight row and the first bias entry of the first layer.
first_layer = net[0]
first_layer.weight.data[0], first_layer.bias.data[0]

(Tensor(shape=[4], dtype=Float32, value= [ 4.69799712e-03, -9.98545904e-03,  6.16287021e-03, -1.65258572e-02]),
 Tensor(shape=[], dtype=Float32, value= 0))

In [11]:
# Same architecture, this time with every weight initialized by 'one'
# and every bias by 'zero'.
init_kwargs = dict(weight_init='one', bias_init='zero')
net = nn.SequentialCell([
    nn.Dense(4, 8, **init_kwargs),
    nn.ReLU(),
    nn.Dense(8, 1, **init_kwargs),
])

# Display the first weight row and the first bias entry of the first layer.
first_layer = net[0]
first_layer.weight.data[0], first_layer.bias.data[0]

(Tensor(shape=[4], dtype=Float32, value= [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,  1.00000000e+00]),
 Tensor(shape=[], dtype=Float32, value= 0))

对某些块应用不同的初始化方法

In [12]:
# Use a different initializer per layer: 'xavier_uniform' for the first Dense
# layer and the constant 42 for the last one.
layers = [
    nn.Dense(4, 8, weight_init='xavier_uniform'),
    nn.ReLU(),
    nn.Dense(8, 1, weight_init=42),
]
net = nn.SequentialCell(layers)

# Print the first weight row of each Dense layer (indices 0 and 2).
for idx in (0, 2):
    print(net[idx].weight.data[0])

[-0.7070257  -0.520665    0.31937808 -0.35800388]
[42. 42. 42. 42. 42. 42. 42. 42.]


自定义初始化
同样，我们实现了一个`my_init`函数来应用到`net`。

In [13]:
def my_init(shape):
    """Create an initial weight tensor of the given shape.

    Values are drawn with `d2l.uniform(shape, -10, 10)`; every entry whose
    absolute value is below 5 is then set to zero.
    """
    sampled = d2l.uniform(shape, -10, 10)
    keep_mask = d2l.abs(sampled) >= 5
    # Multiplying by the boolean mask zeroes the entries that fail the test.
    sampled *= keep_mask
    return sampled


# Pass pre-computed tensors from `my_init` as the initial weights; each tensor's
# shape is (out_features, in_features) of the layer it initializes.
layers = [
    nn.Dense(4, 8, weight_init=my_init((8, 4))),
    nn.ReLU(),
    nn.Dense(8, 1, weight_init=my_init((1, 8))),
]
net = nn.SequentialCell(layers)

# Show the first two rows of the first layer's weight matrix.
net[0].weight[:2]

Tensor(shape=[2, 4], dtype=Float32, value=
[[-6.20842695e+00,  9.27657318e+00, -0.00000000e+00, -9.22449112e+00],
 [ 0.00000000e+00,  0.00000000e+00,  5.89274883e+00,  0.00000000e+00]])

In [14]:
# Parameters can also be set directly.
# NOTE: this cell mutates `net` in place, so re-running it adds 1 again.
# Add 1 to every entry of the first layer's weight matrix,
net[0].weight.data[:] += 1
# then overwrite the single entry at row 0, column 0,
net[0].weight.data[0, 0] = 42
# and display the first row to confirm both updates.
net[0].weight.data[0]

Tensor(shape=[4], dtype=Float32, value= [ 4.20000000e+01,  1.02765732e+01,  1.00000000e+00, -8.22449112e+00])

参数绑定

In [15]:
# Keep a named reference to the layer that will be shared, so that its
# parameters can be referred to later.
shared = nn.Dense(8, 8)
layers = [
    nn.Dense(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Dense(8, 1),
]
net = nn.SequentialCell(layers)
net(X)
# Check that the parameters at both positions are equal.
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0, 0] = 100
# Make sure they are actually the same object rather than merely having
# the same values: the change made through index 2 must show up at index 4.
print(net[2].weight.data[0] == net[4].weight.data[0])

[ True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True]
