# 汇聚层

实现汇聚层的前向传播

In [26]:
import sys
# Put the parent directory on the module search path before importing d2l below;
# presumably the local `d2l` package lives one level up — TODO confirm.
sys.path.append('..')

In [27]:
from d2l import mindspore as d2l
from mindspore import nn, ops

def pool2d(X, pool_size, mode='max'):
    """Compute the forward pass of a 2-D pooling layer (stride 1, no padding).

    Args:
        X: 2-D tensor indexed as ``X[row, col]``.
        pool_size: Pair ``(p_h, p_w)`` with the pooling window height and width.
        mode: ``'max'`` for maximum pooling or ``'avg'`` for average pooling.

    Returns:
        Tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)`` where
        each entry is the max (or mean) of the corresponding window of ``X``.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    p_h, p_w = pool_size
    # Reject unknown modes up front; otherwise neither branch below would run
    # and the function would silently return an all-zero tensor.
    if mode not in ('max', 'avg'):
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")
    Y = d2l.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Window whose top-left corner is at (i, j).
            window = X[i: i + p_h, j: j + p_w]
            if mode == 'max':
                Y[i, j] = window.max()
            else:
                Y[i, j] = window.mean()
    return Y

验证二维最大汇聚层的输出

In [28]:
import mindspore as ms
ms.set_context(mode=ms.PYNATIVE_MODE, device_target="CPU") # Switched to the CPU device target so that this code can run.
# 3x3 input used to check the hand-written pool2d function.
X = d2l.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
# mode defaults to 'max', so this is 2x2 max pooling.
pool2d(X, (2, 2))



Tensor(shape=[2, 2], dtype=Float32, value=
[[ 4.00000000e+00,  5.00000000e+00],
 [ 7.00000000e+00,  8.00000000e+00]])

验证平均汇聚层

In [29]:
# Same 2x2 window as above, but averaging each window instead of taking its maximum.
pool2d(X, (2, 2), mode='avg')

Tensor(shape=[2, 2], dtype=Float32, value=
[[ 2.00000000e+00,  3.00000000e+00],
 [ 5.00000000e+00,  6.00000000e+00]])

填充和步幅

In [30]:
# Values 0..15 arranged as a 4x4 grid with two leading axes of size 1
# (presumably batch and channel, the NCHW layout nn.MaxPool2d uses by default — confirm).
X = d2l.reshape(d2l.arange(16, dtype=d2l.float32), (1, 1, 4, 4))
X

Tensor(shape=[1, 1, 4, 4], dtype=Float32, value=
[[[[ 0.00000000e+00,  1.00000000e+00,  2.00000000e+00,  3.00000000e+00],
   [ 4.00000000e+00,  5.00000000e+00,  6.00000000e+00,  7.00000000e+00],
   [ 8.00000000e+00,  9.00000000e+00,  1.00000000e+01,  1.10000000e+01],
   [ 1.20000000e+01,  1.30000000e+01,  1.40000000e+01,  1.50000000e+01]]]])

在许多深度学习框架（如 PyTorch）中，步幅默认与汇聚窗口的大小相同；而 MindSpore 的 `nn.MaxPool2d` 默认步幅为 1，因此这里显式设定 `stride=3`，使步幅与 3×3 的汇聚窗口大小一致

In [31]:
# 3x3 max pooling with the stride set explicitly to the window size.
# Bound to its own name so the hand-written pool2d function defined earlier
# is not shadowed and the earlier cells stay re-runnable.
max_pool = nn.MaxPool2d(3, stride=3)
max_pool(X)

Tensor(shape=[1, 1, 1, 1], dtype=Float32, value=
[[[[ 1.00000000e+01]]]])

填充和步幅可以手动设定（这里先用 `ops.Pad` 在高和宽的两侧各填充一行/一列 0，再进行步幅为 2 的最大汇聚）

In [32]:
# 3x3 max pooling with stride 2; bound to its own name so the hand-written
# pool2d function defined earlier is not shadowed.
max_pool = nn.MaxPool2d(3, stride=2)
# Pad one element on each side of the last two axes before pooling.
# NOTE: ops.Pad fills with zeros, which only leaves the max unchanged because
# every value in X is non-negative here.
max_pool(ops.Pad(((0, 0), (0, 0), (1, 1), (1, 1)))(X))

Tensor(shape=[1, 1, 2, 2], dtype=Float32, value=
[[[[ 5.00000000e+00,  7.00000000e+00],
   [ 1.30000000e+01,  1.50000000e+01]]]])

设定一个任意大小的矩形汇聚窗口，并分别设定填充和步幅的高度和宽度

In [33]:
# Rectangular 2x3 window with a matching (2, 3) stride; bound to its own name
# so the hand-written pool2d function defined earlier is not shadowed.
max_pool = nn.MaxPool2d((2, 3), stride=(2, 3))
# Pad only the last axis (one element on each side); the other axes are left as is.
# NOTE: ops.Pad fills with zeros, which only leaves the max unchanged because
# every value in X is non-negative here.
max_pool(ops.Pad(((0, 0), (0, 0), (0, 0), (1, 1)))(X))

Tensor(shape=[1, 1, 2, 2], dtype=Float32, value=
[[[[ 5.00000000e+00,  7.00000000e+00],
   [ 1.30000000e+01,  1.50000000e+01]]]])

汇聚层在每个输入通道上单独运算

In [34]:
# Stack the first channel with a copy shifted by 1 along axis 1.
# Slicing out channel 0 first keeps this cell idempotent: re-running it always
# produces exactly 2 channels instead of doubling the channel count each time.
X = d2l.concat((X[:, :1], X[:, :1] + 1), 1)
X

Tensor(shape=[1, 2, 4, 4], dtype=Float32, value=
[[[[ 0.00000000e+00,  1.00000000e+00,  2.00000000e+00,  3.00000000e+00],
   [ 4.00000000e+00,  5.00000000e+00,  6.00000000e+00,  7.00000000e+00],
   [ 8.00000000e+00,  9.00000000e+00,  1.00000000e+01,  1.10000000e+01],
   [ 1.20000000e+01,  1.30000000e+01,  1.40000000e+01,  1.50000000e+01]],
  [[ 1.00000000e+00,  2.00000000e+00,  3.00000000e+00,  4.00000000e+00],
   [ 5.00000000e+00,  6.00000000e+00,  7.00000000e+00,  8.00000000e+00],
   [ 9.00000000e+00,  1.00000000e+01,  1.10000000e+01,  1.20000000e+01],
   [ 1.30000000e+01,  1.40000000e+01,  1.50000000e+01,  1.60000000e+01]]]])

In [35]:
# 3x3 max pooling with stride 2, applied to the two-channel input; bound to its
# own name so the hand-written pool2d function defined earlier is not shadowed.
max_pool = nn.MaxPool2d(3, stride=2)
# Pad one element on each side of the last two axes; the first two axes are not padded.
# NOTE: ops.Pad fills with zeros, which only leaves the max unchanged because
# every value in X is non-negative here.
max_pool(ops.Pad(((0, 0), (0, 0), (1, 1), (1, 1)))(X))

Tensor(shape=[1, 2, 2, 2], dtype=Float32, value=
[[[[ 5.00000000e+00,  7.00000000e+00],
   [ 1.30000000e+01,  1.50000000e+01]],
  [[ 6.00000000e+00,  8.00000000e+00],
   [ 1.40000000e+01,  1.60000000e+01]]]])