In [1]:
import sys
sys.path.insert(0, '..')
import gluonbook as gb

from mxnet import nd, init, gluon
from mxnet.gluon import nn

class Inception(nn.Block):
    # c1 - c4 为每条线路里的层的输出通道数。
    def __init__(self, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # 线路 1，单 1 x 1 卷积层。
        self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
        # 线路 2，1 x 1 卷积层后接 3 x 3 卷积层。
        self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
        self.p2_2 = nn.Conv2D(c2[1], kernel_size=3, padding=1,
                              activation='relu')
        # 线路 3，1 x 1 卷积层后接 5 x 5 卷积层。
        self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
        self.p3_2 = nn.Conv2D(c3[1], kernel_size=5, padding=2,
                              activation='relu')
        # 线路 4，3 x 3最大池化层后接 1 x 1 卷积层。
        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
        self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')

    def forward(self, x):
        p1 = self.p1_1(x)
        p2 = self.p2_2(self.p2_1(x))
        p3 = self.p3_2(self.p3_1(x))
        p4 = self.p4_2(self.p4_1(x))
        # 在通道维上合并输出
        return nd.concat(p1, p2, p3, p4, dim=1)

  from ._conv import register_converters as _register_converters
  import OpenSSL.SSL


In [2]:
b1 = nn.Sequential()
b1.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3, activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)

In [3]:

b2 = nn.Sequential()
b2.add(
    nn.Conv2D(64, kernel_size=1),
    nn.Conv2D(192, kernel_size=3, padding=1),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)



In [4]:

b3 = nn.Sequential()
b3.add(
    Inception(64, (96, 128), (16, 32), 32),
    Inception(128, (128, 192), (32, 96), 64),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)



In [5]:
b4 = nn.Sequential()
b4.add(
    Inception(192, (96, 208), (16, 48), 64),
    Inception(160, (112, 224), (24, 64), 64),
    Inception(128, (128, 256), (24, 64), 64),
    Inception(112, (144, 288), (32, 64), 64),
    Inception(256, (160, 320), (32, 128), 128),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)

In [6]:
b5 = nn.Sequential()
b5.add(
    Inception(256, (160, 320), (32, 128), 128),
    Inception(384, (192, 384), (48, 128), 128),
    nn.GlobalAvgPool2D()
)

net = nn.Sequential()
net.add(b1, b2, b3, b4, b5, nn.Dense(10))

In [7]:

X = nd.random.uniform(shape=(1,1,96,96))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)



sequential0 output shape:	 (1, 64, 24, 24)
sequential1 output shape:	 (1, 192, 12, 12)
sequential2 output shape:	 (1, 480, 6, 6)
sequential3 output shape:	 (1, 832, 3, 3)
sequential4 output shape:	 (1, 1024, 1, 1)
dense0 output shape:	 (1, 10)


In [8]:
ctx = gb.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})
train_data, test_data = gb.load_data_fashion_mnist(batch_size=128, resize=96)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=15)

Start training on  gpu(0)
Epoch 0. Loss: 2.172, Train acc 0.20, Test acc 0.19, Time 101.2 sec
Epoch 1. Loss: 0.782, Train acc 0.70, Test acc 0.81, Time 86.5 sec
Epoch 2. Loss: 0.458, Train acc 0.83, Test acc 0.86, Time 86.5 sec
Epoch 3. Loss: 0.375, Train acc 0.86, Test acc 0.88, Time 86.6 sec
Epoch 4. Loss: 0.333, Train acc 0.87, Test acc 0.88, Time 86.7 sec
Epoch 5. Loss: 0.306, Train acc 0.89, Test acc 0.89, Time 86.2 sec
Epoch 6. Loss: 0.281, Train acc 0.89, Test acc 0.89, Time 86.4 sec
Epoch 7. Loss: 0.264, Train acc 0.90, Test acc 0.89, Time 86.6 sec
Epoch 8. Loss: 0.250, Train acc 0.90, Test acc 0.90, Time 86.6 sec
Epoch 9. Loss: 0.237, Train acc 0.91, Test acc 0.91, Time 87.1 sec
Epoch 10. Loss: 0.392, Train acc 0.86, Test acc 0.90, Time 86.7 sec
Epoch 11. Loss: 0.236, Train acc 0.91, Test acc 0.88, Time 86.5 sec
Epoch 12. Loss: 0.216, Train acc 0.92, Test acc 0.91, Time 86.9 sec
Epoch 13. Loss: 0.200, Train acc 0.92, Test acc 0.91, Time 86.7 sec
Epoch 14. Loss: 0.188, Train ac


    GoogLeNet有数个后续版本，尝试实现他们并运行看看有什么不一样。本小节介绍的是最先的版本 [1]。[2] 加入批量归一化层（后一小节将介绍），[3] 对Inception块做了调整。[4] 则加入了残差连接（后面小节将介绍）。
    对比AlexNet、VGG和NiN、GoogLeNet的模型参数大小。分析为什么后两个网络可以显著减小模型大小。


[1] Szegedy, Christian, et al. “Going deeper with convolutions.” CVPR, 2015.

[2] Ioffe, Sergey, and Christian Szegedy. “Batch normalization: Accelerating deep network training by reducing internal covariate shift.” arXiv:1502.03167 (2015).

[3] Szegedy, Christian, et al. “Rethinking the inception architecture for computer vision.” CVPR. 2016.

[4] Szegedy, Christian, et al. “Inception-v4, inception-resnet and the impact of residual connections on learning.” AAAI. 2017.