In [1]:
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import nd, init, gluon
from mxnet.gluon import nn

def vgg_block(num_convs, num_channels):
    """Assemble one VGG block.

    The block stacks ``num_convs`` convolutional layers (3x3 kernel,
    padding 1, ReLU activation, ``num_channels`` output channels each) and
    finishes with a single 2x2 max-pooling layer of stride 2.

    Args:
        num_convs: Number of convolutional layers to stack.
        num_channels: Output channels of every convolutional layer.

    Returns:
        An ``nn.Sequential`` holding the convolutional layers followed by
        the pooling layer.
    """
    block = nn.Sequential()
    layers = [
        nn.Conv2D(num_channels, kernel_size=3, padding=1, activation='relu')
        for _ in range(num_convs)
    ]
    # The pooling layer always comes last, after every convolution.
    layers.append(nn.MaxPool2D(pool_size=2, strides=2))
    block.add(*layers)
    return block

  from ._conv import register_converters as _register_converters
  import OpenSSL.SSL


$3\times 3$

In [2]:

def vgg(conv_arch, num_outputs):
    """Build a VGG-style network.

    Args:
        conv_arch: Iterable of ``(num_convs, num_channels)`` pairs; one VGG
            block is created per pair, in order.
        num_outputs: Number of units in the final dense (output) layer.

    Returns:
        An ``nn.Sequential`` made of the convolutional blocks followed by
        two 4096-unit ReLU dense layers (each followed by dropout with
        rate 0.5) and the output dense layer.
    """
    model = nn.Sequential()
    # Convolutional part: one block per architecture entry.
    for spec in conv_arch:
        n_convs, n_channels = spec
        model.add(vgg_block(n_convs, n_channels))
    # Fully connected part: two identical dense + dropout stages ...
    for _ in range(2):
        model.add(nn.Dense(4096, activation="relu"))
        model.add(nn.Dropout(.5))
    # ... followed by the output layer.
    model.add(nn.Dense(num_outputs))
    return model



In [3]:
# Five blocks with 1 + 1 + 2 + 2 + 2 = 8 convolutional layers; together with
# the three dense layers added by vgg() this gives an 11-layer network.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)
net = vgg(conv_arch, 1000)
net.initialize()

# Feed one random single-channel 224x224 input through the network one
# top-level layer at a time and report the output shape after each layer.
X = nd.random.uniform(shape=(1, 1, 224, 224))
for blk in net:
    X = blk(X)
    print(blk.name, 'output shape:\t', X.shape)

sequential1 output shape:	 (1, 64, 112, 112)
sequential2 output shape:	 (1, 128, 56, 56)
sequential3 output shape:	 (1, 256, 28, 28)
sequential4 output shape:	 (1, 512, 14, 14)
sequential5 output shape:	 (1, 512, 7, 7)
dense0 output shape:	 (1, 4096)
dropout0 output shape:	 (1, 4096)
dense1 output shape:	 (1, 4096)
dropout1 output shape:	 (1, 4096)
dense2 output shape:	 (1, 1000)


In [4]:
# Shrink every block's channel count by `ratio` to get a smaller network;
# the number of convolutional layers per block is left unchanged.
ratio = 4
small_conv_arch = [
    (num_convs, int(num_channels / ratio))
    for num_convs, num_channels in conv_arch
]
# 10 output units for the 10-class dataset used in the training cell.
net = vgg(small_conv_arch, 10)

In [5]:
# Pick the compute device; presumably gb.try_gpu() returns a GPU context when
# one is available -- confirm against the gluonbook helper.
ctx = gb.try_gpu()

# Xavier-initialise the parameters on that device and optimise them with
# plain SGD at a learning rate of 0.05.
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': 0.05},
)

# Load the data in batches of 128 with resize=224 (matching the 224x224
# input used for the shape check), then train for 3 epochs.
train_data, test_data = gb.load_data_fashion_mnist(
    batch_size=128, resize=224)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=3)

Start training on  gpu(0)
Epoch 0. Loss: 0.909, Train acc 0.67, Test acc 0.86, Time 152.7 sec
Epoch 1. Loss: 0.406, Train acc 0.85, Test acc 0.88, Time 131.5 sec
Epoch 2. Loss: 0.332, Train acc 0.88, Test acc 0.90, Time 131.5 sec


VGG通过5个可以重复使用的卷积块来构造网络。根据每个卷积块里卷积层的数目和输出通道数的不同，可以定义出不同的VGG模型。


    - VGG的计算比AlexNet慢很多，也需要更多的GPU内存。试分析其原因。
    - 尝试将Fashion-MNIST图像的高和宽由224改成96，观察这对实验结果带来的影响。
    - 参考[1]里的表1来构造VGG的其他常用模型，例如VGG-16和VGG-19。
