In [3]:
from mxnet import autograd,nd
from mxnet.gluon import nn

def corr2d(X,K):
    """Compute the 2-D cross-correlation of input ``X`` with kernel ``K``.

    The kernel is slid over every position where it fits entirely inside
    ``X``; at each position the overlapping window is multiplied
    element-wise with ``K`` and summed.

    Args:
        X: 2-D input array.
        K: 2-D kernel array.

    Returns:
        Array of shape ``(X.shape[0] - K.shape[0] + 1,
        X.shape[1] - K.shape[1] + 1)`` holding one sum per window position.
    """
    kernel_rows, kernel_cols = K.shape
    Y = nd.zeros((X.shape[0] - kernel_rows + 1, X.shape[1] - kernel_cols + 1))
    out_rows, out_cols = Y.shape
    for row in range(out_rows):
        for col in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[row:row + kernel_rows, col:col + kernel_cols]
            Y[row, col] = (window * K).sum()
    return Y

In [4]:
# Check corr2d on a 3x3 input and a 2x2 kernel; the result is 2x2.
X = nd.array([[0, 1, 2],
              [3, 4, 5],
              [6, 7, 8]])
K = nd.array([[0, 1],
              [2, 3]])
corr2d(X, K)


[[19. 25.]
 [37. 43.]]
<NDArray 2x2 @cpu(0)>

* 填充可以增加输出的高和宽.常用来使输出与输入具有相同的高和宽
* 步幅可以减小输出的高和宽,例如输出的高和宽仅为输入的高和宽的1/n(n为大于1的整数)

* 最大池化和平均池化分别取池化窗口中输入元素的最大值和平均值作为输出
* 池化层的一个主要作用是缓解卷积层对位置的过度敏感性
* 可以指定池化层的填充和步幅
* 池化层的输出通道数跟输入通道数相同

# VGG块

**VGG块的组成规律是:**
连续使用数个相同的填充为1、窗口形状为3x3的卷积层,后接上一个步幅为2、窗口形状为2x2的最大池化层.

In [5]:
import d2lzh as d2l
from mxnet import gluon,init,nd
from mxnet.gluon import nn

def vgg_block(num_convs,num_channels):
    """Build one VGG block.

    Args:
        num_convs: Number of convolutional layers in the block.
        num_channels: Number of output channels of every convolutional layer.

    Returns:
        An ``nn.Sequential`` holding ``num_convs`` 3x3 convolutions
        (padding 1, ReLU activation) followed by a 2x2 max-pooling layer
        with stride 2.
    """
    block = nn.Sequential()
    conv_layers = [
        nn.Conv2D(num_channels, kernel_size=3, padding=1, activation='relu')
        for _ in range(num_convs)
    ]
    block.add(*conv_layers)
    # The pooling layer closes the block.
    block.add(nn.MaxPool2D(pool_size=2, strides=2))
    return block

In [6]:
# One (num_convs, num_channels) pair per VGG block: 8 conv layers in total.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)

In [7]:
def vgg(conv_arch, num_classes=10):
    """Build a VGG network from a convolutional architecture specification.

    Args:
        conv_arch: Iterable of ``(num_convs, num_channels)`` pairs; one VGG
            block is created per pair via ``vgg_block``.
        num_classes: Number of units in the final dense (output) layer.
            Defaults to 10, the value that used to be hard-coded, so
            existing ``vgg(conv_arch)`` calls behave as before.

    Returns:
        An ``nn.Sequential`` holding the VGG blocks followed by two
        4096-unit ReLU dense layers (each followed by dropout with rate 0.5)
        and the output dense layer.
    """
    net = nn.Sequential()
    # Convolutional part: one block per architecture entry.
    for (num_convs, num_channels) in conv_arch:
        net.add(vgg_block(num_convs, num_channels))
    # Fully connected part.
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(num_classes))
    return net

# Instantiate the network described by conv_arch.
net = vgg(conv_arch)
    

In [8]:
# Initialize with the default initializer, then push one random
# single-channel 224x224 sample through the network.
net.initialize()
X = nd.random.uniform(shape=(1, 1, 224, 224))

# Apply the top-level children of the network one at a time and report the
# output shape after each of them.
for blk in net:
    X = blk(X)
    print(blk.name, 'output shape:\t', X.shape)

sequential1 output shape:	 (1, 64, 112, 112)
sequential2 output shape:	 (1, 128, 56, 56)
sequential3 output shape:	 (1, 256, 28, 28)
sequential4 output shape:	 (1, 512, 14, 14)
sequential5 output shape:	 (1, 512, 7, 7)
dense0 output shape:	 (1, 4096)
dropout0 output shape:	 (1, 4096)
dense1 output shape:	 (1, 4096)
dropout1 output shape:	 (1, 4096)
dense2 output shape:	 (1, 10)


In [9]:
# Divide the channel count of every block by this factor to get a narrower
# network (presumably to keep training affordable; confirm intent).
ratio = 4

small_conv_arch = [(num_convs, num_channels // ratio)
                   for num_convs, num_channels in conv_arch]
net = vgg(small_conv_arch)

In [None]:
# Hyperparameters and device. The recorded run below reports cpu(0) as the
# context returned by d2l.try_gpu().
lr,num_epochs,batch_size,ctx = 0.05,5,128,d2l.try_gpu()
# force_reinit=True: if `net` already holds initialized parameters (for
# example when this cell is run a second time), a plain initialize() only
# emits "already initialized, ignoring" warnings and leaves the old values
# and context in place. Forcing re-initialization guarantees that Xavier
# init and `ctx` are applied, so every run of this cell starts from freshly
# initialized weights.
net.initialize(ctx=ctx,init=init.Xavier(),force_reinit=True)
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
# resize=224 is passed to the loader; presumably it upsamples the
# Fashion-MNIST images to 224x224 (confirm against d2l).
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size,resize=224)
d2l.train_ch5(net,train_iter,test_iter,batch_size,trainer,ctx,num_epochs)

  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)


training on cpu(0)


VGG-11 通过5个可以重复使用的卷积块来构造网络
根据每块里卷积层个数和输出通道数的不同可以定义出不同的VGG模型