In [1]:
import torch
from torch import nn
from d2l import torch as d2l

In [16]:
def vgg_block(num_convs, in_channels, out_channels, last_1_1_kernel = False):
    """Build one VGG block: 3x3 conv + ReLU pairs, an optional 1x1 conv, then max pooling.

    Args:
        num_convs: number of (3x3 convolution, ReLU) pairs in the block.
        in_channels: number of channels entering the block.
        out_channels: number of channels produced by every convolution in the block.
        last_1_1_kernel: when True, one extra (1x1 convolution, ReLU) pair is
            appended after the 3x3 convolutions (it is added, not substituted
            for the last 3x3 layer).

    Returns:
        nn.Sequential holding the layers in order, ending with a 2x2 max pool
        of stride 2.
    """
    stages = []
    channels = in_channels
    # 3x3 convolutions with padding 1; only the first one sees `in_channels`.
    for _ in range(num_convs):
        stages += [nn.Conv2d(channels, out_channels, 3, padding=1), nn.ReLU()]
        channels = out_channels
    # Optional trailing 1x1 convolution (no padding).
    if last_1_1_kernel:
        stages += [nn.Conv2d(channels, out_channels, 1, padding=0), nn.ReLU()]
    # Every block finishes with a 2x2, stride-2 max pooling layer.
    stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stages)

In [19]:
# A VGG network is five VGG blocks followed by three fully connected layers.
# The architecture below describes a 16-weight-layer VGG network.
# Each entry is (num_convs, out_channels, last_1_1_kernel). Blocks 3, 4 and 5
# end with a 1x1 convolution, so their 3x3 count is written as 2 and the 1x1
# layer is requested through the flag.
# NOTE(review): this layout appears to match the paper's 16-layer variant with
# 1x1 convolutions (configuration C), not the all-3x3 configuration D that is
# usually called VGG-16 -- confirm which one is intended.
conv_arch = (
    (2, 64, False),
    (2, 128, False),
    (2, 256, True),
    (2, 512, True),
    (2, 512, True),
)
def vgg16_net(conv_arch, in_channels=1, num_classes=1000, feature_size=7):
    """Assemble a VGG-style classifier: convolutional blocks plus three fully connected layers.

    Args:
        conv_arch: iterable of (num_convs, out_channels, last_1_1_kernel)
            triples, one per VGG block; each triple is forwarded to vgg_block.
            An empty iterable yields a network with only the classifier head.
        in_channels: number of channels of the input image (default 1,
            the value that was previously hard-coded).
        num_classes: width of the final linear layer (default 1000).
        feature_size: side length of the square feature map that reaches the
            Flatten layer. The default 7 is what a 224x224 input gives after
            five blocks, since each block halves the resolution with its
            2x2 stride-2 max pool.

    Returns:
        nn.Sequential containing the VGG blocks, a Flatten layer and the
        three-layer fully connected head (with ReLU and Dropout(0.5) after
        the first two linear layers).
    """
    blocks = []
    # Track the running channel count explicitly so the classifier width does
    # not depend on a loop variable that is unbound when conv_arch is empty.
    channels = in_channels
    for num_layer, out_channel, last_1_1_kernel in conv_arch:
        blocks.append(vgg_block(num_layer, channels, out_channel, last_1_1_kernel))
        channels = out_channel
    flat_features = channels * feature_size * feature_size
    return nn.Sequential(
        *blocks, nn.Flatten(),
        # Fully connected head
        nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, num_classes))

In [20]:
# Instantiate the network from the architecture description.
net = vgg16_net(conv_arch)
# Push one random single-channel 224x224 image through the network, one
# top-level module at a time, and report the output shape after each step.
X = torch.randn(1, 1, 224, 224)
for blk in net:
    X = blk(X)
    print(type(blk).__name__, 'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 112, 112])
Sequential output shape:	 torch.Size([1, 128, 56, 56])
Sequential output shape:	 torch.Size([1, 256, 28, 28])
Sequential output shape:	 torch.Size([1, 512, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
Flatten output shape:	 torch.Size([1, 25088])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 1000])


In [None]:
# 由于计算量过大，我的CPU瑟瑟发抖，所以就不具体训练了！