In [1]:
import torch
from torch import nn

def vgg_block(num_convs, in_channels, out_channels):
    """
    Build one VGG block: a stack of 3x3 convolutions followed by a max-pool.

    Args:
        num_convs: Number of convolution layers in the block (e.g. 2 means
            two consecutive 3x3 convolutions).
        in_channels: Channel count of the tensor entering the block.
        out_channels: Channel count produced by every convolution in the block.

    Returns:
        An nn.Sequential holding num_convs (Conv2d, ReLU) pairs and then a
        single MaxPool2d.
    """
    stack = []

    for idx in range(num_convs):
        # Only the first convolution sees the block's input width; every later
        # one consumes the out_channels produced by its predecessor.
        src_channels = in_channels if idx == 0 else out_channels
        # kernel_size=3 with padding=1 leaves height and width unchanged.
        conv = nn.Conv2d(src_channels, out_channels, kernel_size=3, padding=1)
        stack.extend([conv, nn.ReLU()])

    # A 2x2 max-pool with stride 2 halves the spatial resolution.
    stack.append(nn.MaxPool2d(kernel_size=2, stride=2))

    return nn.Sequential(*stack)


In [2]:
import torch
from torch import nn

# Convolutional layout (VGG-11 style): one (num_convs, out_channels) pair per block.
# Classic design: every block halves the image height/width through its pooling
# layer, while the channel width doubles from block to block until it reaches 512.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)

# Factory for the full VGG network.
def vgg(conv_arch, in_channels=1, num_classes=10, input_size=None):
    """
    Assemble a VGG-style classifier from a block specification.

    Args:
        conv_arch: Iterable of (num_convs, out_channels) pairs, one per
            vgg_block. May be empty, in which case the network is only the
            fully connected head applied to the flattened input.
        in_channels: Channel count of the input images. Defaults to 1
            (grayscale), the value that used to be hard-coded.
        num_classes: Width of the final Linear layer. Defaults to 10, the
            value that used to be hard-coded.
        input_size: Height/width of the (square) input images. When None
            (default) the first Linear layer assumes a 7x7 final feature map,
            which is what a 224x224 input yields after five halving blocks.
            When given, the feature-map side is derived as
            input_size // 2 ** number_of_blocks, since every vgg_block ends
            with one 2x2, stride-2 max-pool.

    Returns:
        An nn.Sequential of the convolutional blocks, a Flatten layer, two
        4096-wide Linear + ReLU + Dropout(0.5) stages and the output Linear.

    Raises:
        ValueError: If input_size is given but is too small to survive the
            pooling of all blocks (derived feature-map side < 1).
    """
    conv_blks = []  # one vgg_block per entry of conv_arch

    for num_convs, out_channels in conv_arch:
        # Each block holds num_convs Conv3+ReLU pairs plus one pooling layer.
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels  # next block consumes this block's output width

    if input_size is None:
        # Legacy behaviour: 224x224 input through five halving blocks -> 7x7.
        feature_size = 7
    else:
        # Repeated floor-halving equals one floor division by 2 ** n.
        feature_size = input_size // 2 ** len(conv_blks)
        if feature_size < 1:
            raise ValueError(
                f"input_size={input_size} is too small for {len(conv_blks)} pooling blocks"
            )

    # After the loop `in_channels` is the channel count leaving the last block
    # (or the original input width when conv_arch is empty), so no name from
    # inside the loop is needed here.
    flat_features = in_channels * feature_size * feature_size

    return nn.Sequential(
        *conv_blks,         # unpack all convolutional blocks into the network
        nn.Flatten(),       # collapse (C, H, W) into one feature vector per sample

        # Fully connected stage 1
        nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(0.5),

        # Fully connected stage 2
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),

        # Output layer: one logit per class
        nn.Linear(4096, num_classes)
    )


In [3]:
# Dummy batch of 3 single-channel 224x224 images; vgg builds for 1-channel input
# and its first Linear layer expects the 7x7 feature map a 224x224 image produces.
X = torch.rand(size=(3, 1, 224, 224), dtype=torch.float32)
net = vgg(conv_arch)
# This pass only inspects shapes, so autograd is disabled to avoid recording a graph.
with torch.no_grad():
    for layer in net:
        X = layer(X)  # feed the running activation through each top-level module in turn
        print(layer.__class__.__name__, 'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([3, 64, 112, 112])
Sequential output shape:	 torch.Size([3, 128, 56, 56])
Sequential output shape:	 torch.Size([3, 256, 28, 28])
Sequential output shape:	 torch.Size([3, 512, 14, 14])
Sequential output shape:	 torch.Size([3, 512, 7, 7])
Flatten output shape:	 torch.Size([3, 25088])
Linear output shape:	 torch.Size([3, 4096])
ReLU output shape:	 torch.Size([3, 4096])
Dropout output shape:	 torch.Size([3, 4096])
Linear output shape:	 torch.Size([3, 4096])
ReLU output shape:	 torch.Size([3, 4096])
Dropout output shape:	 torch.Size([3, 4096])
Linear output shape:	 torch.Size([3, 10])


In [4]:
# Budget VGG-11: shrink every block's output channel count by a factor of `ratio`
# (integer division), keeping the number of convolutions per block unchanged.
ratio = 4
small_conv_arch = [
    (num_convs, out_channels // ratio)
    for num_convs, out_channels in conv_arch
]
net = vgg(small_conv_arch)


In [5]:
from ml_model_utils import *
# Training hyperparameters.
lr = 0.05
num_epochs = 10
batch_size = 128

# load_data_fashion_mnist, train_ch6 and try_gpu are presumably supplied by the
# star import from ml_model_utils above — TODO confirm.
# resize=224 presumably upscales the images to the 224x224 input the network's
# first Linear layer is sized for — verify against the helper.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
train_ch6(net, train_iter, test_iter, num_epochs, lr, try_gpu())


training on cuda:0
loss 0.170, train acc 0.937, test acc 0.923
2019.4 examples/sec on cuda:0
