In [3]:
import torch
from torch import nn
from d2l import torch as d2l
d2l.try_all_gpus
def conv_block(input_channels, num_channels):
    return nn.Sequential(
        nn.BatchNorm2d(input_channels), nn.ReLU(),
        nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1))


In [4]:
class DenseBlock(nn.Module):
    def __init__(self, num_convs, input_channels, num_channels):
        super(DenseBlock, self).__init__()
        layer = []
        for i in range(num_convs):
            layer.append(conv_block(
                num_channels * i + input_channels, num_channels))
        self.net = nn.Sequential(*layer)

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            # 连接通道维度上每个块的输入和输出
            X = torch.cat((X, Y), dim=1)
        return X


In [5]:
def transition_block(input_channels, num_channels):
    return nn.Sequential(
        nn.BatchNorm2d(input_channels), nn.ReLU(),
        nn.Conv2d(input_channels, num_channels, kernel_size=1),
        nn.AvgPool2d(kernel_size=2, stride=2))


In [6]:
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


In [7]:
# `num_channels`为当前的通道数
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
blks = []
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    blks.append(DenseBlock(num_convs, num_channels, growth_rate))
    # 上一个稠密块的输出通道数
    num_channels += num_convs * growth_rate
    # 在稠密块之间添加一个转换层，使通道数量减半
    if i != len(num_convs_in_dense_blocks) - 1:
        blks.append(transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2


In [8]:
net = nn.Sequential(
    b1, *blks,
    nn.BatchNorm2d(num_channels), nn.ReLU(),
    nn.AdaptiveMaxPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(num_channels, 10))


In [9]:
lr, num_epochs, batch_size = 0.1, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu() , num_print= 5 )


training on  cuda:0
num_batch:47 , avg_loss:3.43 , avg_accuracy:0.15
num_batch:94 , avg_loss:2.53 , avg_accuracy:0.27
num_batch:141 , avg_loss:2.04 , avg_accuracy:0.37
num_batch:188 , avg_loss:1.75 , avg_accuracy:0.44
num_batch:235 , avg_loss:1.54 , avg_accuracy:0.50
epochs:0 ,test_acc:0.33 
num_batch:47 , avg_loss:13.53 , avg_accuracy:0.12
num_batch:94 , avg_loss:7.77 , avg_accuracy:0.18
num_batch:141 , avg_loss:5.62 , avg_accuracy:0.28
num_batch:188 , avg_loss:4.47 , avg_accuracy:0.36
num_batch:235 , avg_loss:3.76 , avg_accuracy:0.42
epochs:1 ,test_acc:0.59 
num_batch:47 , avg_loss:0.82 , avg_accuracy:0.68
num_batch:94 , avg_loss:0.82 , avg_accuracy:0.69
num_batch:141 , avg_loss:0.80 , avg_accuracy:0.70
num_batch:188 , avg_loss:0.78 , avg_accuracy:0.70
num_batch:235 , avg_loss:0.76 , avg_accuracy:0.71
epochs:2 ,test_acc:0.75 
num_batch:47 , avg_loss:0.68 , avg_accuracy:0.74
num_batch:94 , avg_loss:0.66 , avg_accuracy:0.75
num_batch:141 , avg_loss:0.65 , avg_accuracy:0.75
num_batch:18

KeyboardInterrupt: 