In [1]:
import time
import torch
import torch.nn as nn 
import torch.nn.functional as F 
import d2lzh as d2l

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def conv_block(in_channels,out_channels):
    """Build one BatchNorm -> ReLU -> 3x3 convolution unit.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.

    Returns:
        An ``nn.Sequential`` holding, in order, ``BatchNorm2d``, ``ReLU`` and
        a ``Conv2d`` with ``kernel_size=3`` and ``padding=1``.
    """
    norm = nn.BatchNorm2d(in_channels)
    activation = nn.ReLU()
    # 3x3 kernel with padding 1: height and width are left unchanged.
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    return nn.Sequential(norm, activation, conv)

class DenseBlock(nn.Module):
    """Chain of conv units whose outputs are concatenated onto their inputs.

    Args:
        num_convs: number of conv units in the block.
        in_channels: channel count of the tensor entering the block.
        out_channels: channels produced by each conv unit (i.e. how many
            channels every unit appends to the running feature map).

    Attributes:
        net: ``nn.ModuleList`` of the conv units, in application order.
        out_channels: channel count of the block's output, equal to
            ``in_channels + num_convs * out_channels``.
    """
    def __init__(self,num_convs,in_channels,out_channels):
        super().__init__()
        # Unit k receives the block input plus the k feature maps that the
        # preceding units have already appended.
        units = [
            conv_block(in_channels + k * out_channels, out_channels)
            for k in range(num_convs)
        ]
        self.net = nn.ModuleList(units)
        self.out_channels = in_channels + num_convs * out_channels

    def forward(self,x):
        """Apply every unit in turn, concatenating its output along dim 1."""
        features = x
        for unit in self.net:
            # Concatenate each unit's input and output along the channel dimension.
            features = torch.cat((features, unit(features)), dim=1)
        return features

# Sanity check: a dense block with 2 conv units, 3 input channels and
# 10 channels appended per unit.
blk = DenseBlock(2,3,10)
x = torch.randn(4,3,8,8)
y = blk(x)  # kept in scope: the transition-block demo cell reuses `y`
print(y.shape)  # channel count becomes 3 + 2 * 10 = 23

torch.Size([4, 23, 8, 8])


In [2]:
def transition_block(in_channels,out_channels):
    """Build a transition layer: BatchNorm -> ReLU -> 1x1 conv -> 2x2 avg pool.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels after the 1x1 convolution.

    Returns:
        An ``nn.Sequential`` with the four layers in the order listed above.
    """
    layers = [
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        # The 1x1 convolution only changes the channel count.
        nn.Conv2d(in_channels, out_channels, kernel_size=1),
        # Stride-2 average pooling halves height and width.
        nn.AvgPool2d(kernel_size=2, stride=2),
    ]
    return nn.Sequential(*layers)

# Feed the dense-block output `y` (23 channels) through a transition block
# that maps 23 channels to 10 and halves the height and width.
# Note: this rebinds the global `blk`, which previously held the DenseBlock.
blk = transition_block(23,10)
blk(y).shape

torch.Size([4, 10, 4, 4])

In [3]:
# Stem of the network: 7x7 stride-2 convolution on a single-channel input,
# batch norm, ReLU, then a 3x3 stride-2 max pool.
net = nn.Sequential(
    nn.Conv2d(1,64,kernel_size=7,stride=2,padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
)

# num_channels tracks the channel count of the tensor flowing through the
# network; growth_rate is how many channels each conv unit in a dense block adds.
num_channels,growth_rate=64,32
# One entry per dense block: the number of conv units inside it.
num_convs_in_dense_blocks = [4,4,4,4]

for i,num_convs in enumerate(num_convs_in_dense_blocks):
    db = DenseBlock(num_convs,num_channels,growth_rate)
    net.add_module("DenseBlock_%d" %i,db)
    # The dense block reports its own output width; use it for the next layer.
    num_channels = db.out_channels
    if i != len(num_convs_in_dense_blocks)-1: # between dense blocks, add a transition layer that halves the channel count
        net.add_module("transition_block_{}".format(i),transition_block(num_channels,num_channels//2))
        num_channels = num_channels // 2

# Head: final batch norm + ReLU, then pooling, flattening and a 10-way linear layer.
net.add_module("BN",nn.BatchNorm2d(num_channels))
net.add_module("relu",nn.ReLU())
# GlobalAvgPool2d and FlattenLayer come from the d2l helper package and are not
# defined in this file; presumably they pool each channel to 1x1 and flatten to
# (batch, channels) -- verify against d2l.
net.add_module("global_avg_pool",d2l.GlobalAvgPool2d())
net.add_module("fc",nn.Sequential(d2l.FlattenLayer(),nn.Linear(num_channels,10)))


# Trace the output shape of every top-level layer of `net` on one dummy
# 1-channel 96x96 input.
#
# The probe is run in eval mode and under torch.no_grad(): in training mode a
# forward pass would update the BatchNorm running statistics with this random
# noise and would also record an autograd graph that is never used. The
# model's previous train/eval mode is restored afterwards, so later cells see
# `net` in the same mode as before.
x = torch.rand(1,1,96,96)
was_training = net.training
net.eval()
try:
    with torch.no_grad():
        for name,layer in net.named_children():
            x = layer(x)
            print(name,' output shape:\t',x.shape)
finally:
    net.train(was_training)

0  output shape:	 torch.Size([1, 64, 48, 48])
1  output shape:	 torch.Size([1, 64, 48, 48])
2  output shape:	 torch.Size([1, 64, 48, 48])
3  output shape:	 torch.Size([1, 64, 24, 24])
DenseBlock_0  output shape:	 torch.Size([1, 192, 24, 24])
transition_block_0  output shape:	 torch.Size([1, 96, 12, 12])
DenseBlock_1  output shape:	 torch.Size([1, 224, 12, 12])
transition_block_1  output shape:	 torch.Size([1, 112, 6, 6])
DenseBlock_2  output shape:	 torch.Size([1, 240, 6, 6])
transition_block_2  output shape:	 torch.Size([1, 120, 3, 3])
DenseBlock_3  output shape:	 torch.Size([1, 248, 3, 3])
BN  output shape:	 torch.Size([1, 248, 3, 3])
relu  output shape:	 torch.Size([1, 248, 3, 3])
global_avg_pool  output shape:	 torch.Size([1, 248, 1, 1])
fc  output shape:	 torch.Size([1, 10])


In [4]:
# Train the network on Fashion-MNIST with images resized to 96x96.
batch_size = 64 # if an "out of memory" error appears, reduce batch_size or resize
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5 
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# train_ch5 is a d2l helper not defined in this file; it is given the model,
# both data iterators, the optimizer, the target device and the epoch count.
# NOTE(review): in the recorded log the printed loss falls far faster than the
# accuracy improves (0.4464 -> 0.0374 over 5 epochs) -- possibly the helper
# averages the loss over a batch count that is not reset per epoch; confirm
# in d2l before reading the loss values as per-epoch averages.
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 0.4464, train acc 0.841,test acc 0.866,time 50.3
epoch 2, loss 0.1359, train acc 0.901,test acc 0.886,time 48.9
epoch 3, loss 0.0775, train acc 0.914,test acc 0.901,time 49.1
epoch 4, loss 0.0520, train acc 0.924,test acc 0.919,time 49.1
epoch 5, loss 0.0374, train acc 0.930,test acc 0.844,time 49.1
