In [1]:
import os
import sys
import time

import torch
import torch.nn.functional as F
from torch import nn,optim

# The local helper package lives outside the notebook directory.
sys.path.append('../code/')
import d2lzh_pytorch as d2l
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
def conv_block(in_channels,out_channels):
    """Build the BatchNorm -> ReLU -> 3x3 convolution unit of a dense block.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by the convolution.

    Returns:
        An ``nn.Sequential`` holding, in order, ``BatchNorm2d(in_channels)``,
        ``ReLU`` and a ``Conv2d`` with kernel size 3 and padding 1.
    """
    layers = [
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
    ]
    return nn.Sequential(*layers)

In [3]:
class DenseBlock(nn.Module):
    """A stack of conv units whose outputs are concatenated along channels.

    Args:
        num_convs: number of conv units in the block.
        in_channels: channel count of the block's input.
        out_channels: channel count each conv unit contributes.

    Attributes:
        net: ``nn.ModuleList`` with one ``conv_block`` per unit.
        out_channels: channel count of the block's output, i.e.
            ``in_channels + num_convs * out_channels``.
    """

    def __init__(self,num_convs,in_channels,out_channels):
        super().__init__()
        # Unit k receives the block input plus the outputs of the k units
        # before it, hence the growing input width.
        units = [
            conv_block(in_channels + k * out_channels, out_channels)
            for k in range(num_convs)
        ]
        self.net = nn.ModuleList(units)
        self.out_channels = in_channels + num_convs * out_channels

    def forward(self,X):
        """Run every unit, appending its output to the running feature map on dim 1."""
        features = X
        for unit in self.net:
            features = torch.cat((features, unit(features)), dim=1)
        return features

In [4]:
# Sanity check: 2 conv units contributing 10 channels each on a 3-channel
# input should give 3 + 2*10 = 23 output channels.
blk = DenseBlock(num_convs=2, in_channels=3, out_channels=10)
X = torch.rand((4, 3, 8, 8))
Y = blk(X)
Y.shape

torch.Size([4, 23, 8, 8])

In [5]:
def transition_block(in_channels,out_channels):
    """Build the layer placed between dense blocks.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count after the 1x1 convolution.

    Returns:
        An ``nn.Sequential`` holding, in order, ``BatchNorm2d(in_channels)``,
        ``ReLU``, a ``Conv2d`` with kernel size 1 that changes the channel
        count, and an ``AvgPool2d`` with kernel size 2 and stride 2.
    """
    stages = (
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        nn.Conv2d(in_channels, out_channels, kernel_size=1),
        nn.AvgPool2d(kernel_size=2, stride=2),
    )
    return nn.Sequential(*stages)

In [6]:
# Apply a transition layer to the 23-channel dense-block output Y: the channel
# count becomes 10 and the pooling halves height and width.
blk = transition_block(in_channels=23, out_channels=10)
blk(Y).shape

torch.Size([4, 10, 4, 4])

In [7]:
# Network stem: a stride-2 7x7 convolution on the single-channel input,
# followed by batch norm, ReLU and a stride-2 3x3 max pool.
stem_layers = [
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
]
net = nn.Sequential(*stem_layers)

In [8]:
# Append the dense blocks to the stem, with a transition layer between
# consecutive blocks. num_channels tracks the width of the current feature map.
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
last_idx = len(num_convs_in_dense_blocks) - 1
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    # A dense block widens its input by num_convs * growth_rate channels.
    DB = DenseBlock(num_convs, num_channels, growth_rate)
    net.add_module('DenseBlock_%d' % i, DB)
    num_channels = DB.out_channels

    # The final dense block is not followed by a transition layer.
    if i == last_idx:
        continue

    # Each transition layer halves the channel count.
    halved_channels = num_channels // 2
    net.add_module('transition_block_%d' % i, transition_block(num_channels, halved_channels))
    num_channels = halved_channels

In [9]:
# Classification head appended after the last dense block: batch norm, ReLU,
# global average pooling, then flatten + a linear layer with 10 outputs.
# GlobalAvgPool2d and FlattenLayer come from the external d2lzh_pytorch module.
head_layers = [
    ('BN', nn.BatchNorm2d(num_channels)),
    ('relu', nn.ReLU()),
    ('globals_avg_pool', d2l.GlobalAvgPool2d()),
    ('fc', nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_channels, 10))),
]
for head_name, head_layer in head_layers:
    net.add_module(head_name, head_layer)

In [10]:
# Push a random single-channel 96x96 input through the network one top-level
# child at a time and print the output shape after each child, to check that
# channel counts and spatial sizes line up.
X=torch.rand((1,1,96,96))
for name,layer in net.named_children():
    X=layer(X)  # X is overwritten with this child's output and fed to the next one
    print(name,'out shape:\t',X.shape)

0 out shape:	 torch.Size([1, 64, 48, 48])
1 out shape:	 torch.Size([1, 64, 48, 48])
2 out shape:	 torch.Size([1, 64, 48, 48])
3 out shape:	 torch.Size([1, 64, 24, 24])
DenseBlock_0 out shape:	 torch.Size([1, 192, 24, 24])
transition_block_0 out shape:	 torch.Size([1, 96, 12, 12])
DenseBlock_1 out shape:	 torch.Size([1, 224, 12, 12])
transition_block_1 out shape:	 torch.Size([1, 112, 6, 6])
DenseBlock_2 out shape:	 torch.Size([1, 240, 6, 6])
transition_block_2 out shape:	 torch.Size([1, 120, 3, 3])
DenseBlock_3 out shape:	 torch.Size([1, 248, 3, 3])
BN out shape:	 torch.Size([1, 248, 3, 3])
relu out shape:	 torch.Size([1, 248, 3, 3])
globals_avg_pool out shape:	 torch.Size([1, 248, 1, 1])
fc out shape:	 torch.Size([1, 10])


In [11]:
# Build the Fashion-MNIST train/test iterators with images resized to 96.
batch_size=128
# The dataset root used to be a hard-coded absolute path that only resolves on
# the original machine. That path remains the default, but it can now be
# redirected by setting the D2L_DATA_ROOT environment variable.
data_root=os.environ.get('D2L_DATA_ROOT','/workspace/mycode/Dive-into-DL-PyTorch/data')
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size,resize=96,root=data_root)

In [12]:
# Train the network with Adam (learning rate 0.001) for 5 epochs.
lr,num_epochs=0.001,5
optimizer=torch.optim.Adam(net.parameters(),lr=lr)
# d2l.train_ch5 is defined in the external d2lzh_pytorch module, not in this
# notebook; presumably it moves the model to `device` and logs per-epoch
# loss/accuracy (the recorded output suggests so) - confirm in that module.
d2l.train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cuda
epoch 1, loss 0.4228, train acc 0.848, test acc 0.836, time 156.9 sec
epoch 2, loss 0.1341, train acc 0.900, test acc 0.892, time 157.2 sec
epoch 3, loss 0.0769, train acc 0.915, test acc 0.903, time 158.0 sec
epoch 4, loss 0.0516, train acc 0.924, test acc 0.915, time 157.9 sec
epoch 5, loss 0.0380, train acc 0.930, test acc 0.920, time 157.8 sec
