In [1]:
import torch
from torch import nn,optim

In [2]:
def conv_block(input_channels,num_channels):
    return nn.Sequential(nn.BatchNorm2d(input_channels),nn.ReLU(),nn.Conv2d(input_channels,num_channels,kernel_size=3,padding=1))

In [29]:
class DenseBlock(nn.Module):
    def __init__(self,num_convs,input_channels,num_channels):
        super().__init__()
        layer=[]
        for i in range(num_convs):
            layer.append(conv_block(num_channels*i+input_channels,num_channels))
        self.net=nn.Sequential(*layer)
    def forward(self,X):
        for blk in self.net:
            Y=blk(X)
#             print(Y.shape)
            X=torch.cat((X,Y),dim=1)
        return X

In [7]:
blk=DenseBlock(2,3,10)
X=torch.randn(4,3,8,8)
Y=blk(X)
Y.shape

torch.Size([4, 10, 8, 8])
torch.Size([4, 10, 8, 8])


torch.Size([4, 23, 8, 8])

In [8]:
def transition_block(input_channels,num_channels):
    return nn.Sequential(nn.BatchNorm2d(input_channels),nn.ReLU(),
                        nn.Conv2d(input_channels,num_channels,kernel_size=1),nn.AvgPool2d(kernel_size=2,stride=2))

In [11]:
net1=nn.Sequential(nn.BatchNorm2d(23),nn.ReLU(),
                        nn.Conv2d(23,10,kernel_size=1),nn.AvgPool2d(kernel_size=2,stride=2))

In [13]:
YY=Y
for l in net1:
    YY=l(YY)
    print(l.__class__.__name__,' output  shape : ',YY.shape)

BatchNorm2d  output  shape :  torch.Size([4, 23, 8, 8])
ReLU  output  shape :  torch.Size([4, 23, 8, 8])
Conv2d  output  shape :  torch.Size([4, 10, 8, 8])
AvgPool2d  output  shape :  torch.Size([4, 10, 4, 4])


In [14]:
blk=transition_block(23,10)
blk(Y).shape

torch.Size([4, 10, 4, 4])

In [19]:
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

In [32]:
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
blks = []
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    blks.append(DenseBlock(num_convs, num_channels, growth_rate))
    # 上⼀个稠密块的输出通道数
    num_channels += num_convs * growth_rate
    # 在稠密块之间添加⼀个转换层，使通道数量减半
    if i != len(num_convs_in_dense_blocks) - 1:
        blks.append(transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2

In [33]:
net = nn.Sequential(
b1, *blks,
nn.BatchNorm2d(num_channels), nn.ReLU(),
nn.AdaptiveMaxPool2d((1, 1)),
nn.Flatten(),
nn.Linear(num_channels, 10))

In [34]:
import torchvision
from torchvision import transforms

In [35]:
trans=[transforms.ToTensor()]
trans.insert(0,transforms.Resize(96))
trans=transforms.Compose(trans)
mnist_train=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=True,
    download=True,transform=trans)
mnist_test=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=False,
    download=True,transform=trans)

In [36]:
def evaluate_acc_gpu(net,data_iter,device=None):
    if isinstance(net,nn.Module):
        device='cuda' if torch.cuda.is_available() else 'cpu'
        net.to(device)
        net.eval()
        l_sum=[]
        for X,y in data_iter:
            X,y=X.to(device),y.to(device)
            y_hat=net(X)
            l_sum.append((sum(torch.argmax(y_hat,dim=1).reshape(y.shape)==y) / y.shape[0]).item())
    return sum(l_sum)/len(l_sum)

In [37]:
def train_ch6(net,train_iter,test_iter,num_epochs,lr,device):
    def init_weight(m):
        if type(m) ==nn.Linear or type(m) ==nn.Conv2d:
            torch.nn.init.xavier_uniform_(m.weight)
    net.apply(init_weight)
    print('training on : ',device)
    net.to(device)
    loss=nn.CrossEntropyLoss()
    optimizer=optim.SGD(net.parameters(),lr=lr)
    for epoch in range(num_epochs):
        tr_l=[]
        tr_acc=[]
        net.train()
        for X,y in train_iter:
            X,y=X.to(device),y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            
            with torch.no_grad():
                tr_l.append(l.item())
                tr_acc.append((sum(torch.argmax(y_hat,dim=1).reshape(y.shape) ==y ) / y.shape[0] ).item() )
        test_acc=evaluate_acc_gpu(net,test_iter,device)
        print('epoch : ',epoch ,' train loss : ',sum(tr_l)/len(tr_l),' train acc : ', sum(tr_acc)/len(tr_acc),'test acc : ',test_acc)

In [38]:
train_iter=torch.utils.data.DataLoader(mnist_train,batch_size,shuffle=True)
test_iter=torch.utils.data.DataLoader(mnist_test,batch_size,shuffle=False)

In [None]:
lr, num_epochs, batch_size = 0.1, 10, 256
train_ch6(net, train_iter, test_iter, num_epochs, lr, 'cpu')

training on :  cpu
epoch :  0  train loss :  0.7966652532841297  train acc :  0.7601617906955962 test acc :  0.84228515625
epoch :  1  train loss :  0.3631076884396533  train acc :  0.8664007093044037 test acc :  0.826171875
epoch :  2  train loss :  0.29450924751606394  train acc :  0.89028147154666 test acc :  0.8740234375
epoch :  3  train loss :  0.25679481796761777  train acc :  0.9052969859001484 test acc :  0.74345703125
epoch :  4  train loss :  0.23434468190720742  train acc :  0.9131704343126176 test acc :  0.88720703125
epoch :  5  train loss :  0.2115989495465096  train acc :  0.9222462321849579 test acc :  0.89541015625
epoch :  6  train loss :  0.19353248654527866  train acc :  0.9287344859001484 test acc :  0.89326171875


In [None]:
100/6