In [1]:
import time
import torch
from torch import nn
import sys
sys.path.append('..')
import d2lzh_pytorch as d2l

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    """Build one NiN block: a spatial convolution followed by two 1x1 convolutions.

    Every convolution is followed by a ReLU, so the returned ``nn.Sequential``
    holds six children in the order conv, relu, conv, relu, conv, relu.

    Args:
        in_channels: number of channels of the block's input.
        out_channels: number of channels produced by all three convolutions.
        kernel_size: kernel size of the first (spatial) convolution.
        stride: stride of the first convolution.
        padding: padding of the first convolution.

    Returns:
        nn.Sequential containing the six layers described above.
    """
    # Leading convolution: the only layer that uses the caller-supplied
    # kernel size, stride and padding.
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
        nn.ReLU(),
    ]
    # Two 1x1 convolutions that keep the channel count unchanged.
    for _ in range(2):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)

In [3]:
class GlobalAvgPool2d(nn.Module):
    """Average pooling whose window is every dimension of the input after the first two.

    The layer has no parameters; the pooling window is read from the input
    tensor on every forward call.
    """

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Average-pool ``X`` with a kernel equal to ``X.shape[2:]``."""
        # Everything after the first two dimensions becomes the pooling window.
        window = X.shape[2:]
        return nn.functional.avg_pool2d(X, kernel_size=window)

In [4]:
# NiN model: three NiN blocks, each followed by 3x3/stride-2 max pooling, then
# dropout and a final NiN block with 10 output channels. Global average pooling
# plus flattening turns those 10 channels into one value per channel.
net = nn.Sequential(
    nin_block(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(3, stride=2),
    nin_block(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(3, stride=2),
    nin_block(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(3, stride=2),
    nn.Dropout(p=0.5),
    # Final block produces 10 channels.
    nin_block(in_channels=384, out_channels=10, kernel_size=3, stride=1, padding=1),
    GlobalAvgPool2d(),
    d2l.FlattenLayer(),
)

In [5]:
# Feed one random 1x1x224x224 tensor through the network, one top-level layer
# at a time, and print the output shape after each layer.
X = torch.rand(size=(1, 1, 224, 224))
for layer_name, layer in net.named_children():
    X = layer(X)
    print(layer_name, 'output shape', X.shape)

0 output shape torch.Size([1, 96, 54, 54])
1 output shape torch.Size([1, 96, 26, 26])
2 output shape torch.Size([1, 256, 26, 26])
3 output shape torch.Size([1, 256, 12, 12])
4 output shape torch.Size([1, 384, 12, 12])
5 output shape torch.Size([1, 384, 5, 5])
6 output shape torch.Size([1, 384, 5, 5])
7 output shape torch.Size([1, 10, 5, 5])
8 output shape torch.Size([1, 10, 1, 1])
9 output shape torch.Size([1, 10])


In [6]:
from functools import reduce
# Print the number of learnable parameters held by each top-level layer of `net`.
# Tensor.numel() returns the element count directly, so 0-dim (scalar) parameters
# are counted as 1 instead of failing on an empty shape, and the loop index no
# longer shadows the builtin `id`.
for idx, module in enumerate(net):
    num = sum(param.numel() for param in module.parameters())
    print(str(idx), num)

0 30336
1 0
2 746240
3 0
4 1180800
5 0
6 0
7 34790
8 0
9 0


In [7]:
# Training hyperparameters.
batch_size = 128
lr = 0.002
num_epochs = 5

# Fashion-MNIST iterators from the d2l helper. resize=224 presumably rescales
# the images to 224x224 -- confirm in d2lzh_pytorch.
train_iter, test_iter = d2l.load_data_fashion_mnist(
    batch_size, resize=224, root='../data')

# Adam over all parameters of the network, then hand everything to the d2l
# training helper.
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)


training on  cuda
epoch 1, loss 1.2377, train acc 0.548, test acc 0.712, time 51.8 sec
epoch 2, loss 0.3289, train acc 0.768, test acc 0.790, time 51.7 sec
epoch 3, loss 0.1744, train acc 0.808, test acc 0.811, time 51.9 sec
epoch 4, loss 0.1170, train acc 0.828, test acc 0.835, time 42.6 sec
epoch 5, loss 0.0843, train acc 0.845, test acc 0.846, time 42.4 sec
