In [21]:
import torch
from torch import nn
from torch.utils import data as Data
from torchvision import transforms, datasets

In [22]:
def vgg_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

In [23]:
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

#vgg-11
def vgg(conv_arch):
    conv_blks = []
    in_channels = 1
    for num_convs, out_channels in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels
    return nn.Sequential(*conv_blks, nn.Flatten(), nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(),nn.Dropout(0.5),nn.Linear(4096,4096),nn.ReLU(),nn.Dropout(0.5),nn.Linear(4096,10))

In [24]:
net=vgg(conv_arch)

In [25]:
X=torch.randn(size=(1,1,224,224))
for blk in net:
    X=blk(X)
    print(blk.__class__.__name__,'output shape\t',X.shape)

Sequential output shape	 torch.Size([1, 64, 112, 112])
Sequential output shape	 torch.Size([1, 128, 56, 56])
Sequential output shape	 torch.Size([1, 256, 28, 28])
Sequential output shape	 torch.Size([1, 512, 14, 14])
Sequential output shape	 torch.Size([1, 512, 7, 7])
Flatten output shape	 torch.Size([1, 25088])
Linear output shape	 torch.Size([1, 4096])
ReLU output shape	 torch.Size([1, 4096])
Dropout output shape	 torch.Size([1, 4096])
Linear output shape	 torch.Size([1, 4096])
ReLU output shape	 torch.Size([1, 4096])
Dropout output shape	 torch.Size([1, 4096])
Linear output shape	 torch.Size([1, 10])


In [26]:
# 小vgg
ratio=4
small_conv_arch=[(pair[0],pair[1]//ratio) for pair in conv_arch]
net=vgg(small_conv_arch)

In [27]:
batch_size = 128
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])
mnist_train = datasets.FashionMNIST(root='dataset/Fashion_Minist', train=True, transform=transform, download=True)
mnist_test = datasets.FashionMNIST(root='dataset/Fashion_Minist', train=False, transform=transform, download=True)
train_iter = Data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = Data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=0)

In [28]:
def evaluate_accuracy_gpu(net, data_iter, device=None):
    if isinstance(net, torch.nn.Module):
        net.eval()
        if not device:
            device = next(iter(net.parameters())).device
    metric = [0, 0]
    for X, y in data_iter:
        if isinstance(X, list):
            X = [x.to(device) for x in X]
        else:
            X = X.to(device)
        y = y.to(device)
        with torch.no_grad():
            metric[0] += torch.sum(torch.argmax(net(X), dim=1) == y).item()
            metric[1] += y.shape[0]
        return metric[0] / metric[1]


def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    def init_weights(m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    num_batches = len(train_iter)
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        metric = [0, 0, 0]
        net.train()
        for i,(X, y) in enumerate(train_iter):
            optimizer.zero_grad()
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            metric[0] += l * X.shape[0]
            metric[1] += torch.sum(torch.argmax(y_hat, dim=1) == y).item()
            metric[2] += y.shape[0]
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
                print('epoch %d, batch %d, loss %.4f, train acc %.3f' % (epoch + 1, i + 1, train_l, train_acc))
        n += y.shape[0]
    test_acc = evaluate_accuracy_gpu(net, test_iter)
    print(f'loss {train_l:.3f},train acc {train_acc:.3f},test acc {test_acc:.3f}')

In [29]:
lr,num_epochs,batch_size=0.05,10,128
train_ch6(net,train_iter,test_iter,num_epochs,lr,'cuda')

training on cuda
epoch 1, batch 93, loss 2.3030, train acc 0.100
epoch 1, batch 186, loss 2.3031, train acc 0.099
epoch 1, batch 279, loss 2.3031, train acc 0.098
epoch 1, batch 372, loss 2.3030, train acc 0.100
epoch 1, batch 465, loss 2.3027, train acc 0.102
epoch 1, batch 469, loss 2.3026, train acc 0.102
epoch 2, batch 93, loss 2.1871, train acc 0.238
epoch 2, batch 186, loss 1.6592, train acc 0.405
epoch 2, batch 279, loss 1.3771, train acc 0.499


KeyboardInterrupt: 