In [1]:
import torch
import torchvision
from torch import nn 
from torch.nn import functional as F

In [2]:
class Inception(nn.Module):
    """Inception block: four parallel branches whose outputs are concatenated on the channel axis.

    Every branch keeps the spatial size of its input (1x1 convolutions, a 3x3
    convolution with padding 1, a 5x5 convolution with padding 2, and a 3x3
    stride-1 max-pool with padding 1), so the results can be stacked along
    ``dim=1``. The output therefore has ``c1 + c2[1] + c3[1] + c4`` channels.

    Args:
        in_channels: Number of channels of the input feature map.
        c1: Output channels of branch 1 (single 1x1 convolution).
        c2: Indexable pair; ``c2[0]`` is the width of the 1x1 convolution and
            ``c2[1]`` the output width of the 3x3 convolution in branch 2.
        c3: Indexable pair; ``c3[0]`` is the width of the 1x1 convolution and
            ``c3[1]`` the output width of the 5x5 convolution in branch 3.
        c4: Output channels of the 1x1 convolution that follows the max-pool
            in branch 4.
    """

    def __init__(self, in_channels, c1, c2, c3, c4):
        super().__init__()
        mid_3x3, out_3x3 = c2[0], c2[1]
        mid_5x5, out_5x5 = c3[0], c3[1]

        # Branch 1: a lone 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)

        # Branch 2: 1x1 convolution followed by a 3x3 convolution.
        self.p2_1 = nn.Conv2d(in_channels, mid_3x3, kernel_size=1)
        self.p2_2 = nn.Conv2d(mid_3x3, out_3x3, kernel_size=3, padding=1)

        # Branch 3: 1x1 convolution followed by a 5x5 convolution.
        self.p3_1 = nn.Conv2d(in_channels, mid_5x5, kernel_size=1)
        self.p3_2 = nn.Conv2d(mid_5x5, out_5x5, kernel_size=5, padding=2)

        # Branch 4: 3x3 max-pool followed by a 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them along ``dim=1``."""
        branch1 = F.relu(self.p1_1(x))

        branch2 = F.relu(self.p2_1(x))
        branch2 = F.relu(self.p2_2(branch2))

        branch3 = F.relu(self.p3_1(x))
        branch3 = F.relu(self.p3_2(branch3))

        # No activation between the pooling layer and its 1x1 convolution.
        branch4 = F.relu(self.p4_2(self.p4_1(x)))

        return torch.cat((branch1, branch2, branch3, branch4), dim=1)

In [3]:
# Stage 1: 7x7 stride-2 convolution on a 1-channel input, then a 3x3 stride-2 max-pool.
b1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 2: 1x1 convolution, 3x3 convolution widening to 192 channels, then max-pool.
b2 = nn.Sequential(
    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 3: two Inception blocks (192 -> 256 -> 480 channels), then max-pool.
# Each block outputs c1 + c2[1] + c3[1] + c4 channels, e.g. 64 + 128 + 32 + 32 = 256.
b3 = nn.Sequential(
    Inception(in_channels=192, c1=64, c2=(96, 128), c3=(16, 32), c4=32),
    Inception(in_channels=256, c1=128, c2=(128, 192), c3=(32, 96), c4=64),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 4: five Inception blocks (480 -> 512 -> 512 -> 512 -> 528 -> 832), then max-pool.
b4 = nn.Sequential(
    Inception(in_channels=480, c1=192, c2=(96, 208), c3=(16, 48), c4=64),
    Inception(in_channels=512, c1=160, c2=(112, 224), c3=(24, 64), c4=64),
    Inception(in_channels=512, c1=128, c2=(128, 256), c3=(24, 64), c4=64),
    Inception(in_channels=512, c1=112, c2=(144, 288), c3=(32, 64), c4=64),
    Inception(in_channels=528, c1=256, c2=(160, 320), c3=(32, 128), c4=128),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 5: two Inception blocks (832 -> 832 -> 1024), global average pooling
# down to 1x1, then flatten to a 1024-long feature vector per sample.
b5 = nn.Sequential(
    Inception(in_channels=832, c1=256, c2=(160, 320), c3=(32, 128), c4=128),
    Inception(in_channels=832, c1=384, c2=(192, 384), c3=(48, 128), c4=128),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
)

# Full network: the five stages followed by a 10-way linear classifier head.
net = nn.Sequential(
    b1,
    b2,
    b3,
    b4,
    b5,
    nn.Linear(in_features=1024, out_features=10),
)

In [4]:
def train(dataloader, net, loss, optim):
    """Run one training epoch of ``net`` over ``dataloader``.

    The model is switched to training mode first, so layers such as dropout
    or batch normalisation behave correctly even if a previous evaluation
    left the model in eval mode. Each batch is moved to the device that holds
    the model's parameters, so the function does not depend on a global
    ``device`` variable being defined beforehand.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches; must expose a
            ``dataset`` attribute supporting ``len()`` for progress output.
        net: The ``nn.Module`` to optimise.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        optim: Optimizer bound to ``net``'s parameters.

    Returns:
        None. The loss of every 100th batch is printed as a progress line.
    """
    net.train()

    # Follow the model's own placement; a parameter-free model leaves batches untouched.
    first_param = next(net.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        optim.zero_grad()
        batch_loss = loss(net(X), y)
        batch_loss.backward()
        optim.step()

        if batch % 100 == 0:
            # Samples seen before this batch, assuming equally sized batches.
            seen = batch * len(X)
            print('Train loss: %.4f, [%d/%d]' % (batch_loss.item(), seen, len(dataloader.dataset)))

In [5]:
def test(dataloader, net, loss):
    num_batches = len(dataloader)
    val_loss = 0.0
    acc = 0.0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            val_loss += loss(y_hat, y).item()
            acc += (y_hat.argmax(dim=1)==y).type(torch.float32).sum().item()
    val_loss /= num_batches
    acc /= len(dataloader.dataset)
    print('Test accuracy: %.4f, Test average loss: %.4f' % (acc, val_loss))

In [6]:
def init_weights(m):
    """Xavier-uniform initialise the weight of an ``nn.Linear`` or ``nn.Conv2d``.

    Written to be handed to ``nn.Module.apply``. Only modules whose exact type
    is ``nn.Linear`` or ``nn.Conv2d`` are touched; subclasses and every other
    module type are left as they are, and biases are never modified.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    if type(m) in (nn.Linear, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)

In [7]:
# Preprocessing: convert each image to a tensor, then resize it to 224
# (the input size the network above is fed with).
trans = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(), torchvision.transforms.Resize(224)]
)

# Build the training and test splits from the same local copy of FashionMNIST.
# download=False: the files are expected to already exist under ../data.
train_data, test_data = (
    torchvision.datasets.FashionMNIST(
        root='../data', train=is_train, transform=trans, download=False
    )
    for is_train in (True, False)
)

print('The number of training data:', len(train_data))
print('The number of test data:', len(test_data))

The number of training data: 60000
The number of test data: 10000


In [8]:
batch_size = 64

# Reshuffle the training set at the start of every epoch so SGD does not see
# the exact same batches in the exact same order each time. The dedicated
# seeded generator keeps the shuffling order reproducible across runs.
train_dataloader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator().manual_seed(0),
)
# Evaluation order does not matter, so the test set is read sequentially.
test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

# Sanity check: peek at the first test batch only.
for X, y in test_dataloader:
    print('The shape of X:', X.shape)
    print('The shape of y:', y.shape)
    break

The shape of X: torch.Size([64, 1, 224, 224])
The shape of y: torch.Size([64])


In [9]:
# Prefer the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Run init_weights over every submodule, then move the model to the chosen device.
net.apply(init_weights)
net = net.to(device)

# Cross-entropy objective, optimised with SGD plus momentum.
loss = torch.nn.CrossEntropyLoss()
optim = torch.optim.SGD(net.parameters(), momentum=0.9, lr=1e-2)

In [10]:
# Number of full passes over the training set.
num_epochs = 5

print('Starting training ...')
print('Training on', device)

# Each epoch: one optimisation pass over the training data, then an
# evaluation pass over the held-out test data.
for epoch in range(num_epochs):
    print('epoch %d' % (epoch + 1))  # report epochs 1-based
    train(dataloader=train_dataloader, net=net, loss=loss, optim=optim)
    test(dataloader=test_dataloader, net=net, loss=loss)

print('Done!')

Starting training ...
Training on cuda:0
epoch 1
Train loss: 2.3120, [0/60000]
Train loss: 2.3041, [6400/60000]
Train loss: 2.2973, [12800/60000]
Train loss: 2.2627, [19200/60000]
Train loss: 1.5875, [25600/60000]
Train loss: 1.0370, [32000/60000]
Train loss: 0.9313, [38400/60000]
Train loss: 0.7897, [44800/60000]
Train loss: 0.7173, [51200/60000]
Train loss: 0.6693, [57600/60000]
Test accuracy: 0.7346, Test average loss: 0.6772
epoch 2
Train loss: 0.6128, [0/60000]
Train loss: 0.5518, [6400/60000]
Train loss: 0.4828, [12800/60000]
Train loss: 0.4695, [19200/60000]
Train loss: 0.4506, [25600/60000]
Train loss: 0.4573, [32000/60000]
Train loss: 0.3695, [38400/60000]
Train loss: 0.5880, [44800/60000]
Train loss: 0.4332, [51200/60000]
Train loss: 0.4260, [57600/60000]
Test accuracy: 0.8400, Test average loss: 0.4338
epoch 3
Train loss: 0.2961, [0/60000]
Train loss: 0.3029, [6400/60000]
Train loss: 0.2851, [12800/60000]
Train loss: 0.3609, [19200/60000]
Train loss: 0.4185, [25600/60000]
Tr