In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import torchvision

In [2]:
class Residual(nn.Module):

    def __init__(self, input_channels, output_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, output_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(output_channels)
        self.bn2 = nn.BatchNorm2d(output_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)

In [3]:
def resnet_block(input_channels, output_channels, num_residuals, first_block=False):
    blk = []
    for i in range(num_residuals):
        if i==0 and not first_block:
            blk.append(Residual(input_channels, output_channels, use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(output_channels, output_channels))
    return blk

In [4]:
block1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
)
block2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
block3 = nn.Sequential(*resnet_block(64, 128, 2))
block4 = nn.Sequential(*resnet_block(128, 256, 2))
block5 = nn.Sequential(*resnet_block(256,512, 2))
net = nn.Sequential(
    block1, block2, block3, block4, block5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(512, 10)
)

In [5]:
trans = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize(224)
])

train_data = torchvision.datasets.FashionMNIST(
    root='../data', train=True, transform=trans, download=False
)

test_data = torchvision.datasets.FashionMNIST(
    root='../data', train=False, transform=trans, download=False
)

batch_size = 64

train_dataloader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size
)

test_dataloader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size
)

In [6]:
def accuracy(y_hat, y):
    acc = (y_hat.argmax(dim=1)==y).type(torch.float32).sum().item()
    return acc

In [7]:
def train(net, dataloader, loss, trainer):
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        trainer.zero_grad()
        y_hat = net(X)
        l = loss(y_hat, y)
        l.backward()
        trainer.step()
        if batch%100 == 0:
            train_loss = l.item()
            train_acc = accuracy(y_hat, y) / len(y)
            current_batch = batch * len(y)
            print('Train loss: %.4f\tTrain acc: %.4f\t[%d/%d]' % (train_loss, train_acc, current_batch, len(dataloader.dataset)))

In [8]:
def test(net, dataloader, loss):
    num_batches = len(dataloader)
    test_loss = 0.0
    test_acc = 0.0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            test_acc += accuracy(y_hat, y)
            test_loss += loss(y_hat, y)
    test_acc /= len(dataloader.dataset)
    test_loss /= num_batches
    print('Test loss: %.4f\tTest acc: %.4f' % (test_loss, test_acc))

In [9]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = net.to(device)
loss = nn.CrossEntropyLoss()
trainer = torch.optim.Adam(net.parameters(), lr=1e-2)

num_epochs = 5
print('Start training ...')
print('Training on', device)
for epoch in range(num_epochs):
    print('epoch', epoch+1)
    train(net, train_dataloader, loss, trainer)
    test(net, test_dataloader, loss)
print('Done!')

Start training ...
Training on cuda:0
epoch 1
Train loss: 2.3730	Train acc: 0.1094	[0/60000]
Train loss: 0.9741	Train acc: 0.5938	[6400/60000]
Train loss: 0.4410	Train acc: 0.8750	[12800/60000]
Train loss: 0.5638	Train acc: 0.7812	[19200/60000]
Train loss: 0.5420	Train acc: 0.7812	[25600/60000]
Train loss: 0.5334	Train acc: 0.8438	[32000/60000]
Train loss: 0.3601	Train acc: 0.8594	[38400/60000]
Train loss: 0.5863	Train acc: 0.7344	[44800/60000]
Train loss: 0.5095	Train acc: 0.8438	[51200/60000]
Train loss: 0.4389	Train acc: 0.7969	[57600/60000]
Test loss: 0.4551	Test acc: 0.8346
epoch 2
Train loss: 0.3100	Train acc: 0.8906	[0/60000]
Train loss: 0.3396	Train acc: 0.8750	[6400/60000]
Train loss: 0.4773	Train acc: 0.8281	[12800/60000]
Train loss: 0.3585	Train acc: 0.8438	[19200/60000]
Train loss: 0.4143	Train acc: 0.8438	[25600/60000]
Train loss: 0.3995	Train acc: 0.8594	[32000/60000]
Train loss: 0.3675	Train acc: 0.9062	[38400/60000]
Train loss: 0.5134	Train acc: 0.7812	[44800/60000]
Tra

In [7]:
def train_with_val(net, trainloader, testloader, lr, num_epochs):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    trainer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    print('Start training ...')
    print('Training on',  device)
    for epoch in range(num_epochs):
        test_acc = 0.0
        test_loss = 0.0
        print('epoch:', epoch+1)
        for batch, (X, y) in enumerate(trainloader):
            X, y = X.to(device), y.to(device)
            trainer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            if batch%200 == 0:
                train_loss = l.item()
                current_batch = batch * len(X)
                train_acc = accuracy(y_hat, y)/len(X)
                print('Train loss: %.4f\tTrain acc:%.4f\t[%d/%d]' % (train_loss, train_acc, current_batch, len(trainloader.dataset)))
        with torch.no_grad():
            for X, y in testloader:
                X, y = X.to(device), y.to(device)
                y_hat = net(X)
                test_loss += loss(y_hat, y)
                test_acc += accuracy(y_hat, y)
        test_acc /= len(testloader.dataset)
        test_loss /= len(testloader)
        print('Test loss: %.4f\tTest acc: %.4f' % (test_loss, test_acc))
    print('Done!')

In [8]:
train_with_val(net, train_dataloader, test_dataloader, lr=1e-2, num_epochs=5)

Start training ...
Training on cuda:0
epoch: 1
Train loss: 2.4085	Train acc:0.1094	[0/60000]
Train loss: 0.4832	Train acc:0.8750	[12800/60000]
Train loss: 0.4928	Train acc:0.8438	[25600/60000]
Train loss: 0.4397	Train acc:0.8594	[38400/60000]
Train loss: 0.4377	Train acc:0.8750	[51200/60000]
Test loss: 0.3807	Test acc: 0.8653
epoch: 2
Train loss: 0.2697	Train acc:0.8906	[0/60000]
Train loss: 0.2393	Train acc:0.9062	[12800/60000]
Train loss: 0.2554	Train acc:0.8906	[25600/60000]
Train loss: 0.2559	Train acc:0.9062	[38400/60000]
Train loss: 0.3262	Train acc:0.9219	[51200/60000]
Test loss: 0.3058	Test acc: 0.8910
epoch: 3
Train loss: 0.2400	Train acc:0.9062	[0/60000]
Train loss: 0.1825	Train acc:0.9375	[12800/60000]
Train loss: 0.2217	Train acc:0.8906	[25600/60000]
Train loss: 0.1790	Train acc:0.9219	[38400/60000]
Train loss: 0.2784	Train acc:0.8906	[51200/60000]
Test loss: 0.2785	Test acc: 0.9057
epoch: 4
Train loss: 0.2140	Train acc:0.9062	[0/60000]
Train loss: 0.1575	Train acc:0.9531	[