In [24]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils import data as Data
from torchvision import transforms, datasets

In [25]:
class Residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a skip connection.

    Args:
        input_channels: Number of channels of the incoming feature map.
        num_channels: Number of channels produced by both 3x3 convolutions
            (and by the 1x1 projection when it is enabled).
        use_1x1conv: If True, the skip path is passed through a 1x1
            convolution (with ``strides``) instead of being used unchanged.
        strides: Stride of the first 3x3 convolution and of the 1x1 projection.
    """

    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        # Main path: the first conv may downsample, the second keeps the size.
        self.conv1 = nn.Conv2d(
            input_channels, num_channels, kernel_size=3, stride=strides, padding=1
        )
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        # Skip path: optional 1x1 projection, otherwise no module at all.
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
            if use_1x1conv
            else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        # Not used by forward(), which calls F.relu directly.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, X):
        """Return ``relu(main_path(X) + skip(X))``."""
        main = self.conv1(X)
        main = F.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        shortcut = X if self.conv3 is None else self.conv3(X)
        main += shortcut
        return F.relu(main)

In [26]:
# Sanity check: with the default stride and no 1x1 convolution, the block
# returns a tensor of the same shape as its input.
blk = Residual(3, 3)
X = torch.rand(4, 3, 6, 6)
Y = blk(X)
Y.shape

torch.Size([4, 3, 6, 6])

In [27]:
# With strides=2 and the 1x1 convolution on the skip path, the block changes
# the channel count (3 -> 6) and halves the 6x6 spatial size of the input X.
blk = Residual(3, 6, use_1x1conv=True, strides=2)
blk(X).shape

torch.Size([4, 6, 3, 3])

In [28]:
# Stem of the network: 7x7 convolution (1 -> 64 channels, stride 2), batch
# normalisation, ReLU, then a 3x3 max-pool with stride 2.
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3), nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    """Build the list of ``Residual`` blocks that make up one ResNet stage.

    Args:
        input_channels: Channels of the feature map entering the stage.
        num_channels: Channels produced by every block of the stage.
        num_residuals: Number of ``Residual`` blocks in the stage.
        first_block: If True the stage keeps the spatial size (its first block
            uses stride 1); otherwise the first block downsamples with
            stride 2 and a 1x1 projection on the skip path.

    Returns:
        A list of ``Residual`` modules, suitable for ``nn.Sequential(*blocks)``.
    """
    blk = []
    for i in range(num_residuals):
        if i == 0:
            downsample = not first_block
            # The skip path needs a 1x1 projection whenever the first block
            # changes the spatial size or the number of channels. Without this,
            # a non-downsampling first stage with input_channels != num_channels
            # would ignore input_channels and fail at forward time.
            needs_projection = downsample or input_channels != num_channels
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=needs_projection,
                                strides=2 if downsample else 1))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk

In [29]:
# Four residual stages with two blocks each. b2 keeps 64 channels and uses
# first_block=True (no stride-2 block); b3-b5 each request a stride-2 first
# block and double the channel count (64 -> 128 -> 256 -> 512).
b2= nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3= nn.Sequential(*resnet_block(64, 128, 2))
b4= nn.Sequential(*resnet_block(128, 256, 2))
b5= nn.Sequential(*resnet_block(256, 512, 2))

In [30]:
# Full model: stem, four residual stages, global average pooling to 1x1,
# flatten, and a 512 -> 10 linear classifier.
net= nn.Sequential(b1, b2, b3, b4, b5, nn.AdaptiveAvgPool2d((1,1)),nn.Flatten(), nn.Linear(512, 10))

In [31]:
# Push a random single-channel 224x224 input through the network one
# top-level layer at a time and print the output shape after each.
# Note: this rebinds the notebook-global X on every iteration.
X=torch.rand(size=(1,1,224,224))
for layer in net:
    X=layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 128, 28, 28])
Sequential output shape:	 torch.Size([1, 256, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 512, 1, 1])
Flatten output shape:	 torch.Size([1, 512])
Linear output shape:	 torch.Size([1, 10])


In [40]:
# Training hyperparameters: learning rate, number of epochs, mini-batch size.
lr, num_epochs, batch_size = 0.05, 10, 256
# Resize every image to 224x224 and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])
# Fashion-MNIST train and test splits stored under `root`; download=True
# lets torchvision fetch the data when it is not present locally.
mnist_train = datasets.FashionMNIST(root='dataset/Fashion_Minist', train=True, transform=transform, download=True)
mnist_test = datasets.FashionMNIST(root='dataset/Fashion_Minist', train=False, transform=transform, download=True)
# Only the training loader shuffles; both loaders use 4 worker processes.
train_iter = Data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
test_iter = Data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)


def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the classification accuracy of ``net`` over all of ``data_iter``.

    Args:
        net: Callable mapping a batch of inputs to per-class scores. If it is
            a ``torch.nn.Module`` it is switched to evaluation mode (and left
            in that mode).
        data_iter: Iterable yielding ``(X, y)`` pairs, where ``X`` is a tensor
            or a list of tensors and ``y`` is a tensor of class indices.
        device: Device the batches are moved to. When omitted and ``net`` is a
            module with parameters, the device of its first parameter is used;
            if no device can be determined the batches are left where they are.

    Returns:
        The fraction of correctly classified samples as a float, or ``0.0``
        when ``data_iter`` yields no samples.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
        if device is None:
            # Modules without parameters have no device to infer.
            first_param = next(net.parameters(), None)
            if first_param is not None:
                device = first_param.device
    num_correct, num_samples = 0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if device is not None:
                if isinstance(X, list):
                    X = [x.to(device) for x in X]
                else:
                    X = X.to(device)
                y = y.to(device)
            num_correct += torch.sum(torch.argmax(net(X), dim=1) == y).item()
            num_samples += y.shape[0]
    # The result is returned only after every batch has been counted.
    return num_correct / num_samples if num_samples else 0.0


def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    """Train ``net`` with SGD and cross-entropy loss, then report test accuracy.

    Linear and Conv2d weights are re-initialised with Xavier-uniform before
    training. Running training loss and accuracy are printed about five times
    per epoch; after the last epoch the accuracy on ``test_iter`` is evaluated
    once and printed with the final training statistics.

    Args:
        net: Model to train; it is moved to ``device`` in place.
        train_iter: Sized iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used for the final evaluation.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate for ``torch.optim.SGD``.
        device: Device (string or ``torch.device``) to train on.
    """
    def init_weights(m):
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    num_batches = len(train_iter)
    # Report roughly five times per epoch; never let the interval reach zero,
    # which would raise on the modulo below when there are fewer than 5 batches.
    report_every = max(1, num_batches // 5)
    # Defined up front so the final report works even if no batch was processed.
    train_l = train_acc = float('nan')
    for epoch in range(num_epochs):
        loss_sum, num_correct, num_samples = 0.0, 0, 0
        net.train()
        for i, (X, y) in enumerate(train_iter):
            optimizer.zero_grad()
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            # Accumulate plain Python numbers: summing the loss tensor itself
            # would keep every step attached to the autograd history.
            loss_sum += l.item() * X.shape[0]
            num_correct += torch.sum(torch.argmax(y_hat, dim=1) == y).item()
            num_samples += y.shape[0]
            train_l = loss_sum / num_samples
            train_acc = num_correct / num_samples
            if (i + 1) % report_every == 0 or i == num_batches - 1:
                print('epoch %d, batch %d, loss %.4f, train acc %.3f' % (epoch + 1, i + 1, train_l, train_acc))
    test_acc = evaluate_accuracy_gpu(net, test_iter)
    print(f'loss {train_l:.3f},train acc {train_acc:.3f},test acc {test_acc:.3f}')

In [41]:
# Train on the first CUDA device using the hyperparameters and loaders
# defined above; this call requires a CUDA-capable GPU.
train_ch6(net, train_iter, test_iter, num_epochs, lr, 'cuda:0')

training on cuda:0
epoch 1, batch 47, loss 1.2446, train acc 0.598
epoch 1, batch 94, loss 0.8931, train acc 0.699
epoch 1, batch 141, loss 0.7437, train acc 0.744
epoch 1, batch 188, loss 0.6537, train acc 0.773
epoch 1, batch 235, loss 0.5992, train acc 0.790
epoch 2, batch 47, loss 0.3231, train acc 0.881
epoch 2, batch 94, loss 0.3113, train acc 0.885
epoch 2, batch 141, loss 0.3024, train acc 0.889
epoch 2, batch 188, loss 0.2990, train acc 0.890
epoch 2, batch 235, loss 0.2975, train acc 0.891
epoch 3, batch 47, loss 0.2281, train acc 0.919
epoch 3, batch 94, loss 0.2257, train acc 0.919
epoch 3, batch 141, loss 0.2273, train acc 0.918
epoch 3, batch 188, loss 0.2294, train acc 0.917
epoch 3, batch 235, loss 0.2286, train acc 0.917
epoch 4, batch 47, loss 0.1874, train acc 0.933
epoch 4, batch 94, loss 0.1853, train acc 0.933
epoch 4, batch 141, loss 0.1831, train acc 0.934
epoch 4, batch 188, loss 0.1850, train acc 0.934
epoch 4, batch 235, loss 0.1863, train acc 0.933
epoch 5, 

KeyboardInterrupt: 

In [39]:
# Number of mini-batches the test DataLoader yields per pass.
len(test_iter)

40