### CIFAR 10 Classifier
- Ref: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html


In [48]:
import os
import time
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.optim as optim
import numpy as np

In [69]:
# 变量
EPOCHS = 10
BATCH_SIZE = 128
learning_rate = 0.05
momentum = 0.9
DATA_DIR = '../data'
classes = ('plane', 'car', 'bird', 'cat',  # 10 个分类
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
criterion = nn.CrossEntropyLoss()
device = torch.device("mps")

In [50]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [71]:
class ConvNetV2(nn.Module):
    """
    Using a sequential CNN that has more layers and a larger batch size along with defined flattening layer
    """
    def __init__(self):
        super(ConvNetV2, self).__init__()
        # convolutional layer
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # fully connected layers
        self.fc1 = nn.Linear(64 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 64)
        self.fc3 = nn.Linear(64, 10)
        # dropout
        self.dropout = nn.Dropout(p=.5)

    def forward(self, x):
        # add sequence of convolutional and max pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # flattening
        x = x.view(-1, 64 * 4 * 4)
        # fully connected layers
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

In [75]:
class ConvNetV3(nn.Module):
    """
    - increased the amount of layers, added Convolutional blocks that have a kernel size of 3. 
    - A kernel is a filter used to extract features from the images. 
    - changed the channel sizes.
    """
    def __init__(self):
        super(ConvNetV3, self).__init__()
        self.conv_layer = nn.Sequential(
            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv Layer block 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),
            # Conv Layer block 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )


        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, 10)
        )


    def forward(self, x):
        """
        Perform forward.
        """
        # conv layers
        x = self.conv_layer(x)
        # flatten
        x = x.view(x.size(0), -1)
        # fc layer
        x = self.fc_layer(x)
        return x

In [51]:
# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [65]:
def train(model, device, train_loader, optimizer, epoch):
    """
    训练模型
    """
    model.train()
    epoch_time = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        # 将变量转移至 MPS
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # loss = F.nll_loss(output, target)  # 损失函数
        loss = criterion(output, target)  # 损失函数

        loss.backward()  # 求导
        optimizer.step()
        if batch_idx % 10 > 0:
            continue
        # 训练准确率
        pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
        # _, pred = torch.max(output.data, 1)
        # print(f"pred: {pred}")
        correct = pred.eq(target.view_as(pred)).sum().item()
        # print(f"correct: {correct}")
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}%\tCosts: {:.6f} seconds'.format(
            epoch, 
            batch_idx * len(data), 
            len(train_loader.dataset),
            batch_idx / len(train_loader) * 100, 
            loss.item(), 
            correct / len(data) * 100,
            time.time() - epoch_time
        ))
        epoch_time = time.time()

In [66]:
def test(model, device, test_loader):
    """
    测试模型
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():  # 不需要求导
        for data, target in test_loader:
            # 批量处理
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [72]:
def main(Model=ConvNet):
    # 归一化
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    will_download = not os.path.exists(f"{DATA_DIR}/cifar-10-python.tar.gz")
    train_set = torchvision.datasets.CIFAR10(
        root=DATA_DIR, train=True, download=will_download, transform=transform
    )
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2
    )
    test_set = torchvision.datasets.CIFAR10(
        root=DATA_DIR, train=False, download=will_download, transform=transform
    )
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=2
    )

    model = Model().to(device)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    for epoch in range(EPOCHS):
        train(model, device, train_loader, optimizer, epoch + 1)
        test(model, device, test_loader)

In [70]:
# v1: Test set: Average loss: 0.0094, Accuracy: 6113/10000 (61%)
main()


Test set: Average loss: 0.0127, Accuracy: 4279/10000 (43%)


Test set: Average loss: 0.0106, Accuracy: 5184/10000 (52%)




Test set: Average loss: 0.0102, Accuracy: 5423/10000 (54%)


Test set: Average loss: 0.0102, Accuracy: 5453/10000 (55%)




Test set: Average loss: 0.0092, Accuracy: 5945/10000 (59%)


Test set: Average loss: 0.0100, Accuracy: 5740/10000 (57%)




Test set: Average loss: 0.0093, Accuracy: 6090/10000 (61%)


Test set: Average loss: 0.0091, Accuracy: 6133/10000 (61%)




Test set: Average loss: 0.0096, Accuracy: 5955/10000 (60%)


Test set: Average loss: 0.0094, Accuracy: 6113/10000 (61%)



In [73]:
# v2: Test set: Average loss: 0.0072, Accuracy: 6893/10000 (69%)
learning_rate = 0.01
main(ConvNetV2)


Test set: Average loss: 0.0114, Accuracy: 4663/10000 (47%)


Test set: Average loss: 0.0095, Accuracy: 5687/10000 (57%)




Test set: Average loss: 0.0088, Accuracy: 6145/10000 (61%)


Test set: Average loss: 0.0082, Accuracy: 6362/10000 (64%)




Test set: Average loss: 0.0079, Accuracy: 6567/10000 (66%)


Test set: Average loss: 0.0075, Accuracy: 6779/10000 (68%)




Test set: Average loss: 0.0075, Accuracy: 6736/10000 (67%)


Test set: Average loss: 0.0082, Accuracy: 6501/10000 (65%)




Test set: Average loss: 0.0076, Accuracy: 6715/10000 (67%)


Test set: Average loss: 0.0072, Accuracy: 6893/10000 (69%)



In [76]:
# v3: Test set: Average loss: 0.0069, Accuracy: 7126/10000 (71%)
learning_rate = 0.001
main(ConvNetV3)


Test set: Average loss: 0.0126, Accuracy: 4003/10000 (40%)


Test set: Average loss: 0.0103, Accuracy: 5217/10000 (52%)




Test set: Average loss: 0.0087, Accuracy: 5997/10000 (60%)


Test set: Average loss: 0.0078, Accuracy: 6438/10000 (64%)




Test set: Average loss: 0.0075, Accuracy: 6599/10000 (66%)


Test set: Average loss: 0.0064, Accuracy: 7152/10000 (72%)




Test set: Average loss: 0.0061, Accuracy: 7340/10000 (73%)


Test set: Average loss: 0.0056, Accuracy: 7500/10000 (75%)




Test set: Average loss: 0.0058, Accuracy: 7526/10000 (75%)


Test set: Average loss: 0.0069, Accuracy: 7126/10000 (71%)

