In [1]:
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms

# 1. 载入CIFAR10数据集

In [2]:
DATA_PATH = "./data/datasets"
cifar10_train = datasets.CIFAR10(
    DATA_PATH,
    train=True,
    download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4915, 0.4823, 0.4468],
                             [0.2470, 0.2435, 0.2616],),
    ]),
)
cifar10_val = datasets.CIFAR10(
    DATA_PATH,
    train=False,
    download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4915, 0.4823, 0.4468],
                             [0.2470, 0.2435, 0.2616],),
    ]),
)

In [3]:
# 本例只需要区分airplane和bird
# 因此从整个数据集中抽取airplane和bird
label_map = {0: 0, 2: 1}  # 原始数据集中 0: airplane 2:bird
cifar2_train = [(img, label_map[label]) for img, label in cifar10_train if label in (0, 2)]
cifar2_val = [(img, label_map[label]) for img, label in cifar10_val if label in (0, 2)]
len(cifar2_train), len(cifar2_val)

(10000, 2000)

In [4]:
# DataLoader
# 使用DataLoader的两个好处：
# 1. 可以指定batch_size
# 2. 可以在每个epoch开始前shuffle整个数据集
BATCH_SIZE = 64
train_loader = torch.utils.data.DataLoader(cifar2_train, batch_size=BATCH_SIZE, shuffle=True)

# 2. Softmax

In [5]:
# Softmax：将一个向量转换使其符合概率分布
t1 = torch.tensor([1., 2., 3., 4.])
softmax = torch.nn.Softmax(dim=-1)  # 指定Softmax操作的维度
softmax(t1)

tensor([0.0321, 0.0871, 0.2369, 0.6439])

In [6]:
t2 = torch.tensor([[1., 2., 3., 4.],
                   [1., 2., 3., 4.]])
softmax(t2)

tensor([[0.0321, 0.0871, 0.2369, 0.6439],
        [0.0321, 0.0871, 0.2369, 0.6439]])

In [7]:
# LogSoftmax：对Softmax的结果取对数
# 解决了当概率趋于0时求log易出错的问题
t1 = torch.tensor([1., 2., 3., 4.])
torch.nn.LogSoftmax(dim=-1)(t1)

tensor([-3.4402, -2.4402, -1.4402, -0.4402])

In [8]:
torch.log(softmax(t1))

tensor([-3.4402, -2.4402, -1.4402, -0.4402])

# 3. NLL(negative log likelihood)

In [9]:
# 假设神经网络输出的shape为(3, 4)，3是图片数量，4是分类个数
out = torch.randn(3, 4)
out

tensor([[-0.5742,  0.3406,  0.1690,  0.0942],
        [-0.5183,  0.0804,  0.0863,  0.6098],
        [ 0.7201,  0.3580,  0.0738, -1.2240]])

In [10]:
tmp = torch.nn.LogSoftmax(dim=-1)(out)
tmp

tensor([[-2.0216, -1.1068, -1.2784, -1.3531],
        [-2.0464, -1.4477, -1.4418, -0.9183],
        [-0.8600, -1.2222, -1.5064, -2.8042]])

In [11]:
# NLL = - sum(log(对应类别的prob))
loss = torch.nn.NLLLoss()
target = torch.tensor([0, 3, 2])  # 3张图片对应的类别
loss(input=tmp, target=target)

tensor(1.4821)

In [12]:
-(tmp[0][0] + tmp[1][3] + tmp[2][2]) / 3

tensor(1.4821)

# 4. Cross Entropy Loss

In [13]:
# nn.CrossEntropyLoss即为nn.LogSoftmax和nn.NLLLoss的整合
loss = torch.nn.CrossEntropyLoss()
loss(input=out, target=target)

tensor(1.4821)

# 5. 创建网络层

In [14]:
model = torch.nn.Sequential(
    torch.nn.Linear(32 * 32 * 3, 1024),
    torch.nn.Tanh(),
    torch.nn.Linear(1024, 512),
    torch.nn.Tanh(),
    torch.nn.Linear(512, 256),
    torch.nn.Tanh(),
    torch.nn.Linear(256, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 64),
    torch.nn.Tanh(),
    torch.nn.Linear(64, 2),
)

# 6. 训练

In [15]:
lr = 1e-3
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)
loss_fn = torch.nn.CrossEntropyLoss()
n_epochs = 30

for epoch in range(n_epochs):
    for imgs, labels in train_loader:
        # img: [b, 3, 32, 32] -> [b, -1]
        # out: [b, 2]
        batch_size = imgs.shape[0]
        out = model(imgs.reshape(batch_size, -1))
        loss = loss_fn(input=out, target=labels)

        # 清零梯度信息
        optimizer.zero_grad()
        # 计算loss的梯度
        loss.backward()
        # 用梯度更新模型参数
        optimizer.step()

    print(f"epoch: {epoch} loss: {float(loss)}")

epoch: 0 loss: 0.6405237317085266
epoch: 1 loss: 0.6103984713554382
epoch: 2 loss: 0.5862966775894165
epoch: 3 loss: 0.5428605079650879
epoch: 4 loss: 0.6126407384872437
epoch: 5 loss: 0.6868407130241394
epoch: 6 loss: 0.43032869696617126
epoch: 7 loss: 0.4288029372692108
epoch: 8 loss: 0.5740498900413513
epoch: 9 loss: 0.6416813135147095
epoch: 10 loss: 0.6376734972000122
epoch: 11 loss: 0.5493194460868835
epoch: 12 loss: 0.5485740900039673
epoch: 13 loss: 0.46325165033340454
epoch: 14 loss: 0.3338705599308014
epoch: 15 loss: 0.4930219054222107
epoch: 16 loss: 0.4548221826553345
epoch: 17 loss: 0.43117886781692505
epoch: 18 loss: 0.4639514684677124
epoch: 19 loss: 0.5549517869949341
epoch: 20 loss: 0.5419827103614807
epoch: 21 loss: 0.26175540685653687
epoch: 22 loss: 0.5439325571060181
epoch: 23 loss: 0.553778886795044
epoch: 24 loss: 0.6260469555854797
epoch: 25 loss: 0.5388987064361572
epoch: 26 loss: 0.2847745418548584
epoch: 27 loss: 0.4176800847053528
epoch: 28 loss: 0.617195367

# 7. 测试

In [16]:
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=BATCH_SIZE, shuffle=False)

correct = total = 0

# 在inference过程，需要禁止梯度计算
with torch.no_grad():
    for imgs, labels in val_loader:
        batch_size = imgs.shape[0]
        outputs = model(imgs.reshape(batch_size, -1))
        _, pred = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((pred == labels).sum())

    print(f"Accuracy: {correct / total}")

Accuracy: 0.79
