In [18]:
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms

# 1. 载入CIFAR10数据集

In [19]:
DATA_PATH = "./data/datasets"
cifar10_train = datasets.CIFAR10(
    DATA_PATH,
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4915, 0.4823, 0.4468],
                             [0.2470, 0.2435, 0.2616],),
    ]),
)
cifar10_val = datasets.CIFAR10(
    DATA_PATH,
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4915, 0.4823, 0.4468],
                             [0.2470, 0.2435, 0.2616],),
    ]),
)

Files already downloaded and verified
Files already downloaded and verified


In [20]:
# 本例只需要区分airplane和bird
# 因此从整个数据集中抽取airplane和bird
label_map = {0: 0, 2: 1}  # 原始数据集中 0: airplane 2:bird
cifar2_train = [(img, label_map[label]) for img, label in cifar10_train if label in (0, 2)]
cifar2_val = [(img, label_map[label]) for img, label in cifar10_val if label in (0, 2)]
len(cifar2_train), len(cifar2_val)

(10000, 2000)

In [21]:
# DataLoader
# 使用DataLoader的两个好处：
# 1. 可以指定batch_size
# 2. 可以在每个epoch开始前shuffle整个数据集
BATCH_SIZE = 64
train_loader = torch.utils.data.DataLoader(cifar2_train, batch_size=BATCH_SIZE, shuffle=True)

# 2. Softmax

In [22]:
# Softmax：将一个向量转换使其符合概率分布
t1 = torch.tensor([1., 2., 3., 4.])
softmax = torch.nn.Softmax(dim=-1)  # 指定Softmax操作的维度
softmax(t1)

tensor([0.0321, 0.0871, 0.2369, 0.6439])

In [23]:
t2 = torch.tensor([[1., 2., 3., 4.],
                   [1., 2., 3., 4.]])
softmax(t2)

tensor([[0.0321, 0.0871, 0.2369, 0.6439],
        [0.0321, 0.0871, 0.2369, 0.6439]])

In [24]:
# LogSoftmax：对Softmax的结果取对数
# 解决了当概率趋于0时求log易出错的问题
t1 = torch.tensor([1., 2., 3., 4.])
torch.nn.LogSoftmax(dim=-1)(t1)

tensor([-3.4402, -2.4402, -1.4402, -0.4402])

In [25]:
torch.log(softmax(t1))

tensor([-3.4402, -2.4402, -1.4402, -0.4402])

# 3. NLL(negative log likelihood)

In [26]:
# 假设神经网络输出的shape为(3, 4)，3是图片数量，4是分类个数
out = torch.randn(3, 4)
out

tensor([[-0.6360, -0.0668, -0.0512,  0.0691],
        [-1.5530, -0.4829, -1.2019,  0.0383],
        [ 1.3670, -0.8276, -0.6638,  1.0416]])

In [27]:
tmp = torch.nn.LogSoftmax(dim=-1)(out)
tmp

tensor([[-1.8849, -1.3157, -1.3001, -1.1798],
        [-2.3269, -1.2568, -1.9758, -0.7356],
        [-0.6754, -2.8701, -2.7063, -1.0008]])

In [28]:
# NLL = - sum(log(对应类别的prob))
loss = torch.nn.NLLLoss()
target = torch.tensor([0, 3, 2])  # 3张图片对应的类别
loss(input=tmp, target=target)

tensor(1.7756)

In [29]:
-(tmp[0][0] + tmp[1][3] + tmp[2][2]) / 3

tensor(1.7756)

# 4. Cross Entropy Loss

In [30]:
# nn.CrossEntropyLoss即为nn.LogSoftmax和nn.NLLLoss的整合
loss = torch.nn.CrossEntropyLoss()
loss(input=out, target=target)

tensor(1.7756)

# 5. 创建网络层

In [31]:
model = torch.nn.Sequential(
    torch.nn.Linear(32 * 32 * 3, 1024),
    torch.nn.Tanh(),
    torch.nn.Linear(1024, 512),
    torch.nn.Tanh(),
    torch.nn.Linear(512, 256),
    torch.nn.Tanh(),
    torch.nn.Linear(256, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 64),
    torch.nn.Tanh(),
    torch.nn.Linear(64, 2),
)

# 6. 训练

In [32]:
lr = 1e-2
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)
loss_fn = torch.nn.CrossEntropyLoss()
n_epochs = 20

for epoch in range(n_epochs):
    losses = []
    for imgs, labels in train_loader:
        # img: [b, 3, 32, 32] -> [b, -1]
        # out: [b, 2]
        batch_size = imgs.shape[0]
        out = model(imgs.reshape(batch_size, -1))
        loss = loss_fn(input=out, target=labels)

        # 清零梯度信息
        optimizer.zero_grad()
        # 计算loss的梯度
        loss.backward()
        # 用梯度更新模型参数
        optimizer.step()

        losses.append(float(loss))

    print(f"epoch: {epoch} loss: {sum(losses) / len(losses)}")

epoch: 0 loss: 0.588484963414016
epoch: 1 loss: 0.5014044893015722
epoch: 2 loss: 0.48209624286669833
epoch: 3 loss: 0.4712113159097684
epoch: 4 loss: 0.46001220015203875
epoch: 5 loss: 0.4497708412492351
epoch: 6 loss: 0.44224643764222504
epoch: 7 loss: 0.43368932225142315
epoch: 8 loss: 0.4246966914766154
epoch: 9 loss: 0.4163347775009787
epoch: 10 loss: 0.4098209279358007
epoch: 11 loss: 0.4020033873570193
epoch: 12 loss: 0.39598155515209127
epoch: 13 loss: 0.3864271598067253
epoch: 14 loss: 0.38284256817049284
epoch: 15 loss: 0.36985874944811414
epoch: 16 loss: 0.36772235611062143
epoch: 17 loss: 0.35801338504074487
epoch: 18 loss: 0.3564667286956386
epoch: 19 loss: 0.34396797428085546


# 7. 测试

In [33]:
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=BATCH_SIZE, shuffle=False)

correct = total = 0

# 在inference过程，需要禁止梯度计算
with torch.no_grad():
    for imgs, labels in val_loader:
        batch_size = imgs.shape[0]
        outputs = model(imgs.reshape(batch_size, -1))
        _, pred = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((pred == labels).sum())

    print(f"Accuracy: {correct / total}")

Accuracy: 0.6855
