In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
import torchvision
from torchvision import transforms, models

In [2]:
import sys
sys.path.append('..')
from common import d2l

### 1. 加载CIFAR-10数据

In [3]:
# Preprocessing pipeline applied to every image before it is fed to the model.
# Per-channel (R, G, B) statistics handed to Normalize; these are the values
# commonly used with ImageNet-pretrained torchvision models.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

preprocess_steps = [
    # Upscale the image so every sample has a uniform size before cropping.
    transforms.Resize(256),
    # Keep the central 224x224 region and drop the border pixels.
    transforms.CenterCrop(224),
    # Convert the image to a PyTorch tensor, the input format the model expects.
    transforms.ToTensor(),
    # Standardize each channel with the mean/std above.
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
data_transform = transforms.Compose(preprocess_steps)

In [4]:
# Download (if not already present) and load the CIFAR-10 train and test splits.
# Both splits share the same storage directory and the same preprocessing pipeline.
cifar_root = '../data'
train_dataset = torchvision.datasets.CIFAR10(
    root=cifar_root,
    train=True,
    download=True,
    transform=data_transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root=cifar_root,
    train=False,
    download=True,
    transform=data_transform,
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Wrap the datasets in mini-batch loaders.
# Options common to both loaders: 32 samples per batch, loading in the main process.
loader_options = dict(batch_size=32, num_workers=0)

# Only the training data is shuffled; the test order stays fixed.
train_loader = data.DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = data.DataLoader(test_dataset, shuffle=False, **loader_options)

### 2. 加载预训练的AlexNet模型

In [6]:
# Instantiate AlexNet initialized with torchvision's default pretrained weights.
pretrained_weights = models.AlexNet_Weights.DEFAULT
alexnet = models.alexnet(weights=pretrained_weights)

In [7]:
# Bare expression: the notebook renders the model's repr, i.e. its layer structure.
alexnet

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [9]:
# Swap the final fully connected layer for a new one sized for CIFAR-10.
num_classes = 10  # CIFAR-10 has 10 output categories
old_head = alexnet.classifier[6]
# The new layer keeps the old layer's input width and only changes the output width.
alexnet.classifier[6] = nn.Linear(old_head.in_features, num_classes)

### 3. 定义损失函数和优化器

In [11]:
# Loss function and optimizer used for fine-tuning.
criterion = nn.CrossEntropyLoss()
# SGD over all model parameters; `momentum` adds a momentum term to the update.
optimizer = optim.SGD(
    alexnet.parameters(),
    lr=0.001,
    momentum=0.9,
)

### 4. 设置设备CPU或GPU

In [13]:
# Select the compute device: the first CUDA GPU when available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
alexnet.to(device)

# Timer used by the training cell to report elapsed time
# (presumably starts counting on construction -- confirm against common/d2l).
timer = d2l.Timer()

### 5. 训练模型

In [14]:
# Fine-tune the model. Each epoch stops early after the batch with index 200
# and reports the loss and accuracy over the samples seen so far.
num_epochs = 1
for epoch in range(num_epochs):
    # Per-epoch accumulators: summed loss, correct predictions, samples seen.
    running_loss, correct, total = 0.0, 0, 0

    alexnet.train()

    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard optimization step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the final division by
        # `total` gives a per-sample average.
        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if i < 200:
            continue
        # Early stop: report the elapsed time and leave the batch loop.
        print("用时：", timer.stop())
        break

    train_loss = running_loss / total
    train_acc = correct / total

    print(f'迭代周期 [{epoch + 1}/{num_epochs}], 训练损失: {train_loss:.4f}, 训练精度: {train_acc:.4f}')

用时： 539.2840476036072
迭代周期 [1/1], 训练损失: 1.0106, 训练精度: 0.6435


### 6. 测试模型

In [15]:
# Evaluate classification accuracy on the test loader.
#
# `max_test_batches` caps how many batches are evaluated. The default of 1
# keeps the original quick check (a single batch of the loader), which is only
# a rough estimate. Set it to None to evaluate the entire test set; otherwise
# it must be >= 1.
max_test_batches = 1

alexnet.eval()  # inference mode for the model's dropout layers
test_correct = 0
test_total = 0

with torch.no_grad():  # no gradients are needed for evaluation
    for batch_idx, (inputs, labels) in enumerate(test_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = alexnet(inputs)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
        # Check the cap after processing so no extra batch is fetched from the loader.
        if max_test_batches is not None and batch_idx + 1 >= max_test_batches:
            break

test_acc = test_correct / test_total
print(f'测试精度: {test_acc:.4f}')

测试精度: 0.7812


In [16]:
import matplotlib.pyplot as plt
import numpy as np

In [18]:
# Map each integer class index (0-9) to its display name.
# The position of a name in the list is its class index.
label_to_class = dict(enumerate([
    '飞机',
    '汽车',
    '鸟类',
    '猫',
    '鹿',
    '狗',
    '青蛙',
    '马',
    '船',
    '卡车',
]))

In [None]:
# Show each image of the first test batch with its true and predicted class.
#
# The loader yields tensors that were standardized per channel by the
# preprocessing pipeline, so their values fall outside [0, 1]. imshow clips
# such float data, which distorts the colours. The normalization is therefore
# undone before plotting; these constants must match the ones given to
# transforms.Normalize.
# NOTE(review): the Chinese titles need a matplotlib font with CJK glyphs;
# the default font may render them as empty boxes -- confirm locally.
norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

# Make sure the model is in inference mode even if this cell is run on its own.
alexnet.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = alexnet(images)
        _, predicted = torch.max(outputs, 1)

        # Convert the tensors to numpy arrays
        images = images.cpu().numpy()
        labels = labels.cpu().numpy()
        predicted = predicted.cpu().numpy()

        # (N, C, H, W) -> (N, H, W, C), then invert the normalization and clip
        # to the [0, 1] range imshow expects for float RGB data.
        display_images = np.clip(
            np.transpose(images, (0, 2, 3, 1)) * norm_std + norm_mean, 0.0, 1.0
        )

        # Plot every image with its ground-truth and predicted labels
        for i in range(len(images)):
            plt.imshow(display_images[i], interpolation='nearest')
            plt.title(f"真实类型: {label_to_class[labels[i]]}, 预测类型: {label_to_class[predicted[i]]}")
            plt.show()
        break  # only the first batch is visualized