In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt


# 定义 AlexNet 网络结构
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network is five convolutional layers (with three max-pool stages)
    followed by three fully connected layers. The defaults target
    single-channel images such as Fashion-MNIST resized to 224x224.

    Args:
        num_classes: Number of logits produced per sample (output size of the
            last linear layer).
        in_channels: Number of channels of the input images. Defaults to 1
            (grayscale).

    Input is expected as a ``(batch, in_channels, H, W)`` tensor. Because the
    feature map is adaptively pooled to 6x6 before the classifier, ``H`` and
    ``W`` are not tied to 224; they only need to be large enough to survive
    the three max-pool stages (63x63 or larger with the strides used here).
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Pool to a fixed 6x6 grid so the classifier's input size does not
        # depend on the image resolution. For 224x224 inputs the feature map
        # is already 6x6, so this layer is an identity there. It has no
        # parameters, so state_dict keys are unaffected.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        return self.classifier(x)


# 数据加载和预处理
def load_data(batch_size=64, root="../data", image_size=224, num_workers=0):
    """Build Fashion-MNIST train and test data loaders.

    Every image is resized, converted to a tensor and normalized with
    mean 0.5 / std 0.5 on its single channel.

    Args:
        batch_size: Number of samples per batch for both loaders.
        root: Directory in which the dataset is stored. ``download=True`` is
            passed to torchvision, so the files are fetched if missing.
        image_size: Target size passed to ``transforms.Resize``. An int is
            treated as a square ``(image_size, image_size)``; a ``(height,
            width)`` pair is used as given.
        num_workers: Number of worker processes for each ``DataLoader``.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    if isinstance(image_size, int):
        size = (image_size, image_size)
    else:
        size = tuple(image_size)

    transform = transforms.Compose(
        [
            transforms.Resize(size),  # the default 224x224 matches AlexNet's usual input
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
        ]
    )

    train_dataset = datasets.FashionMNIST(
        root=root, train=True, download=True, transform=transform
    )
    test_dataset = datasets.FashionMNIST(
        root=root, train=False, download=True, transform=transform
    )

    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )

    return train_loader, test_loader


# 训练函数
def train(
    model,
    train_loader,
    criterion,
    optimizer,
    device,
    num_epochs=10,
    log_interval=100,
):
    """Train ``model`` and report the running loss at a fixed batch interval.

    Args:
        model: Module to optimize; it is switched to training mode.
        train_loader: Iterable of ``(data, target)`` batches. ``len()`` is
            called on it only when a progress line is printed.
        criterion: Callable computing the loss from ``(outputs, target)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.
        num_epochs: Number of passes over ``train_loader``.
        log_interval: Print the mean loss of the last ``log_interval`` batches
            every ``log_interval`` batches. ``None`` or a non-positive value
            disables progress output.

    Returns:
        A list with one entry per epoch: the mean loss over all batches of
        that epoch (``nan`` if the loader yielded no batches). Unlike the
        printed running loss, this includes the trailing batches that do not
        fill a complete logging interval.
    """
    logging_enabled = log_interval is not None and log_interval > 0
    epoch_losses = []

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0  # reset after every printed progress line
        epoch_loss = 0.0  # accumulated over the whole epoch
        num_batches = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            epoch_loss += batch_loss
            num_batches += 1

            if logging_enabled and (batch_idx + 1) % log_interval == 0:
                print(
                    f"Epoch [{epoch + 1}/{num_epochs}], Batch [{batch_idx + 1}/{len(train_loader)}], Loss: {running_loss / log_interval:.4f}"
                )
                running_loss = 0.0

        epoch_losses.append(epoch_loss / num_batches if num_batches else float("nan"))

    return epoch_losses


# 测试函数
def test(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")


# 主函数
def main(batch_size=64, lr=0.001, num_epochs=10):
    """Train AlexNet on Fashion-MNIST and print its test accuracy.

    Args:
        batch_size: Batch size used for both the training and test loaders.
        lr: Learning rate of the Adam optimizer.
        num_epochs: Number of training epochs.
    """
    # Pick the device: CUDA first, then the MPS backend, then CPU.
    # ``torch.backends.mps`` is looked up defensively because PyTorch builds
    # without that backend do not expose the attribute at all.
    mps_backend = getattr(torch.backends, "mps", None)
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif mps_backend is not None and mps_backend.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    # Load the data.
    train_loader, test_loader = load_data(batch_size=batch_size)

    # Define the model, the loss function and the optimizer.
    model = AlexNet(num_classes=10).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Train the model.
    train(model, train_loader, criterion, optimizer, device, num_epochs=num_epochs)

    # Evaluate the model on the test split.
    test(model, test_loader, device)


# Entry point: run the full train-and-evaluate pipeline when this code is
# executed directly rather than imported as a module.
if __name__ == "__main__":
    main()

Using device: mps
Epoch [1/10], Batch [100/938], Loss: 1.5199
Epoch [1/10], Batch [200/938], Loss: 0.6893
Epoch [1/10], Batch [300/938], Loss: 0.5909
Epoch [1/10], Batch [400/938], Loss: 0.5323
Epoch [1/10], Batch [500/938], Loss: 0.4985
Epoch [1/10], Batch [600/938], Loss: 0.4830
Epoch [1/10], Batch [700/938], Loss: 0.4563
Epoch [1/10], Batch [800/938], Loss: 0.4480
Epoch [1/10], Batch [900/938], Loss: 0.4376
Epoch [2/10], Batch [100/938], Loss: 0.4139
Epoch [2/10], Batch [200/938], Loss: 0.4031
Epoch [2/10], Batch [300/938], Loss: 0.4025
Epoch [2/10], Batch [400/938], Loss: 0.3806
Epoch [2/10], Batch [500/938], Loss: 0.3922
Epoch [2/10], Batch [600/938], Loss: 0.3849
Epoch [2/10], Batch [700/938], Loss: 0.3692
Epoch [2/10], Batch [800/938], Loss: 0.3897
Epoch [2/10], Batch [900/938], Loss: 0.3735
Epoch [3/10], Batch [100/938], Loss: 0.3564
Epoch [3/10], Batch [200/938], Loss: 0.3403
Epoch [3/10], Batch [300/938], Loss: 0.3523
Epoch [3/10], Batch [400/938], Loss: 0.3425
Epoch [3/10], 