In [1]:
import torch
import torch.nn as nn
from torchinfo import summary

In [2]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network is five convolution layers (``features``), an adaptive
    average pool that fixes the spatial size at 6x6 (``avgpool``), and a
    three-layer fully connected head (``classifier``).

    Args:
        num_classes: Number of output logits produced by the last Linear layer.
            NOTE(review): the default of 10000 looks like a typo for 1000 (the
            value used by torchvision's reference AlexNet); it is kept as-is to
            preserve the existing interface -- confirm before changing.
        dropout: Drop probability used by both Dropout layers of the classifier.
    """

    def __init__(self, num_classes=10000, dropout=0.5):
        super().__init__()
        # Feature extractor. nn.Sequential is an ordered container that runs
        # its sub-modules one after another.
        # Shape comments below are for a 3x224x224 input.
        self.features = nn.Sequential(
            # Layer 1: 3 -> 64 channels, 55x55 after the conv, 27x27 after pooling.
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            # inplace=True overwrites the input tensor to save memory.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 2: 64 -> 192 channels, 13x13 after pooling.
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 3: 192 -> 384 channels.
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Layer 4: 384 -> 256 channels.
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Layer 5: 256 -> 256 channels, 6x6 after the final pooling.
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Force a 6x6 spatial map so the classifier's 256*6*6 input size holds
        # for any input resolution. For 224x224 inputs the feature map is
        # already 6x6, so this is an exact identity there. The layer has no
        # parameters, so state_dict keys are unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        # Fully connected head: 9216 -> 4096 -> 4096 -> num_classes.
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Map a batch of 3-channel images to raw class logits.

        Args:
            x: Tensor of shape (batch, 3, H, W).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised scores.
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial dims together.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [3]:
# Shell out to the NVIDIA CLI to show the driver/CUDA version and current GPU memory and utilisation.
!nvidia-smi

Mon May  5 21:40:47 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.36                 Driver Version: 566.36         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   47C    P3             13W /  140W |    1769MiB /   8188MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
def try_gpu(i=0):
    """Return the ``cuda:i`` device if that GPU index exists, else the CPU device.

    Args:
        i: Zero-based index of the requested GPU.
    """
    gpu_exists = i + 1 <= torch.cuda.device_count()
    return torch.device(f'cuda:{i}') if gpu_exists else torch.device('cpu')

def try_all_gpus():
    """Return a list of every visible CUDA device, or ``[cpu]`` when there is none."""
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        # No GPU available: fall back to a single-element CPU list.
        return [torch.device('cpu')]
    return [torch.device(f'cuda:{idx}') for idx in range(gpu_count)]

# Display: the default GPU, an out-of-range GPU index (falls back to CPU), and all devices.
(try_gpu(), try_gpu(10), try_all_gpus())

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

In [5]:
# Build an AlexNet with 102 output classes and move it to the first GPU (or CPU if none).
model = AlexNet(num_classes=102).to(device=try_gpu())

In [6]:
# Layer-by-layer summary of a freshly built AlexNet with default arguments
# (not the `model` instance) for one 3x224x224 image.
summary(AlexNet(), input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
AlexNet                                  [1, 10000]                --
├─Sequential: 1-1                        [1, 256, 6, 6]            --
│    └─Conv2d: 2-1                       [1, 64, 55, 55]           23,296
│    └─ReLU: 2-2                         [1, 64, 55, 55]           --
│    └─MaxPool2d: 2-3                    [1, 64, 27, 27]           --
│    └─Conv2d: 2-4                       [1, 192, 27, 27]          307,392
│    └─ReLU: 2-5                         [1, 192, 27, 27]          --
│    └─MaxPool2d: 2-6                    [1, 192, 13, 13]          --
│    └─Conv2d: 2-7                       [1, 384, 13, 13]          663,936
│    └─ReLU: 2-8                         [1, 384, 13, 13]          --
│    └─Conv2d: 2-9                       [1, 256, 13, 13]          884,992
│    └─ReLU: 2-10                        [1, 256, 13, 13]          --
│    └─Conv2d: 2-11                      [1, 256, 13, 13]         

In [7]:
from torchvision import models

# Summarise torchvision's AlexNet on the same input shape, for comparison with the custom one.
summary(models.alexnet(), input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
AlexNet                                  [1, 1000]                 --
├─Sequential: 1-1                        [1, 256, 6, 6]            --
│    └─Conv2d: 2-1                       [1, 64, 55, 55]           23,296
│    └─ReLU: 2-2                         [1, 64, 55, 55]           --
│    └─MaxPool2d: 2-3                    [1, 64, 27, 27]           --
│    └─Conv2d: 2-4                       [1, 192, 27, 27]          307,392
│    └─ReLU: 2-5                         [1, 192, 27, 27]          --
│    └─MaxPool2d: 2-6                    [1, 192, 13, 13]          --
│    └─Conv2d: 2-7                       [1, 384, 13, 13]          663,936
│    └─ReLU: 2-8                         [1, 384, 13, 13]          --
│    └─Conv2d: 2-9                       [1, 256, 13, 13]          884,992
│    └─ReLU: 2-10                        [1, 256, 13, 13]          --
│    └─Conv2d: 2-11                      [1, 256, 13, 13]         

In [8]:
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm
import sys
import numpy as np

In [9]:
# Fix PyTorch's global RNG seed so subsequent random operations are repeatable.
# NOTE: `model` was already constructed in an earlier cell, so this seed does
# not influence its initial weights.
torch.manual_seed(seed=86)

<torch._C.Generator at 0x2523019f8f0>

In [10]:
# Loss: cross-entropy computed from the model's raw logits and integer labels.
criterion = nn.CrossEntropyLoss()
# Optimiser: SGD with momentum over every parameter of `model`.
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9)

In [11]:
# Training-time preprocessing with data augmentation.
trainform_train = transforms.Compose(
    [
        # Rotate by a random angle in [-30, 30] degrees to add variety.
        transforms.RandomRotation(30),
        # Randomly crop and rescale to 224x224, simulating different viewpoints.
        transforms.RandomResizedCrop((224, 224)),
        # Flip horizontally with probability 0.5.
        transforms.RandomHorizontalFlip(p=0.5),
        # Convert to a PyTorch tensor (pixel values go from [0, 255] to [0, 1]).
        transforms.ToTensor(),
        # Normalise per channel with ImageNet statistics to help convergence.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [12]:
# Test-time preprocessing: no augmentation, so evaluation inputs stay consistent.
transform_test = transforms.Compose(
    [
        # Resize every image to 224x224.
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Same per-channel normalisation as the training pipeline.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [13]:
# Flowers102 training split with the augmenting transform; downloaded into `root` if needed.
train_dataset = datasets.Flowers102(
    root='/data/flowers102/',
    split="train",
    download=True,
    transform=trainform_train,
)
# Shuffled mini-batches of 256 images, loaded by 16 worker processes.
train_loader = DataLoader(
    train_dataset,
    batch_size=256,
    shuffle=True,
    num_workers=16,
)

# Flowers102 test split with the deterministic transform.
test_dataset = datasets.Flowers102(
    root='/data/flowers102/',
    split="test",
    download=True,
    transform=transform_test,
)
# Evaluation batches keep dataset order (no shuffling).
test_loader = DataLoader(
    test_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=16,
)

In [14]:
# Report whether PyTorch can use CUDA in this environment.
cuda_available = torch.cuda.is_available()
print(cuda_available)

True


In [None]:
# Imported before training starts so a missing matplotlib fails immediately
# instead of after the whole (multi-hour) training run has finished.
import matplotlib.pyplot as plt

# Train for a fixed number of epochs, evaluating on the test split after each one.
num_epochs = 500   # number of passes over the training set
loss_history = []  # per-epoch log10 of the summed training batch losses
acc_history = []   # per-epoch accuracy on the test set

# Resolve the target device once; it does not change between batches.
device = try_gpu()

# tqdm shows a progress bar and an estimate of the remaining time.
for epoch in tqdm(range(num_epochs), file=sys.stdout):
    # Running totals for this epoch.
    total_loss = 0
    total_correct = 0

    # ---- Training pass ----
    model.train()
    for inputs, labels in train_loader:
        # Move the batch to the same device as the model.
        inputs = inputs.to(device = device)
        labels = labels.to(device = device)

        # Forward pass, loss, backward pass, parameter update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the (batch-mean) training loss.
        total_loss += loss.item()

    # ---- Evaluation pass (no gradients needed) ----
    model.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            # Move the batch to the same device as the model.
            inputs = inputs.to(device = device)
            labels = labels.to(device = device)

            # Predict and count how many top-1 predictions match the labels.
            outputs = model(inputs)
            total_correct += (outputs.argmax(1) == labels).sum().item()

    # Store the log10 of the summed loss (the raw value can be large) and the
    # fraction of correctly classified test images.
    loss_history.append(np.log10(total_loss))
    acc_history.append(total_correct / len(test_dataset))

    # Print intermediate values every 5 epochs without breaking the progress bar.
    if epoch % 5 == 0:
        tqdm.write("Epoch: {0} Loss: {1} Acc: {2}".format(epoch, loss_history[-1], acc_history[-1]))

# Plot the loss and accuracy curves; both histories have one point per epoch.
plt.plot(loss_history, label='loss')
plt.plot(acc_history, label='accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()

# Report the accuracy reached after the final epoch.
print("Accuracy:", acc_history[-1])

Epoch: 0 Loss: 1.2671734970407076 Acc: 0.007643519271426248
  0%|          | 1/500 [01:10<9:47:37, 70.66s/it]