In [2]:
# 导入所需的库
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from tempfile import TemporaryDirectory

# Enable cuDNN benchmark mode
cudnn.benchmark = True

# Put matplotlib into interactive mode
plt.ion()

# Preprocessing pipelines, keyed by dataset split.
# 'train' applies random augmentation; 'val' uses a fixed resize + center crop.
# Both finish with the same per-channel mean/std normalization.
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),   # random crop, resized to 224
        transforms.RandomHorizontalFlip(),   # random left/right flip
        transforms.ToTensor(),               # PIL image -> tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.Resize(256),              # resize before cropping
        transforms.CenterCrop(224),          # central 224 crop
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)

# Root folder of the dataset; expects 'train' and 'val' sub-folders
data_dir = 'data'

# One ImageFolder dataset per split, each using that split's transform
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), pipeline)
    for split, pipeline in data_transforms.items()
}

# One shuffled DataLoader per split
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=4,
        shuffle=True,
        num_workers=4,
    )
    for split, dataset in image_datasets.items()
}

# Number of samples in each split
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

# Class names as discovered by ImageFolder on the training split
class_names = image_datasets['train'].classes

# Use the first CUDA device when available, otherwise the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# 定义训练模型的函数
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders``; per-phase loss and accuracy are printed,
    normalized by the module-level ``dataset_sizes``. Batches are moved to the
    module-level ``device``.

    The weights are checkpointed to a file in a temporary directory: once
    before training starts, and again whenever the validation accuracy
    strictly exceeds the best seen so far. After the last epoch the checkpoint
    is loaded back into ``model``. Writing to disk (rather than keeping
    ``model.state_dict()`` around) matters because ``state_dict()`` returns
    references to the live parameter tensors, which later optimizer steps
    overwrite in place.

    Args:
        model: the network to train (modified in place).
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer stepping ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: number of epochs to run. With 0, the model is returned
            with its initial weights.

    Returns:
        The same ``model`` object, holding the best checkpointed weights.
    """
    since = time.time()

    # Keep the best weights in a temporary file that is removed on exit
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        # Checkpoint the starting weights so there is always something to
        # restore, even if no epoch improves on the initial best_acc.
        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training phase and a validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # training mode
                else:
                    model.eval()  # evaluation mode

                # Running totals for this phase
                running_loss = 0.0
                running_corrects = 0

                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # Clear gradients left over from the previous batch
                    optimizer.zero_grad()

                    # Forward pass; track gradients only while training
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # Backward pass and parameter update only while training
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # loss is a batch mean, so weight it by the batch size
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels)

                # Advance the learning-rate schedule once per training epoch
                if phase == 'train':
                    scheduler.step()

                # Per-sample averages for this phase
                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # New best validation accuracy: snapshot the weights to disk
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

        # Restore the best checkpoint (must happen before the tempdir is removed)
        model.load_state_dict(torch.load(best_model_params_path))

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    return model

# Start from a pretrained ResNet-18
model_ft = models.resnet18(pretrained=True)

# Input width of the existing final fully connected layer
num_ftrs = model_ft.fc.in_features

# Swap the final layer for one with an output per class in class_names
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))

# Move the model onto the selected device
model_ft = model_ft.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all model parameters
optimizer_ft = optim.SGD(
    model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Learning-rate schedule: multiply the rate by 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)

# Fine-tune the model
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=2,
)

# Persist the trained weights to disk
torch.save(model_ft.state_dict(), 'model.pth')
print('Model saved to model.pth')

Epoch 0/1
----------
train Loss: 0.5733 Acc: 0.7336
val Loss: 0.3573 Acc: 0.8497
Epoch 1/1
----------
train Loss: 0.4419 Acc: 0.8238
val Loss: 0.4176 Acc: 0.8627
Training complete in 0m 5s
Best val Acc: 0.862745
Model saved to model.pth


In [3]:
# Rebuild the architecture and load the saved weights.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model.load_state_dict(torch.load('model.pth', map_location=device))
model = model.to(device)
model.eval()
print('Model loaded')

# Load the image (replace this with the path to your own picture).
# convert('RGB') guarantees 3 channels, so grayscale / RGBA / palette files
# work with the 3-channel Normalize step; the context manager closes the file.
image_path = 'data/val/bees/72100438_73de9f17af.jpg'
with Image.open(image_path) as img:
    # Apply the validation preprocessing and add a batch dimension
    image = data_transforms['val'](img.convert('RGB')).unsqueeze(0).to(device)

# Predict; no gradients are needed for inference
with torch.no_grad():
    outputs = model(image)
_, preds = torch.max(outputs, 1)
print(f'The predicted class is: {class_names[preds.item()]}')

Model loaded
The predicted class is: bees
