In [1]:
# 导入所需库
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os
import zipfile
import requests
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.transforms import v2



In [2]:
# Custom random noise applied to every element of the image tensor (all channels)
class AddRandomNoise:
    """Add element-wise uniform noise to an image tensor and clamp to [0, 1].

    In this notebook the transform is placed after ``ToTensor`` and before
    ``Normalize``, so it operates on float tensors scaled to [0, 1].

    Args:
        R: Half-width of the noise interval. Every element is perturbed by an
            independent sample drawn uniformly from [-R, R).
    """

    def __init__(self, R=0.1):
        self.R = R

    def __call__(self, img):
        """Return a noisy copy of ``img``; the input tensor is not modified."""
        # empty_like allocates the noise on the same device as `img`, so the
        # addition also works for non-CPU tensors. float32 matches the dtype
        # the legacy torch.FloatTensor constructor produced.
        noise = torch.empty_like(img, dtype=torch.float32).uniform_(-self.R, self.R)
        # Keep the result inside the valid [0, 1] pixel range.
        return torch.clamp(img + noise, 0, 1)

In [None]:
# Normalization statistics shared by every pipeline below.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]


class AlbumentationsTransform:
    """Adapt an albumentations pipeline to the ``transform(img)`` call convention.

    torchvision datasets call their transform with a single positional PIL
    image, whereas an albumentations ``Compose`` must be called with the
    ``image=`` keyword on a numpy array and returns a dict. This wrapper
    bridges the two so the pipeline can be passed to ``ImageFolder``.

    Args:
        pipeline: An albumentations ``Compose`` (or compatible callable).
    """

    def __init__(self, pipeline):
        self.pipeline = pipeline

    def __call__(self, img):
        # np.array copies the PIL image into an HxWxC array that albumentations can process.
        return self.pipeline(image=np.array(img))["image"]


# Preprocessing without data augmentation
transform_no_aug = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Data augmentation with torchvision
transform_torchvision = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    v2.RandomRotation(degrees=90),  # rotate by a random angle in [-90, 90] degrees
    v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # brightness/contrast/saturation/hue jitter
    transforms.ToTensor(),
    AddRandomNoise(R=0.05),  # custom random noise, applied on the [0, 1] tensor before normalization
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Data augmentation with albumentations.
# NOTE: this object must be called as transform_albumentations(image=ndarray)["image"];
# wrap it in AlbumentationsTransform before handing it to a torchvision dataset.
transform_albumentations = A.Compose([
    A.RandomResizedCrop(
        size=(224, 224),  # uses the `size` argument instead of separate height/width
        scale=(0.08, 1.0),
        ratio=(0.75, 1.33),
        p=1.0
    ),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=90, p=0.5),
    A.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
        p=0.5
    ),
    A.Blur(blur_limit=3, p=0.3),
    A.OpticalDistortion(p=0.3),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2()
])

# Select the augmentation strategy
use_augmentation = 'torchvision'  # options: 'none', 'torchvision', 'albumentations'

if use_augmentation == 'none':
    transform_train = transform_no_aug
elif use_augmentation == 'torchvision':
    transform_train = transform_torchvision
elif use_augmentation == 'albumentations':
    # ImageFolder calls transform(img) positionally, so the raw A.Compose cannot be used directly.
    transform_train = AlbumentationsTransform(transform_albumentations)
else:
    # Fail loudly on typos instead of silently falling back to one of the pipelines.
    raise ValueError(
        f"Unknown use_augmentation={use_augmentation!r}; "
        "expected 'none', 'torchvision' or 'albumentations'"
    )

# Deterministic preprocessing for evaluation (same steps as transform_no_aug)
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

In [4]:
# Load the datasets (the paths are specific to the author's machine)
train_dataset = datasets.ImageFolder('D:/review&task/大三下/深度学习/Exp3/hotdog/train', transform=transform_train)
test_dataset = datasets.ImageFolder('D:/review&task/大三下/深度学习/Exp3/hotdog/test', transform=transform_test)

batch_size = 32
# On Windows, DataLoader workers are spawned and have to unpickle the dataset,
# including its transform. Transforms defined inside the notebook (such as
# AddRandomNoise) cannot be imported by those workers, so the training data is
# loaded in the main process there.
train_num_workers = 0 if os.name == 'nt' else 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=train_num_workers)
# The evaluation pipeline only uses torchvision classes, so worker processes are kept.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Pick the compute device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [5]:
# Build the ResNet-18 model
def build_resnet18(num_classes=2):
    """Create an ImageNet-pretrained ResNet-18 with a fresh classification head.

    Args:
        num_classes: Number of output classes of the replaced final ``fc``
            layer. Defaults to 2, the value previously hard-coded here.

    Returns:
        The model, already moved to the notebook-level ``device``.
    """
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    # Replace the pretrained head with a new linear layer sized for this task.
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)
    return model.to(device)

In [6]:
# Training utilities
def _run_epoch(model, criterion, loader, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    The model is updated when ``optimizer`` is given; otherwise the pass is a
    pure evaluation with gradients disabled.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            if isinstance(inputs, list):  # kept from the original albumentations workaround
                inputs = inputs[0]
            inputs, labels = inputs.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            n_samples = labels.size(0)
            # Weight each batch by its size so a smaller final batch does not
            # skew the epoch average (assumes the criterion returns a per-batch mean).
            loss_sum += loss.item() * n_samples
            _, predicted = outputs.max(1)
            total += n_samples
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("data loader produced no samples")
    return loss_sum / total, correct / total


def train_model(model, criterion, optimizer, num_epochs=10):
    """Train ``model`` and evaluate it after every epoch.

    Training batches come from the notebook-level ``train_loader`` and
    evaluation batches from ``test_loader``; tensors are moved to the
    notebook-level ``device``.

    Args:
        model: Network to train; expected to already live on ``device``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Number of full passes over the training data.

    Returns:
        Tuple ``(train_losses, train_accs, val_losses, val_accs, best_acc)``:
        four per-epoch lists followed by the highest validation accuracy seen.
    """
    train_losses, train_accs, val_losses, val_accs = [], [], [], []
    best_acc = 0.0

    for epoch in range(num_epochs):
        # Training phase
        train_loss, train_acc = _run_epoch(model, criterion, train_loader, optimizer)
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        # Validation phase
        val_loss, val_acc = _run_epoch(model, criterion, test_loader)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        # Track the best validation accuracy
        best_acc = max(best_acc, val_acc)

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f} Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}\n')

    return train_losses, train_accs, val_losses, val_accs, best_acc

In [None]:
# Train ResNet-18 with data augmentation
print("Training ResNet-18 with augmentation...")
use_augmentation = 'torchvision'  # informational only; the transform is passed explicitly below
train_dataset = datasets.ImageFolder('D:/review&task/大三下/深度学习/Exp3/hotdog/train', transform=transform_torchvision)
# transform_torchvision contains AddRandomNoise, a class defined inside this
# notebook. Spawned DataLoader workers on Windows cannot import it, so the
# data is loaded in the main process there.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0 if os.name == 'nt' else 4,
)

model_resnet_aug = build_resnet18()
criterion = nn.CrossEntropyLoss()
optimizer_resnet = torch.optim.SGD(model_resnet_aug.parameters(), lr=0.001, momentum=0.9)
# train_model reads the notebook-level train_loader that was just rebound above.
train_loss_aug, train_acc_aug, val_loss_aug, val_acc_aug, best_aug = train_model(model_resnet_aug, criterion, optimizer_resnet, 10)

Training ResNet-18 with augmentation...


In [None]:
# Train ResNet-18 without data augmentation
print("Training ResNet-18 without augmentation...")
use_augmentation = 'none'  # informational only; the transform is passed explicitly below
# Same training directory as the augmented run (stray double slash removed from the path).
train_dataset = datasets.ImageFolder('D:/review&task/大三下/深度学习/Exp3/hotdog/train', transform=transform_no_aug)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# A fresh model and optimizer so this run does not continue from the augmented one.
model_resnet_noaug = build_resnet18()
criterion = nn.CrossEntropyLoss()
optimizer_resnet = torch.optim.SGD(model_resnet_noaug.parameters(), lr=0.001, momentum=0.9)
# train_model reads the notebook-level train_loader that was just rebound above.
train_loss_noaug, train_acc_noaug, val_loss_noaug, val_acc_noaug, best_noaug = train_model(model_resnet_noaug, criterion, optimizer_resnet, 10)

In [None]:
# Plot the comparison curves: training loss (left) and validation accuracy (right)
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

ax_loss.plot(train_loss_noaug, label='No Augmentation')
ax_loss.plot(train_loss_aug, label='With Augmentation')
ax_loss.set_title('Training Loss Comparison')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

ax_acc.plot(val_acc_noaug, label='No Augmentation')
ax_acc.plot(val_acc_aug, label='With Augmentation')
ax_acc.set_title('Validation Accuracy Comparison')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Keep the y-axis labels of the right panel from overlapping the left panel.
fig.tight_layout()
plt.show()

In [None]:
# Print the final-epoch train/test accuracy of both runs as a markdown table
print("| 模型 | 训练准确率 | 测试准确率 |")
print("| --- | --- | --- |")
for row_label, train_history, val_history in (
    ("ResNet-18无数据增广", train_acc_noaug, val_acc_noaug),
    ("ResNet-18有数据增广", train_acc_aug, val_acc_aug),
):
    # The last list entry is the metric of the final epoch.
    print(f"| {row_label} | {train_history[-1]:.4f} | {val_history[-1]:.4f} |")