In [9]:
# 导入所需库
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.transforms import v2
from PIL import Image

In [10]:
# 自定义三通道随机噪声（确保输入是 tensor）
class AddRandomNoise:
    def __init__(self, R=0.1):
        self.R = R
        
    def __call__(self, img):
        if not isinstance(img, torch.Tensor):
            raise TypeError("Expected input to be a tensor")
        noise = torch.empty_like(img).uniform_(-self.R, self.R)
        return torch.clamp(img + noise, 0.0, 1.0)

In [11]:
 # 无数据增广的预处理
transform_no_aug = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 使用torchvision的数据增广
transform_torchvision = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    v2.RandomRotation(degrees=90),
    v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    AddRandomNoise(R=0.05),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [12]:
class AlbumentationsTransform:
    def __init__(self):
        self.transform = A.Compose([
            A.RandomResizedCrop(height=224, width=224, scale=(0.08, 1.0), ratio=(0.75, 1.33), p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Rotate(limit=90, p=0.5),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
            A.Blur(blur_limit=3, p=0.3),
            A.OpticalDistortion(p=0.3),
            A.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
    
    def __call__(self, img):
        img = np.array(img)
        return self.transform(image=img)["image"]

transform_albumentations = AlbumentationsTransform()

ValueError: 1 validation error for InitSchema
size
  Field required [type=missing, input_value={'scale': (0.08, 1.0), 'r...'mask_interpolation': 0}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing

In [None]:
# 通用测试集 transform
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# %%
# 设备检查
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [None]:
# 构建ResNet-18模型
def build_resnet18():
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)
    return model.to(device)


In [None]:
# 通用训练函数
def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=10):
    train_losses, train_accs, val_losses, val_accs = [], [], [], []
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        # 验证
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()

        val_loss /= len(test_loader)
        val_acc = val_correct / val_total
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        if val_acc > best_acc:
            best_acc = val_acc

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f} Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}\n')

    return train_losses, train_accs, val_losses, val_accs, best_acc


In [None]:
# 训练 ResNet-18 + 数据增强
print("Training ResNet-18 with augmentation...")
train_dataset_aug = datasets.ImageFolder('D:/review&task/大三下/深度学习/Exp3/hotdog/train', transform=transform_torchvision)
train_loader_aug = DataLoader(train_dataset_aug, batch_size=32, shuffle=True, num_workers=0)

test_dataset = datasets.ImageFolder('D:/review&task/大三下/深度学习/Exp3/hotdog/test', transform=transform_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

model_aug = build_resnet18()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_aug.parameters(), lr=0.001, momentum=0.9)

train_loss_aug, train_acc_aug, val_loss_aug, val_acc_aug, best_aug = train_model(
    model_aug, train_loader_aug, test_loader, criterion, optimizer, num_epochs=10
)


In [None]:
# 训练 ResNet-18 无数据增强
print("Training ResNet-18 without augmentation...")
train_dataset_noaug = datasets.ImageFolder('D:/review&task/大三下/深度学习/Exp3/hotdog/train', transform=transform_no_aug)
train_loader_noaug = DataLoader(train_dataset_noaug, batch_size=32, shuffle=True, num_workers=0)

model_noaug = build_resnet18()
optimizer_noaug = torch.optim.SGD(model_noaug.parameters(), lr=0.001, momentum=0.9)

train_loss_noaug, train_acc_noaug, val_loss_noaug, val_acc_noaug, best_noaug = train_model(
    model_noaug, train_loader_noaug, test_loader, criterion, optimizer_noaug, num_epochs=10
)


In [None]:
# 可视化对比
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(train_loss_noaug, label='No Augmentation')
plt.plot(train_loss_aug, label='With Augmentation')
plt.title('Training Loss Comparison')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(val_acc_noaug, label='No Augmentation')
plt.plot(val_acc_aug, label='With Augmentation')
plt.title('Validation Accuracy Comparison')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

# %%
# 打印最终结果
print("| 模型 | 训练准确率 | 测试准确率 |")
print("| ---- | ---------- | ---------- |")
print(f"| ResNet-18 无数据增广 | {train_acc_noaug[-1]:.4f} | {val_acc_noaug[-1]:.4f} |")
print(f"| ResNet-18 有数据增广 | {train_acc_aug[-1]:.4f} | {val_acc_aug[-1]:.4f} |")