In [42]:
import cv2
import numpy as np
import os
import random
import shutil

def is_blurry(image, threshold=20):
    """Report whether an image looks out of focus.

    The Laplacian of the image is computed with 64-bit float output and its
    variance is used as a sharpness score.

    Args:
        image: Image array accepted by ``cv2.Laplacian``.
        threshold: Sharpness score below which the image counts as blurry.

    Returns:
        True-like value when the score is strictly below ``threshold``.
    """
    edge_response = cv2.Laplacian(image, cv2.CV_64F)
    sharpness_score = edge_response.var()
    return sharpness_score < threshold

def is_dark(image, brightness_threshold=100):
    """Report whether an image is under-exposed.

    The image is converted from BGR to HSV and the mean of the V (value)
    channel is compared against the threshold.

    Args:
        image: BGR image array accepted by ``cv2.cvtColor``.
        brightness_threshold: Mean V value below which the image counts as dark.

    Returns:
        True-like value when the mean V value is strictly below
        ``brightness_threshold``.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # cv2.split yields the channels in H, S, V order; only V is needed.
    value_channel = cv2.split(hsv_image)[-1]
    return np.mean(value_channel) < brightness_threshold

def augment_images(images_dir, output_dir):
    """Filter the JPEG images in ``images_dir`` and write augmented copies.

    ``output_dir`` is deleted and recreated on every call. Each ``.jpg`` /
    ``.jpeg`` file (extension matched case-insensitively) directly inside
    ``images_dir`` is read with OpenCV and skipped when it cannot be decoded,
    is smaller than 150 px in either dimension, is blurry (``is_blurry``) or
    is dark (``is_dark``). Every remaining image is saved to ``output_dir``
    as ``aug_<original name>`` after two independent random steps, each
    applied with probability 1/3: a 3x3 Gaussian blur and an HSV jitter.

    Args:
        images_dir: Directory containing the source images.
        output_dir: Directory that receives the results; wiped first.

    Raises:
        ValueError: If ``output_dir`` is ``images_dir`` itself or one of its
            parent directories, because clearing it would delete the inputs.
    """
    # Clearing output_dir must never take the source images with it.
    src_root = os.path.normcase(os.path.realpath(images_dir))
    dst_root = os.path.normcase(os.path.realpath(output_dir))
    if src_root == dst_root or src_root.startswith(dst_root.rstrip(os.sep) + os.sep):
        raise ValueError(
            f"output_dir {output_dir!r} contains images_dir {images_dir!r}; "
            "refusing to delete the source images"
        )

    # List the inputs before touching the output so that a missing or
    # mistyped images_dir fails without wiping a previous result.
    image_files = [
        f for f in os.listdir(images_dir)
        if f.lower().endswith(('.jpg', '.jpeg'))
    ]

    # Start from an empty output directory.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    for image_file in image_files:
        img_path = os.path.join(images_dir, image_file)
        img = cv2.imread(img_path)

        # cv2.imread signals an unreadable/corrupt file by returning None.
        if img is None:
            continue

        # Drop images that are too small, blurry or dark (cheap checks first).
        if img.shape[0] < 150 or img.shape[1] < 150 or is_blurry(img) or is_dark(img):
            continue

        # Random light blur.
        if np.random.randint(0, 3) == 0:
            img = cv2.GaussianBlur(img, (3, 3), 0)

        # Random HSV jitter. np.random.randint excludes its upper bound, so
        # the offsets fall in [-5, 4], [-15, 14] and [-25, 24]. The math is
        # done in int and clipped to each channel's range before being
        # stored back into the HSV array.
        if np.random.randint(0, 3) == 0:
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            hsv[..., 0] = np.clip(hsv[..., 0].astype(int) + np.random.randint(-5, 5), 0, 179)
            hsv[..., 1] = np.clip(hsv[..., 1].astype(int) + np.random.randint(-15, 15), 0, 255)
            hsv[..., 2] = np.clip(hsv[..., 2].astype(int) + np.random.randint(-25, 25), 0, 255)
            img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

        # Save the (possibly unchanged) image under an "aug_" prefix.
        cv2.imwrite(os.path.join(output_dir, f"aug_{image_file}"), img)

# Run the filter/augmentation pass on the photos of class folder '3001'.
# Note: augment_images deletes and recreates output_dir on every run.
images_dir = '../Dataset/photos/3001/'
output_dir = '../Dataset/augmented_photos/3001'
augment_images(images_dir, output_dir)

../Dataset/photos/3001/3001_Aqua_0_1608398303.jpeg
../Dataset/photos/3001/3001_Aqua_0_1608532549.jpeg
../Dataset/photos/3001/3001_Aqua_0_1608651776.jpeg
../Dataset/photos/3001/3001_Aqua_0_1608655679.jpeg
../Dataset/photos/3001/3001_Aqua_0_1608817965.jpeg
../Dataset/photos/3001/3001_Aqua_0_1617500070.jpeg
../Dataset/photos/3001/3001_Aqua_0_1619192040.jpeg
../Dataset/photos/3001/3001_Aqua_0_1619401353.jpeg
../Dataset/photos/3001/3001_Aqua_0_1619968531.jpeg
../Dataset/photos/3001/3001_Aqua_1_1608398350.jpeg
../Dataset/photos/3001/3001_Aqua_1_1608532614.jpeg
../Dataset/photos/3001/3001_Aqua_1_1608651803.jpeg
../Dataset/photos/3001/3001_Aqua_1_1608655740.jpeg
../Dataset/photos/3001/3001_Aqua_1_1608817990.jpeg
../Dataset/photos/3001/3001_Aqua_1_1619401353.jpeg
../Dataset/photos/3001/3001_Aqua_2_1608532660.jpeg
../Dataset/photos/3001/3001_Aqua_2_1608651833.jpeg
../Dataset/photos/3001/3001_Aqua_2_1608655781.jpeg
../Dataset/photos/3001/3001_Aqua_2_1608818021.jpeg
../Dataset/photos/3001/3001_Aqu

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split

# Global hyperparameters
modellr = 1e-4
BATCH_SIZE = 64
EPOCHS = 20
SEED = 42  # fixes the train/test split so the reported accuracy is comparable between runs
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Preprocessing: resize to 224x224, convert to a tensor, then normalize every
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# Load the samples; ImageFolder takes the labels from the sub-directory names.
data_set = datasets.ImageFolder('../Dataset/photos', transform)
print(data_set.class_to_idx)

# Random 80/20 train/test split. The explicit seeded generator makes the split
# identical on every run, so the held-out set does not change between runs.
train_ratio = 0.8
train_size = int(train_ratio * len(data_set))
test_size = len(data_set) - train_size
train_set, test_set = random_split(
    data_set,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SEED),
)

# Batch loaders; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# Pretrained ResNet-50 with a frozen backbone; only the replacement fc layer
# (created after the freeze, so it keeps requires_grad=True) is trained.
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.fc.in_features
num_classes = len(data_set.classes)
model.fc = nn.Linear(num_ftrs, num_classes)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
# Adam with a low learning rate. Frozen parameters never receive gradients,
# so in practice only model.fc is updated.
optimizer = optim.Adam(model.parameters(), lr=modellr)

def adjust_learning_rate(optimizer, epoch, initial_lr):
    """Apply a step schedule: the rate is ``initial_lr`` scaled by 0.1 for every 30 full epochs.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts.
        epoch: Zero-based index of the epoch about to run.
        initial_lr: Learning rate used during the first 30 epochs.

    The ``'lr'`` entry of every parameter group is overwritten in place;
    nothing is returned.
    """
    completed_steps = epoch // 30
    decayed_lr = initial_lr * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr


# Train the model
def train_model(model, criterion, optimizer, train_loader, epochs, device, initial_lr):
    """Run a supervised training loop and print the mean batch loss per epoch.

    Args:
        model: Module to optimize; switched to training mode once up front.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer whose learning rate is reset at the start of
            each epoch via ``adjust_learning_rate``.
        train_loader: Iterable of ``(inputs, labels)`` batches supporting ``len``.
        epochs: Number of passes over ``train_loader``.
        device: Device every batch is moved to before the forward pass.
        initial_lr: Base learning rate handed to ``adjust_learning_rate``.
    """
    model.train()
    for epoch in range(epochs):
        # Refresh the learning rate for this epoch before any update happens.
        adjust_learning_rate(optimizer, epoch, initial_lr)
        running_loss = 0.0
        for batch_inputs, batch_targets in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_targets)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}")
    print("Training complete")


# 测试模型
def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Accuracy on the test set: {accuracy:.2f}%")

# Train the model, then report its accuracy on the held-out split
train_model(model, criterion, optimizer, train_loader, EPOCHS, DEVICE, modellr)
test_model(model, test_loader, DEVICE)

# Save the model: only the state_dict (weights) is written, not the module object
model_save_path = "../model/resnet50_with_renders.pth"
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")



{'17485': 0, '2456': 1, '2730': 2, '3001': 3, '3002': 4, '3003': 5, '3004': 6, '3006': 7, '3007': 8, '3009': 9, '3010': 10, '30414': 11, '3622': 12, '4600': 13, '6111': 14, '87079': 15}
Epoch 1/20, Loss: 1.8688180017535838
Epoch 2/20, Loss: 1.3501635281739603
Epoch 3/20, Loss: 1.1931923853689665
Epoch 4/20, Loss: 1.1039456704150032
Epoch 5/20, Loss: 1.046197960760661
Epoch 6/20, Loss: 1.0042561634628957
Epoch 7/20, Loss: 0.9695747367261388
Epoch 8/20, Loss: 0.9403432870588703
Epoch 9/20, Loss: 0.9191266004706913
Epoch 10/20, Loss: 0.8974268259795657
Epoch 11/20, Loss: 0.8827400753559376
Epoch 12/20, Loss: 0.866985842000809
Epoch 13/20, Loss: 0.8501224895291464
Epoch 14/20, Loss: 0.8434784568049749
Epoch 15/20, Loss: 0.8285353603963116
Epoch 16/20, Loss: 0.8223756243637353
Epoch 17/20, Loss: 0.8068088214839424
Epoch 18/20, Loss: 0.7992068046807275
Epoch 19/20, Loss: 0.7931198143829996
Epoch 20/20, Loss: 0.7844932316039993
Training complete
Accuracy on the test set: 74.72%
Model saved to

In [6]:
# 模型预测效果
from PIL import Image, ImageDraw
import os
import torch
from torchvision import transforms, models

# Paths and settings for the prediction pass
model_path = '../model/resnet50_with_renders.pth'
image_dir = '../Dataset/test/4'
output_dir = '../Dataset/test_result'
num_classes = 16  # must equal the output size of the fc layer stored in the checkpoint


# Build the architecture only. Every weight is overwritten by the checkpoint
# loaded below, so asking for the pretrained ImageNet weights would only
# trigger a needless download.
model = models.resnet50(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, num_classes)

# Load the trained state_dict. The model stays on the CPU in this cell, so the
# tensors are mapped to the CPU; this also keeps a checkpoint saved from a
# CUDA run loadable on a machine without a GPU.
model.load_state_dict(torch.load(model_path, map_location='cpu'))
model.eval()

# Preprocessing: resize to 224x224, convert to a tensor, then normalize every
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# Load and preprocess one image
def load_and_preprocess_image(image_path):
    """Read an image file and turn it into a single-image batch for the model.

    The image is converted to 3-channel RGB before the module-level
    ``transform`` is applied, so grayscale, palette or RGBA files (for
    example PNGs) yield the three channels that the 3-value ``Normalize``
    step expects.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The transformed image with a leading batch dimension added by
        ``unsqueeze(0)``.
    """
    # The context manager closes the file; convert() returns an independent
    # in-memory image that remains valid afterwards.
    with Image.open(image_path) as image:
        rgb_image = image.convert('RGB')
    return transform(rgb_image).unsqueeze(0)

# Prediction helper
def predict(model, image_tensor):
    """Return the index of the highest-scoring class for one image.

    Args:
        model: Callable mapping ``image_tensor`` to a 2-D tensor of class
            scores (one row per image).
        image_tensor: Batch holding exactly one image; ``.item()`` below
            only works on a single-element result.

    Returns:
        The predicted class index as a Python int.
    """
    # Inference only: skip building the autograd graph to save memory and time.
    with torch.no_grad():
        outputs = model(image_tensor)
        _, predicted = torch.max(outputs, 1)
    return predicted.item()

# Predict every image in image_dir and save a copy with the prediction drawn on it.
# Make sure the destination exists first; Image.save does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(image_dir):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(image_dir, filename)
        image_tensor = load_and_preprocess_image(image_path)
        predicted_class_idx = predict(model, image_tensor)

        # Write the predicted class index onto the original (unresized) image in red.
        output_image = Image.open(image_path)
        draw = ImageDraw.Draw(output_image)
        draw.text((10, 30), f'Predicted: {predicted_class_idx}', fill=(255, 0, 0))

        # Save the annotated copy under the same file name.
        output_path = os.path.join(output_dir, filename)
        output_image.save(output_path)

print("All images processed and saved in the output directory.")

All images processed and saved in the output directory.
