In [11]:
import pandas as pd
from PIL import Image
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split
import random
import numpy as np

# Seed every random number generator this script relies on.
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same value.

    When CUDA is available, the generators of all GPUs are seeded as well.

    Args:
        seed: Integer seed passed unchanged to each library.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed all RNGs up front so the later random split and shuffling are repeatable.
set_seed(42)  # 42 is an arbitrary, conventional seed value

# Read the label spreadsheet; it is assumed to hold exactly two columns:
# the image name first and the class label second.
labels_df = pd.read_excel(r'E:\database\labels.xlsx', engine='openpyxl')  # parsed with the openpyxl engine

# Overwrite the header with fixed column names
# (pandas raises if the sheet does not have exactly two columns).
labels_df.columns = ['image_name', 'label']

# FaceDataset: a torch.utils.data.Dataset of grayscale images listed in a label table.
class FaceDataset(Dataset):
    """Grayscale image dataset driven by a table of image names and labels.

    Rows whose image file does not exist under ``root_dir`` are skipped, so
    dataset indices run over the existing images only.

    Args:
        labels_df: DataFrame with an ``image_name`` and a ``label`` column.
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to each loaded PIL image.
    """

    # Names that already end in one of these suffixes are used unchanged.
    _KNOWN_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, labels_df, root_dir, transform=None):
        self.labels_df = labels_df
        self.root_dir = root_dir
        self.transform = transform
        self.valid_indices = self._filter_valid_indices()

    def _image_path(self, row_idx):
        """Return the image file path for positional row ``row_idx`` of ``labels_df``.

        Names without a known image extension get ``.jpg`` appended. Both the
        validity scan and ``__getitem__`` go through this helper so they always
        agree on the path.
        """
        # Column-first access keeps the column's own dtype; a whole-row lookup
        # may upcast values when the columns have mixed numeric dtypes.
        img_name = str(self.labels_df['image_name'].iloc[row_idx])
        if not img_name.lower().endswith(self._KNOWN_EXTENSIONS):
            img_name += '.jpg'
        return os.path.join(self.root_dir, img_name)

    def _filter_valid_indices(self):
        """Return the positional row indices whose image file exists on disk."""
        return [
            row_idx
            for row_idx in range(len(self.labels_df))
            if os.path.exists(self._image_path(row_idx))
        ]

    def __len__(self):
        """Return the number of rows that have an existing image file."""
        return len(self.valid_indices)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the ``idx``-th existing image.

        The image is converted to single-channel mode ``'L'`` and then passed
        through ``transform`` when one was given.
        """
        row_idx = self.valid_indices[idx]
        # convert() returns a new, fully loaded image, so the file handle can
        # be closed as soon as the conversion is done.
        with Image.open(self._image_path(row_idx)) as img:
            image = img.convert('L')
        label = self.labels_df['label'].iloc[row_idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing pipeline applied to every image before it reaches the network.
transform = transforms.Compose([
    transforms.Resize(size=(100, 100)),             # fixed 100x100 input size
    transforms.Grayscale(num_output_channels=1),    # keep a single channel
    transforms.ToTensor(),                          # PIL image -> tensor
    transforms.Normalize(mean=(0.5,), std=(0.5,)),  # (x - 0.5) / 0.5 on the one channel
])

# Full dataset built from the label table and the image directory.
full_dataset = FaceDataset(labels_df, transform=transform, root_dir=r'E:\database\images')

# Split sizes: 80% training, 10% validation, the remainder for testing.
train_size = int(0.8 * len(full_dataset))
val_size = int(0.1 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size

# Two-stage random split: carve off the training set first, then divide the rest.
train_dataset, val_test_dataset = random_split(full_dataset, [train_size, val_size + test_size])
val_dataset, test_dataset = random_split(val_test_dataset, [val_size, test_size])

# Data loaders; only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=64, shuffle=False)
    for subset in (val_dataset, test_dataset)
)

# FaceNet: a small three-stage CNN classifier for single-channel images.
class FaceNet(nn.Module):
    """Three conv/ReLU/max-pool stages followed by two fully connected layers.

    Args:
        num_classes: Number of output logits.
        input_size: Side length of a square input image, or a
            ``(height, width)`` pair. Defaults to 100, the size the original
            hard-coded feature dimension was computed for.
    """

    def __init__(self, num_classes, input_size=100):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        # The 3x3 convolutions with padding 1 keep the spatial size; each of
        # the three 2x2 poolings halves it, rounding down (100 -> 50 -> 25 -> 12).
        for _ in range(3):
            height, width = height // 2, width // 2
        self.feature_dim = 128 * height * width

        self.fc1 = nn.Linear(self.feature_dim, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        ``x`` must be a ``(batch, 1, height, width)`` tensor whose spatial
        size matches the ``input_size`` given at construction.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, feature_dim), this can never merge samples of a batch when
        # the input size is wrong; fc1 raises a shape error instead.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Pick the compute device: the first CUDA GPU when available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# One output logit per distinct value in the label column.
# NOTE(review): CrossEntropyLoss presumably needs labels to be integers in
# 0..num_classes-1 - confirm against the spreadsheet.
model = FaceNet(num_classes=len(labels_df['label'].unique()))
model = model.to(device)

# Cross-entropy loss, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Shared evaluation helper used for both the per-epoch validation and the final test.
def _count_correct(model, loader, device):
    """Return ``(correct, total)`` for ``model`` evaluated on ``loader``.

    The model is switched to eval mode and gradient tracking is disabled for
    the duration of the pass.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total


def _accuracy_percent(correct, total):
    """Return the accuracy in percent, or NaN when no sample was evaluated."""
    # An empty split (e.g. a validation set of size 0 for a tiny dataset)
    # would otherwise raise ZeroDivisionError.
    return 100 * correct / total if total else float('nan')


# Train the model
num_epochs = 20
for epoch in range(num_epochs):
    model.train()  # enable dropout for the training pass
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the running loss every 100 steps.
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    # Validate after every epoch
    correct, total = _count_correct(model, val_loader, device)
    print(f'Epoch [{epoch+1}/{num_epochs}], Val Accuracy: {_accuracy_percent(correct, total)}%')

# Final evaluation on the held-out test split
correct, total = _count_correct(model, test_loader, device)
print(f'Accuracy of the network on the test dataset: {_accuracy_percent(correct, total)}%')

Epoch [1/20], Val Accuracy: 60.526315789473685%
Epoch [2/20], Val Accuracy: 60.526315789473685%
Epoch [3/20], Val Accuracy: 74.56140350877193%
Epoch [4/20], Val Accuracy: 77.19298245614036%
Epoch [5/20], Val Accuracy: 81.57894736842105%
Epoch [6/20], Val Accuracy: 82.45614035087719%
Epoch [7/20], Val Accuracy: 78.94736842105263%
Epoch [8/20], Val Accuracy: 79.82456140350877%
Epoch [9/20], Val Accuracy: 83.33333333333333%
Epoch [10/20], Val Accuracy: 85.08771929824562%
Epoch [11/20], Val Accuracy: 82.45614035087719%
Epoch [12/20], Val Accuracy: 85.96491228070175%
Epoch [13/20], Val Accuracy: 85.08771929824562%
Epoch [14/20], Val Accuracy: 82.45614035087719%
Epoch [15/20], Val Accuracy: 86.84210526315789%
Epoch [16/20], Val Accuracy: 81.57894736842105%
Epoch [17/20], Val Accuracy: 84.21052631578948%
Epoch [18/20], Val Accuracy: 83.33333333333333%
Epoch [19/20], Val Accuracy: 85.96491228070175%
Epoch [20/20], Val Accuracy: 84.21052631578948%
Accuracy of the network on the test dataset: 87