In [None]:
import cv2 
from matplotlib import pyplot as plt

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import random
import os
import csv
import cv2
from frame import FLCDataset, visualize_transformed_image


In [None]:
# Network input size as [width, height]; label_transform reads index 0 as the
# target width and index 1 as the target height.
IMG_INPUT_SIZE = [12, 12]

# Image preprocessing pipeline applied to every sample.
transform = transforms.Compose([
    # Stretch the image to exactly (height, width). label_transform scales x and y
    # independently by target/original size, so the image must be stretched the
    # same way. The previous shorter-side resize followed by a center crop
    # discarded pixels on non-square images and left the labels misaligned.
    transforms.Resize((IMG_INPUT_SIZE[1], IMG_INPUT_SIZE[0])),
    transforms.ToTensor(),  # Convert a PIL image or NumPy ndarray to a FloatTensor.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # Normalize with the ImageNet mean and std.
                         std=[0.229, 0.224, 0.225])
])


def label_transform(label, img_size):
    """Rescale a flat list of coordinates to the network input size.

    Values at even positions are treated as x coordinates and values at odd
    positions as y coordinates.

    Args:
        label: Iterable of numbers laid out as x, y, x, y, ...
        img_size: Original image size, unpacked as (height, width).
            NOTE(review): PIL's ``Image.size`` is (width, height) - confirm the
            caller passes the order expected here.

    Returns:
        A new list with every x multiplied by target_width / width and every y
        multiplied by target_height / height, where the target size comes from
        the module-level IMG_INPUT_SIZE ([width, height]).
    """
    target_h, target_w = IMG_INPUT_SIZE[1], IMG_INPUT_SIZE[0]
    src_h, src_w = img_size

    # Per-axis scale factors from the original image to the target size.
    scale_for_x = target_w / src_w
    scale_for_y = target_h / src_h

    return [
        coord * (scale_for_x if idx % 2 == 0 else scale_for_y)
        for idx, coord in enumerate(label)
    ]



In [44]:
# Root folder that holds the CSV index files and the image folders of each split.
# Defaults to the original location; set the FACE_LOC_DIR environment variable
# to run the notebook on another machine without editing this cell.
DATA_ROOT = os.environ.get("FACE_LOC_DIR", r"C:\Users\lucyc\Desktop\face_loc")

train_dataset = FLCDataset(os.path.join(DATA_ROOT, "train.csv"), os.path.join(DATA_ROOT, "train"), transform, label_transform)
val_dataset = FLCDataset(os.path.join(DATA_ROOT, "val.csv"), os.path.join(DATA_ROOT, "val"), transform, label_transform)
test_dataset = FLCDataset(os.path.join(DATA_ROOT, "test.csv"), os.path.join(DATA_ROOT, "test"), transform, label_transform)

# Only the training data is shuffled; validation and test iterate in a fixed
# order so that evaluation runs are repeatable.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [45]:
#a, b = next(iter(val_loader))

In [46]:
def get_args(b):
    """Parse a whitespace-separated string of numbers into a list of ints.

    Each token is read as a float and floored (rounded toward negative
    infinity) before being converted to int.
    """
    floored = []
    for token in b.split():
        floored.append(int(float(token) // 1))
    return floored

In [47]:
#visualize_transformed_image(a[6],get_args(b[6]),get_args(b[6]))

In [48]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# Device type strings must be lowercase: torch.device("CPU") raises an error,
# which made this cell crash on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Clearing the CUDA cache is only meaningful when a GPU is actually in use.
if device.type == "cuda":
    torch.cuda.empty_cache()

cuda:0


In [49]:
class PNet(nn.Module):
    """Small convolutional network with three 1x1-convolution output heads.

    For a 12x12 RGB input the spatial size shrinks 12 -> 10 -> (max-pool) 5 -> 3 -> 1,
    so each head produces one vector per image:

    * ``facedet``:  2 channels (classification logits)
    * ``bbox``:     4 channels (bounding-box regression)
    * ``landmark``: 10 channels (landmark regression)
    """

    def __init__(self):
        super(PNet, self).__init__()

        # Shared feature extractor.
        self.conv1 = nn.Conv2d(3, 10, 3)  # 12 -> 10 -> max-pool -> 5
        self.conv2 = nn.Conv2d(10, 16, 3)  # 5 -> 3
        self.conv3 = nn.Conv2d(16, 32, 3)  # 3 -> 1

        # Task heads (1x1 convolutions, spatial size unchanged).
        self.face_det = nn.Conv2d(32, 2, 1)
        self.bbox = nn.Conv2d(32, 4, 1)
        self.landmark = nn.Conv2d(32, 10, 1)

    def forward(self, x):
        """Run the network.

        Args:
            x: Image batch of shape (N, 3, H, W); H = W = 12 yields 1x1 outputs.

        Returns:
            Tuple ``(facedet, bbox, landmark)`` of raw head outputs with
            2, 4 and 10 channels respectively.
        """
        x = F.relu(self.conv1(x))  # 10
        x = F.max_pool2d(x, 2)  # 5
        x = F.relu(self.conv2(x))  # 3
        x = F.relu(self.conv3(x))  # 1

        # The heads stay linear. A ReLU here would clamp classification logits
        # and regression outputs to be non-negative and zero their gradient
        # whenever the pre-activation is negative.
        facedet = self.face_det(x)
        bbox = self.bbox(x)
        landmark = self.landmark(x)

        return facedet, bbox, landmark


In [None]:
class PNetLoss(nn.Module):
    """Loss for the PNet heads; the ``type`` argument selects which head is scored."""

    def __init__(self):
        super(PNetLoss, self).__init__()
        # No parameters or sub-layers are needed.

    def forward(self, facedet, bbox, landmark, label, type):
        """Compute the loss of one head against ``label``.

        Args:
            facedet: Face classification output; used when ``type == 'face'``.
            bbox: Bounding-box regression output; used when ``type == 'bbox'``.
            landmark: Landmark regression output; used when ``type == 'landmark'``.
            label: Target passed to the selected loss function together with
                the selected head output.
            type: One of ``'face'``, ``'bbox'`` or ``'landmark'``.

        Returns:
            Cross-entropy loss for ``'face'``, mean-squared-error loss for
            ``'bbox'`` and ``'landmark'``.

        Raises:
            ValueError: If ``type`` is not one of the supported names.
        """
        if type == 'face':
            # Face classification loss.
            return F.cross_entropy(facedet, label)

        if type == 'bbox':
            # Bounding-box regression loss.
            return F.mse_loss(bbox, label)

        if type == 'landmark':
            # Landmark regression loss.
            return F.mse_loss(landmark, label)

        # Previously an unknown type fell through to leftover triplet-loss code
        # that referenced undefined variables and failed with a NameError.
        raise ValueError(
            "Unknown loss type: {!r}; expected 'face', 'bbox' or 'landmark'".format(type)
        )

In [None]:
# Clear the CUDA cache before the model is built in the next cell.
torch.cuda.empty_cache()
# NOTE(review): autograd anomaly detection is a debugging aid and is documented
# to slow training down - consider switching it off once the loop is stable.
torch.autograd.set_detect_anomaly(True)

In [None]:
def _parse_label(raw_label):
    """Turn the label field of one batch into a list of floored ints.

    NOTE(review): assumes the dataset yields the label as a whitespace-separated
    string, which the DataLoader's default collate returns wrapped in a
    list/tuple (one entry per sample) - confirm against FLCDataset.
    """
    if not isinstance(raw_label, str):
        raw_label = raw_label[0]  # batch_size == 1: take the only sample
    # NOTE(review): flooring to int keeps the original parsing; on a 12x12 input
    # this discards sub-pixel precision - consider plain float(x).
    return [int(float(x) // 1) for x in raw_label.split()]


def _pnet_batch_loss(model, criterion, inputs, device):
    """Run the model on one batch and return the loss of the labelled head.

    A label with more than four values is scored against the landmark head,
    anything else against the bounding-box head. The face classification head
    is not trained here because the batch carries no class label.
    Assumes batch_size == 1, as configured on the loaders.
    """
    img_tensor = inputs[0].to(device)
    label = _parse_label(inputs[1])

    facedet, bbox, landmark = model(img_tensor)

    # Shape (1, K) to match the flattened (N=1, K) head output.
    target = torch.tensor(label, dtype=torch.float32, device=device).unsqueeze(0)

    if len(label) > 4:
        loss_type, pred = 'landmark', landmark.flatten(1)
    else:
        loss_type, pred = 'bbox', bbox.flatten(1)

    if pred.shape != target.shape:
        # Fail loudly instead of letting mse_loss broadcast mismatched shapes.
        raise ValueError(
            "Label with {} values does not match the {} head output of shape {}".format(
                len(label), loss_type, tuple(pred.shape)))

    if loss_type == 'landmark':
        return criterion(facedet, bbox, pred, target, loss_type)
    return criterion(facedet, pred, landmark, target, loss_type)


model = PNet()
print(model)

model.to(device)  # Move the model to the GPU when one is available.

# Loss function and optimizer.
criterion = PNetLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Train the model.
num_epochs = 5

for epoch in range(num_epochs):
    model.train()  # Training mode.

    train_loss_acc = 0.
    train_num = 0

    for inputs in train_loader:
        optimizer.zero_grad()  # Clear gradients from the previous step.
        loss = _pnet_batch_loss(model, criterion, inputs, device)
        loss.backward()  # Back-propagate.
        optimizer.step()  # Update the parameters.

        train_loss_acc += loss.item()
        train_num += 1

    model.eval()  # Evaluation mode.
    val_loss_acc = 0.
    val_num = 0
    with torch.no_grad():
        for inputs in val_loader:
            loss = _pnet_batch_loss(model, criterion, inputs, device)
            # Accumulate a Python float rather than a tensor.
            val_loss_acc += loss.item()
            val_num += 1

    # max(..., 1) avoids a ZeroDivisionError when a loader is empty.
    print("Epoch [{}/{}], Loss: {:.2f}, Val_loss: {:.2f}".format(epoch+1, num_epochs, train_loss_acc/max(train_num, 1), val_loss_acc/max(val_num, 1)))
