# 人脸检测代码

主要功能包括：
1. 图像预处理和格式转换
2. 多尺度人脸检测
3. 检测结果的后处理和可视化


## 导入必要的包

In [1]:
# %load test.py
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function

import os
import cv2
import numpy as np
from PIL import Image
from pathlib import Path

import torch
from torch.autograd import Variable
import torch.backends.cudnn as cudnn

from models.factory import build_net
from torchvision.utils import make_grid
import glob


## 设置CUDA环境

检查是否可以使用GPU，并相应地设置PyTorch的默认张量类型。

In [None]:

# Use the GPU whenever PyTorch reports that CUDA is available.
use_cuda = torch.cuda.is_available()

if use_cuda:
    # Newly created tensors default to CUDA float tensors.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    # Enable cuDNN autotuning. The flag is spelled `benchmark`; the earlier
    # `benckmark` spelling only created an unused attribute.
    cudnn.benchmark = True
else:
    torch.set_default_tensor_type('torch.FloatTensor')


## 工具函数

### 张量转图像
将PyTorch张量转换为numpy数组格式的图像。

In [None]:

def tensor_to_image(tensor):
    """Tile ``tensor`` into a single grid and return it as a uint8 array.

    The grid built by ``make_grid`` is scaled by 255, offset by 0.5 so the
    later integer cast rounds instead of truncating, clamped to [0, 255],
    permuted with axes (1, 2, 0) and finally moved to the CPU as ``uint8``.

    Args:
        tensor: Input accepted by ``make_grid``
            (presumably values in [0, 1] -- TODO confirm against callers).

    Returns:
        np.ndarray: The grid as a ``uint8`` NumPy array.
    """
    grid = make_grid(tensor)
    # ``mul`` allocates a new tensor, so the in-place ops below never touch
    # the grid returned by make_grid.
    scaled = grid.mul(255)
    scaled.add_(0.5)
    scaled.clamp_(0, 255)
    channels_last = scaled.permute(1, 2, 0)
    return channels_last.to('cpu', torch.uint8).numpy()


### 图像格式转换
将图像从HWC格式转换为CHW格式，并从RGB转换为BGR。这是深度学习模型常用的输入格式。

In [None]:

def to_chw_bgr(image):
    """Convert an image from HWC/RGB layout to CHW/BGR layout.

    Args:
        image (np.ndarray): Image laid out as (height, width, channel) with
            channels in RGB order.

    Returns:
        np.ndarray: New array with the channel axis first and the channels
        taken in the order 2, 1, 0.
    """
    if image.ndim == 3:
        # Move the channel axis to the front: (H, W, C) -> (C, H, W).
        image = np.transpose(image, (2, 0, 1))
    # Select channels in reverse order (RGB -> BGR).
    return image[[2, 1, 0], :, :]


## 人脸检测核心函数

### 基础人脸检测
使用深度学习模型检测图像中的人脸，返回检测框和置信度分数。

In [None]:

def detect_face(img, tmp_shrink):
    """Run the module-level ``net`` on a rescaled copy of ``img``.

    Args:
        img (np.ndarray): Image in HWC layout
            (presumably RGB, as loaded by the caller -- TODO confirm).
        tmp_shrink (float): Scale factor applied to both axes before the
            forward pass.

    Returns:
        np.ndarray: One row ``[xmin, ymin, xmax, ymax, score]`` per
        detection. The box values are the network outputs multiplied by the
        width/height of the un-resized ``img`` (presumably the network emits
        normalized coordinates -- TODO confirm). When nothing is detected a
        single placeholder row ``[0, 0, 0, 0, 0.001]`` is returned.
    """
    resized = cv2.resize(img, None, None, fx=tmp_shrink, fy=tmp_shrink,
                         interpolation=cv2.INTER_LINEAR)

    # Channel-first float32 input scaled by 1/255.
    blob = to_chw_bgr(resized).astype('float32') / 255.
    # to_chw_bgr reversed the channel order; this second reversal puts the
    # channels back in their original order before they reach the network.
    blob = blob[[2, 1, 0], :, :]

    batch = Variable(torch.from_numpy(blob).unsqueeze(0))
    if use_cuda:
        batch = batch.cuda()

    detections = net.test_forward(batch)[0].data.cpu().numpy()

    height, width = img.shape[0], img.shape[1]
    scale = np.array([width, height, width, height])

    boxes = []
    scores = []
    for cls in range(detections.shape[1]):
        # Take the leading run of entries with a positive score and stop at
        # the first non-positive one.
        for rank in range(detections.shape[2]):
            score = detections[0, cls, rank, 0]
            if not score > 0.0:
                break
            pt = detections[0, cls, rank, 1:] * scale
            boxes.append([pt[0], pt[1], pt[2], pt[3]])
            scores.append(score)

    if not boxes:
        return np.array([[0, 0, 0, 0, 0.001]])

    # No division by tmp_shrink: the boxes were scaled with the size of the
    # un-resized image above.
    return np.column_stack((np.array(boxes), np.array(scores)))


### 图像翻转测试
对图像进行水平翻转后进行检测，然后将检测结果映射回原始图像坐标系。

In [None]:

def flip_test(image, shrink):
    """Detect faces on a horizontally flipped image and un-flip the boxes.

    Args:
        image (np.ndarray): Input image in HWC layout.
        shrink (float): Scale factor forwarded to ``detect_face``.

    Returns:
        np.ndarray: Detections with the same shape as the ``detect_face``
        output, with x coordinates mirrored back into the frame of the
        un-flipped ``image``.
    """
    mirrored = cv2.flip(image, 1)
    det_f = detect_face(mirrored, shrink)

    width = image.shape[1]
    det_t = np.zeros(det_f.shape)
    # Mirroring swaps the roles of the left and right edges.
    det_t[:, 0] = width - det_f[:, 2]
    det_t[:, 2] = width - det_f[:, 0]
    # Vertical coordinates and scores are unaffected by the flip.
    for col in (1, 3, 4):
        det_t[:, col] = det_f[:, col]
    return det_t


### 多尺度测试
通过不同的缩放比例对图像进行检测，以提高对不同大小人脸的检测效果。

In [None]:

def multi_scale_test(image, max_im_shrink):
    """Detect faces at several down- and up-scaled versions of ``image``.

    Args:
        image (np.ndarray): Input image in HWC layout.
        max_im_shrink (float): Largest scale factor that may be used.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(det_s, det_b)``.
        ``det_s`` holds detections from the shrunken passes, restricted to
        boxes whose longer side exceeds 30. ``det_b`` holds detections from
        the second group of passes, restricted to boxes whose shorter side
        is below 100 when the final scale is above 1, otherwise to boxes
        whose longer side exceeds 30. Rows follow the ``detect_face``
        layout ``[xmin, ymin, xmax, ymax, score]``.
    """
    # Shrunken pass(es): only large faces are kept from these.
    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
    det_s = detect_face(image, st)
    if max_im_shrink > 0.75:
        # np.vstack replaces np.row_stack, an alias deprecated in NumPy 2.0.
        det_s = np.vstack((det_s, detect_face(image, 0.75)))
    longer_side = np.maximum(det_s[:, 2] - det_s[:, 0] + 1,
                             det_s[:, 3] - det_s[:, 1] + 1)
    det_s = det_s[longer_side > 30, :]

    # One pass at an enlarged (or intermediate) scale.
    bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
    det_b = detect_face(image, bt)

    # Further enlarged passes to pick up small faces.
    if max_im_shrink > 1.5:
        det_b = np.vstack((det_b, detect_face(image, 1.5)))
    if max_im_shrink > 2:
        bt *= 2
        while bt < max_im_shrink:
            det_b = np.vstack((det_b, detect_face(image, bt)))
            bt *= 2
        det_b = np.vstack((det_b, detect_face(image, max_im_shrink)))

    box_w = det_b[:, 2] - det_b[:, 0] + 1
    box_h = det_b[:, 3] - det_b[:, 1] + 1
    if bt > 1:
        # Enlarged passes are only trusted for small faces.
        keep = np.minimum(box_w, box_h) < 100
    else:
        keep = np.maximum(box_w, box_h) > 30
    det_b = det_b[keep, :]

    return det_s, det_b


### 金字塔多尺度测试
使用图像金字塔方法进行多尺度检测，特别适合检测不同大小的人脸。

In [None]:

def multi_scale_test_pyramid(image, max_shrink):
    """Detect faces over a fixed pyramid of scale factors.

    A pass at scale 0.25 keeps boxes whose longer side exceeds 30. Passes at
    1.25, 1.75 and 2.25 are run only when the scale does not exceed
    ``max_shrink`` and keep boxes whose shorter side is below 100.

    Args:
        image (np.ndarray): Input image in HWC layout.
        max_shrink (float): Largest scale factor that may be used.

    Returns:
        np.ndarray: Stacked detections in the ``detect_face`` row layout
        ``[xmin, ymin, xmax, ymax, score]``.
    """
    det_b = detect_face(image, 0.25)
    longer_side = np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
                             det_b[:, 3] - det_b[:, 1] + 1)
    det_b = det_b[longer_side > 30, :]

    for scale in (1.25, 1.75, 2.25):
        if scale > max_shrink:
            continue
        det_temp = detect_face(image, scale)
        box_w = det_temp[:, 2] - det_temp[:, 0] + 1
        box_h = det_temp[:, 3] - det_temp[:, 1] + 1
        if scale > 1:
            # Enlarged passes are only trusted for small faces.
            keep = np.minimum(box_w, box_h) < 100
        else:
            # Not reached with the current scales (all are above 1); kept so
            # the filter stays correct if a scale <= 1 is added to the list.
            keep = np.maximum(box_w, box_h) > 30
        # np.vstack replaces np.row_stack, an alias deprecated in NumPy 2.0.
        det_b = np.vstack((det_b, det_temp[keep, :]))
    return det_b


### 边界框投票
对多个检测结果进行边界框投票（box voting，一种类似非极大值抑制(NMS)的后处理）：按置信度从高到低依次取框，将与其IoU不低于0.3的检测框按置信度加权平均合并为一个框，合并后的置信度取组内最大值；没有与任何其他框重叠的单独检测框会被丢弃。

In [None]:

def bbox_vote(det_):
    """Merge overlapping detections by score-weighted box voting.

    Boxes are visited in descending score order. Every remaining box whose
    IoU with the current top box is at least 0.3 is merged with it: the
    merged coordinates are the score-weighted mean of the group and the
    merged score is the group's maximum. A box that overlaps no other box
    is discarded rather than emitted.

    Args:
        det_ (np.ndarray): Array of shape (N, 5) with rows
            ``[xmin, ymin, xmax, ymax, score]``.

    Returns:
        np.ndarray: Array of shape (M, 5) with the merged detections, capped
        at the first 750 rows.
    """
    order_ = det_[:, 4].ravel().argsort()[::-1]
    det_ = det_[order_, :]
    dets_ = np.zeros((0, 5), dtype=np.float32)
    while det_.shape[0] > 0:
        # IoU of the top-scoring box against every remaining box.
        area_ = (det_[:, 2] - det_[:, 0] + 1) * (det_[:, 3] - det_[:, 1] + 1)
        xx1 = np.maximum(det_[0, 0], det_[:, 0])
        yy1 = np.maximum(det_[0, 1], det_[:, 1])
        xx2 = np.minimum(det_[0, 2], det_[:, 2])
        yy2 = np.minimum(det_[0, 3], det_[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o_ = inter / (area_[0] + area_[:] - inter)

        merge_mask_ = o_ >= 0.3
        # The top box always belongs to its own group. For a well-formed box
        # its self-IoU is 1 so this changes nothing; for an inverted or
        # zero-area box the self-IoU is <= 0 or NaN, and without this line
        # the box would never be removed and the loop would not terminate.
        merge_mask_[0] = True
        det_accu_ = det_[merge_mask_, :]
        det_ = det_[~merge_mask_, :]

        # A box with no overlapping partner is dropped.
        if det_accu_.shape[0] <= 1:
            continue

        weights_ = det_accu_[:, -1:]
        det_accu_sum_ = np.zeros((1, 5))
        det_accu_sum_[:, 0:4] = (
            np.sum(det_accu_[:, 0:4] * weights_, axis=0) / np.sum(weights_)
        )
        det_accu_sum_[:, 4] = np.max(det_accu_[:, 4])
        # dets_ always has five columns, so stacking cannot fail and needs
        # no exception handler.
        dets_ = np.vstack((dets_, det_accu_sum_))

    return dets_[0:750, :]


### 模型加载
加载预训练的人脸检测模型。

In [None]:

def load_models(weights_path='/data1/home/chenruoyu/DAI-Net/weights/dark/dsfd_checkpoint.pth'):
    """Build the detection network and load its weights from a checkpoint.

    Args:
        weights_path (str): Checkpoint file to load. It must contain a dict
            whose ``'weight'`` entry is the model state dict. Defaults to
            the path previously hard-coded in this function.

    Returns:
        The network in eval mode, moved to the GPU when ``use_cuda`` is set.
    """
    print('构建网络')
    net = build_net('test', num_classes=2, model='dark')
    net.eval()
    # Deserialize onto the CPU so a checkpoint saved from a GPU also loads on
    # a CPU-only machine; load_state_dict copies into the model's own
    # parameters and the model is moved to the GPU below when available.
    checkpoint = torch.load(weights_path, map_location='cpu')
    net.load_state_dict(checkpoint['weight'])

    if use_cuda:
        net = net.cuda()

    return net


## 主程序

设置参数并执行人脸检测流程。首先通过参数控制检测策略，包括是否使用多尺度检测以及图像缩放比例。检测过程中采用了多种策略来提高检测效果：对原始图像进行检测、对水平翻转后的图像进行检测、使用不同缩放比例进行多尺度检测，以及使用图像金字塔方法进行检测。所有检测结果会被合并，并通过边界框投票（一种类似非极大值抑制(NMS)的方法）合并重叠的检测框，然后使用置信度阈值(0.8)过滤低置信度的结果。最后，代码会在图像上绘制检测框，并将结果保存为图像文件和包含检测框坐标及置信度分数的文本文件。整个过程中会显示处理进度，方便监控检测过程。这种多策略的检测方法特别适合处理包含不同大小、角度的人脸的图像，能够有效提高检测的准确性和鲁棒性。

In [None]:

def write_detections(txt_path, dets):
    """Write detections to ``txt_path``, one per line.

    Args:
        txt_path (str): Destination text file; overwritten if it exists.
        dets: Iterable of ``(xmin, ymin, xmax, ymax, score)`` rows.
    """
    # The context manager guarantees the file is flushed and closed.
    with open(txt_path, 'w') as fout:
        for xmin, ymin, xmax, ymax, score in dets:
            fout.write('{} {} {} {} {}\n'.format(xmin, ymin, xmax, ymax, score))


if __name__ == '__main__':

    # ---- Parameters ----
    # Set USE_MULTI_SCALE = False (for example with MY_SHRINK = 2) to run a
    # single detection pass instead of the multi-scale ensemble.
    USE_MULTI_SCALE = True
    MY_SHRINK = 1

    save_path = './result/'

    # Detections scoring at or below this value are discarded; adjust as
    # needed (e.g. 0.7).
    confidence_threshold = 0.8

    def load_images():
        """Return the list of image paths to process."""
        # Point this glob pattern at the test data.
        return glob.glob('/data1/home/chenruoyu/DAI-Net/eaf141b8ca103504e9cd8a33b625788.png')

    # ---- Main test ----
    net = load_models()
    img_list = load_images()

    os.makedirs(save_path, exist_ok=True)

    total = len(img_list)
    for now, img_path in enumerate(img_list, start=1):
        print('处理中: {}/{}'.format(now, total))

        # Load the image as a 3-channel RGB array. Converting every non-RGB
        # mode (grayscale, palette, RGBA, ...) keeps the array shape
        # consistent for detection and drawing.
        image = Image.open(img_path)
        if image.mode != 'RGB':
            image = image.convert('RGB')
        image = np.array(image)

        # Upper bound on the scale factor, derived from the maximum input
        # size of the Caffe implementation, and capped at 3.
        max_im_shrink = (0x7fffffff / 200.0 / (image.shape[0] * image.shape[1])) ** 0.5
        max_im_shrink = 3 if max_im_shrink > 3 else max_im_shrink

        with torch.no_grad():
            if USE_MULTI_SCALE:
                det0 = detect_face(image, MY_SHRINK)  # original image
                det1 = flip_test(image, MY_SHRINK)    # horizontally flipped
                det2, det3 = multi_scale_test(image, max_im_shrink)
                det4 = multi_scale_test_pyramid(image, max_im_shrink)
                # np.vstack replaces np.row_stack, deprecated in NumPy 2.0.
                dets = bbox_vote(np.vstack((det0, det1, det2, det3, det4)))
            else:
                dets = detect_face(image, MY_SHRINK)  # original image only

        # Keep only confident detections.
        filtered_dets = dets[dets[:, 4] > confidence_threshold]

        # Draw the boxes on a copy so the detection input stays untouched.
        output_image = image.copy()
        for xmin, ymin, xmax, ymax, score in filtered_dets:
            # Green reads the same in RGB and BGR channel order.
            cv2.rectangle(output_image, (int(xmin), int(ymin)),
                          (int(xmax), int(ymax)), (0, 255, 0), 2)
            # Optional: also draw the score next to the box.
            # cv2.putText(output_image, f'{score:.2f}', (int(xmin), int(ymin) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        stem = Path(img_path).stem

        # The array is RGB (from PIL) while cv2.imwrite expects BGR; convert
        # so the saved image keeps its true colors.
        output_filename = os.path.join(save_path, stem + '_det.jpg')
        cv2.imwrite(output_filename, cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR))

        # Save box coordinates and scores next to the image.
        write_detections(os.path.join(save_path, stem + '.txt'), filtered_dets)