In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install mtcnn

Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lz4, mtcnn
Successfully installed lz4-4.4.4 mtcnn-1.0.0


In [None]:
import os
import time
import cv2
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
from mtcnn import MTCNN
from PIL import Image

# Khởi tạo detector
detector = MTCNN()

# Load ground truth
def load_annotations(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    data = {}
    for image in root.findall("image"):
        file_name = image.attrib["name"]
        box = image.find("box")
        x1, y1 = float(box.attrib["xtl"]), float(box.attrib["ytl"])
        x2, y2 = float(box.attrib["xbr"]), float(box.attrib["ybr"])
        data[file_name] = (x1, y1, x2, y2)
    return data

# Tính IoU
def compute_iou(boxA, boxB):
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA) * max(0, yB - yA)
    if interArea == 0:
        return 0.0
    boxAArea = max(1e-6, (boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
    boxBArea = max(1e-6, (boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
    return interArea / (boxAArea + boxBArea - interArea)

# Khoảng cách giữa hai tâm
def compute_center_distance(box1, box2):
    cx1 = (box1[0] + box1[2]) / 2
    cy1 = (box1[1] + box1[3]) / 2
    cx2 = (box2[0] + box2[2]) / 2
    cy2 = (box2[1] + box2[3]) / 2
    return np.sqrt((cx1 - cx2)**2 + (cy1 - cy2)**2)

# Xử lý toàn bộ ảnh
def process_images(base_folder, annotation_path):
    annotations = load_annotations(annotation_path)
    records = []

    for root, _, files in os.walk(base_folder):
        for file in files:
            if not file.lower().endswith((".jpg", ".png", ".jpeg")):
                continue
            file_path = os.path.join(root, file)
            rel_path = os.path.relpath(file_path, base_folder).replace("\\", "/")
            image_key = f"image_customer/{rel_path}"

            img = cv2.imread(file_path)
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            start = time.time()
            results = detector.detect_faces(img_rgb)
            inference_time = time.time() - start

            pred_box = [0, 0, 0, 0]

            # Chỉ lấy face có độ tin cậy cao nhất
            if results:
                best_result = max(results, key=lambda x: x['confidence'])
                x, y, w, h = best_result['box']
                pred_box = [x, y, x + w, y + h]

            gt_box = annotations.get(image_key, [0, 0, 0, 0])
            iou = compute_iou(pred_box, gt_box)
            center_distance = compute_center_distance(pred_box, gt_box)

            records.append({
                "file_name": image_key,
                "x1": pred_box[0],
                "y1": pred_box[1],
                "x2": pred_box[2],
                "y2": pred_box[3],
                "IoU": iou,
                "center_distance": center_distance,
                "inference_time": inference_time
            })

    return pd.DataFrame(records)



In [None]:
annotation_path = "/content/drive/MyDrive/Report/data/annotations.xml"
image_folder = "/content/drive/MyDrive/Report/data/image_customer"

df = process_images(image_folder, annotation_path)
df.to_csv("/content/drive/MyDrive/Report/result/MTCNN_face_detection.csv", index=False)
print("✅ Đã hoàn tất và lưu kết quả!")


FileNotFoundError: [Errno 2] No such file or directory: '/path/to/annotations.xml'

# Haar

In [15]:
import os
import cv2
import time
import pandas as pd
import xml.etree.ElementTree as ET
from math import sqrt

# Load Haar Cascade
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Load Ground Truth từ XML
def load_annotations(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    data = {}
    for image in root.findall("image"):
        file_name = image.attrib["name"]
        box = image.find("box")
        if box is not None:
            x1 = float(box.attrib["xtl"])
            y1 = float(box.attrib["ytl"])
            x2 = float(box.attrib["xbr"])
            y2 = float(box.attrib["ybr"])
            data[file_name] = (x1, y1, x2, y2)  # ✅ đúng định dạng (x1, y1, x2, y2)
    return data

# Tính IoU theo định dạng (x1, y1, x2, y2)
def calculate_iou(boxA, boxB):
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA) * max(0, yB - yA)
    if interArea == 0:
        return 0.0

    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    iou = interArea / float(boxAArea + boxBArea - interArea + 1e-6)
    return iou

# Tính khoảng cách tâm box theo định dạng (x1, y1, x2, y2)
def calculate_center_distance(box1, box2):
    cx1 = (box1[0] + box1[2]) / 2
    cy1 = (box1[1] + box1[3]) / 2
    cx2 = (box2[0] + box2[2]) / 2
    cy2 = (box2[1] + box2[3]) / 2
    return sqrt((cx1 - cx2)**2 + (cy1 - cy2)**2)

# Đường dẫn
image_root = "/content/drive/MyDrive/Report/data/image_customer"
annotation_file = '/content/drive/MyDrive/Report/data/annotations.xml'
output_csv = '/content/drive/MyDrive/Report/result/Haar_face_detection.csv'

# Load ground truth
groundtruth = load_annotations(annotation_file)

results = []

for dirpath, _, filenames in os.walk(image_root):
    for fname in filenames:
        if fname.lower().endswith(('.jpg', '.png')):
            full_path = os.path.join(dirpath, fname)
            rel_path = os.path.relpath(full_path, image_root).replace("\\", "/")
            image_key = f"image_customer/{rel_path}"  # ✅ giống với key trong XML

            img = cv2.imread(full_path)
            if img is None:
                print(f"[ERROR] Không thể đọc ảnh: {full_path}")
                continue

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            start_time = time.time()
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
            inference_time = time.time() - start_time

            # Chọn face lớn nhất nếu có
            if len(faces) > 0:
                x, y, w, h = max(faces, key=lambda b: b[2]*b[3])
                pred_box = (x, y, x + w, y + h)
            else:
                pred_box = (0, 0, 0, 0)

            gt_box = groundtruth.get(image_key, (0, 0, 0, 0))
            iou = calculate_iou(gt_box, pred_box)
            dist = calculate_center_distance(gt_box, pred_box)

            results.append({
                'filename': image_key,
                'x1': pred_box[0],
                'y1': pred_box[1],
                'x2': pred_box[2],
                'y2': pred_box[3],
                'IoU': round(iou, 4),
                'center_distance': round(dist, 2),
                'inference_time': round(inference_time, 4)
            })

# Xuất ra CSV
df = pd.DataFrame(results)
df.to_csv(output_csv, index=False)
print(f"[INFO] Đã lưu kết quả vào: {output_csv}")


[INFO] Đã lưu kết quả vào: /content/drive/MyDrive/Report/result/Haar_face_detection.csv


# Yolo

In [3]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.114-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [16]:
import os
import cv2
import time
import torch
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
from ultralytics import YOLO
import logging
from tqdm import tqdm

logging.getLogger("ultralytics").setLevel(logging.WARNING)

# ===================== PHẦN 1: HÀM TÍNH TOÁN =====================

def calculate_iou(boxA, boxB):
    """Tính IoU giữa hai bounding box định dạng (x1, y1, x2, y2)"""
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA) * max(0, yB - yA)
    if interArea == 0:
        return 0.0

    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return interArea / float(boxAArea + boxBArea - interArea + 1e-6)

def calculate_center_distance(box1, box2):
    """Tính khoảng cách giữa hai tâm bounding box định dạng (x1, y1, x2, y2)"""
    cx1 = (box1[0] + box1[2]) / 2
    cy1 = (box1[1] + box1[3]) / 2
    cx2 = (box2[0] + box2[2]) / 2
    cy2 = (box2[1] + box2[3]) / 2
    return np.sqrt((cx1 - cx2)**2 + (cy1 - cy2)**2)

# ===================== PHẦN 2: PHÁT HIỆN KHUÔN MẶT =====================

class YOLOFaceDetector:
    def __init__(self, model_path, device='cuda' if torch.cuda.is_available() else 'cpu'):
        self.device = device
        self.model = YOLO(model_path).to(device)

    def detect(self, img_path, conf_thres=0.25, iou_thres=0.45):
        img = cv2.imread(img_path)
        if img is None:
            print(f"[ERROR] Không thể đọc ảnh: {img_path}")
            return []

        results = self.model.predict(img, conf=conf_thres, iou=iou_thres, device=self.device)
        best_box = None
        best_conf = -1

        for result in results:
            for box, conf in zip(result.boxes.xyxy.cpu().numpy(), result.boxes.conf.cpu().numpy()):
                if conf > best_conf:
                    x1, y1, x2, y2 = map(int, box[:4])
                    best_box = (x1, y1, x2, y2)  # ✅ giữ nguyên (x1, y1, x2, y2)
                    best_conf = conf

        if best_box is None:
            print(f"[WARNING] Không phát hiện khuôn mặt trong ảnh: {img_path}")
        return [best_box] if best_box else []

# ===================== PHẦN 3: LOAD GROUND TRUTH =====================

def load_annotations(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    data = {}
    for image in root.findall("image"):
        file_name = image.attrib["name"]  # đã bao gồm image_customer/
        box = image.find("box")
        if box is not None:
            x1 = float(box.attrib["xtl"])
            y1 = float(box.attrib["ytl"])
            x2 = float(box.attrib["xbr"])
            y2 = float(box.attrib["ybr"])
            data[file_name] = (x1, y1, x2, y2)  # ✅ giữ nguyên (x1, y1, x2, y2)
    return data

# ===================== PHẦN 4: ĐÁNH GIÁ TOÀN BỘ =====================

def evaluate_yolo_face(image_root, model_path, annotation_path, output_csv):
    detector = YOLOFaceDetector(model_path)
    annotations = load_annotations(annotation_path)
    results = []

    for dirpath, _, filenames in os.walk(image_root):
        for fname in tqdm(filenames, desc="Processing"):
            if not fname.lower().endswith(('.jpg', '.png', '.jpeg')):
                continue

            full_path = os.path.join(dirpath, fname)
            rel_path = os.path.relpath(full_path, image_root).replace("\\", "/")
            image_key = f"image_customer/{rel_path}"

            start = time.time()
            detections = detector.detect(full_path)
            duration = time.time() - start

            if detections:
                pred_box = detections[0]  # (x1, y1, x2, y2)
            else:
                pred_box = (0, 0, 0, 0)

            gt_box = annotations.get(image_key, (0, 0, 0, 0))
            iou = calculate_iou(pred_box, gt_box)
            dist = calculate_center_distance(pred_box, gt_box)

            results.append({
                'file_name': image_key,
                'x1': pred_box[0],
                'y1': pred_box[1],
                'x2': pred_box[2],
                'y2': pred_box[3],
                'IoU': round(iou, 4),
                'center_distance': round(dist, 2),
                'inference_time': round(duration, 4)
            })

    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    print(f"[INFO] Kết quả đã được lưu tại: {output_csv}")

# ===================== PHẦN 5: CHẠY =====================

if __name__ == "__main__":
    image_root = '/content/drive/MyDrive/Report/data/image_customer'
    model_path = '/content/drive/MyDrive/Report/model/best.pt'
    annotation_path = '/content/drive/MyDrive/Report/data/annotations.xml'
    output_csv = '/content/drive/MyDrive/Report/result/Yolo_face_detection.csv'

    evaluate_yolo_face(image_root, model_path, annotation_path, output_csv)


Processing: 0it [00:00, ?it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  5.60it/s]
Processing: 100%|██████████| 4/4 [00:01<00:00,  3.97it/s]
Processing: 100%|██████████| 4/4 [00:01<00:00,  3.89it/s]
Processing: 100%|██████████| 4/4 [00:01<00:00,  4.00it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  4.88it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  5.76it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  6.00it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  6.97it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  6.63it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  6.62it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  7.28it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  6.42it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  5.89it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  6.73it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  7.27it/s]
Processing: 100%|██████████| 4/4 [00:00<00:00,  7.12it/s]
Processing: 100%|██████████| 4/4 [00:00<0

[INFO] Kết quả đã được lưu tại: /content/drive/MyDrive/Report/result/Yolo_face_detection.csv





# Retinaface

In [5]:
pip install insightface onnxruntime

Collecting insightface
  Downloading insightface-0.7.3.tar.gz (439 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/439.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m204.8/439.5 kB[0m [31m5.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.5/439.5 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting onnxruntime
  Downloading onnxruntime-1.21.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting onnx (from insightface)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (

In [8]:
import os
import cv2
import time
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
from insightface.model_zoo import model_zoo

# ===================== PHẦN 1: ĐỌC DỮ LIỆU GROUND TRUTH =====================

def load_annotations(xml_file):
    """Tải dữ liệu ground truth từ file XML"""
    tree = ET.parse(xml_file)
    root = tree.getroot()
    data = {}
    for image in root.findall("image"):
        file_name = image.attrib["name"]  # Đã bao gồm 'image_customer/...'
        box = image.find("box")
        if box is not None:
            x1, y1 = float(box.attrib["xtl"]), float(box.attrib["ytl"])
            x2, y2 = float(box.attrib["xbr"]), float(box.attrib["ybr"])
            data[file_name] = (x1, y1, x2, y2)
    return data

# ===================== PHẦN 2: TÍNH TOÁN IoU VÀ CENTER DISTANCE =====================

def compute_iou(boxA, boxB):
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA) * max(0, yB - yA)
    if interArea == 0:
        return 0.0
    boxAArea = max(1e-6, (boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
    boxBArea = max(1e-6, (boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
    return interArea / (boxAArea + boxBArea - interArea)

def compute_center_distance(box1, box2):
    cx1 = (box1[0] + box1[2]) / 2
    cy1 = (box1[1] + box1[3]) / 2
    cx2 = (box2[0] + box2[2]) / 2
    cy2 = (box2[1] + box2[3]) / 2
    return np.sqrt((cx1 - cx2)**2 + (cy1 - cy2)**2)

# ===================== PHẦN 3: XỬ LÝ ẢNH VỚI RETINAFACE =====================

def process_images(base_folder, det_model_path, annotation_path):
    annotations = load_annotations(annotation_path)
    records = []

    # Khởi tạo RetinaFace detector
    det_model = model_zoo.get_model(det_model_path)
    det_model.prepare(ctx_id=0, input_size=(640, 640), det_thresh=0.5)

    for root, _, files in os.walk(base_folder):
        for file in files:
            if not file.lower().endswith((".jpg", ".png", ".jpeg")):
                continue

            file_path = os.path.join(root, file)
            rel_path = os.path.relpath(file_path, base_folder).replace("\\", "/")
            image_key = f"image_customer/{rel_path}"  # ✅ KHÔNG thêm "image_customer/" vì đã có trong annotation

            image = cv2.imread(file_path)
            if image is None:
                continue

            start_time = time.time()
            bboxes, _ = det_model.detect(image, max_num=0, metric='default')
            inference_time = time.time() - start_time

            pred_box = [0, 0, 0, 0]
            if bboxes is not None and len(bboxes) > 0:
                bboxes = sorted(bboxes, key=lambda x: x[4], reverse=True)
                x1, y1, x2, y2, _ = bboxes[0]
                pred_box = [x1, y1, x2, y2]

            gt_box = annotations.get(image_key, [0, 0, 0, 0])

            if gt_box == [0, 0, 0, 0]:
                print(f"[WARNING] Không tìm thấy ground truth cho {image_key}")

            iou = compute_iou(pred_box, gt_box)
            center_distance = compute_center_distance(pred_box, gt_box)

            records.append({
                "file_name": image_key,
                "x1": pred_box[0],
                "y1": pred_box[1],
                "x2": pred_box[2],
                "y2": pred_box[3],
                "IoU": round(iou, 4),
                "center_distance": round(center_distance, 2),
                "inference_time": round(inference_time, 4)
            })

    return pd.DataFrame(records)

# ===================== PHẦN 4: CHẠY ĐÁNH GIÁ =====================

def evaluate_face_detection(image_folder, det_model_path, annotation_file, output_csv):
    """Đánh giá phát hiện khuôn mặt với RetinaFace và lưu kết quả vào CSV"""
    results_df = process_images(image_folder, det_model_path, annotation_file)
    results_df.to_csv(output_csv, index=False)
    print(f"[INFO] Đã lưu kết quả vào {output_csv}")


In [9]:
image_folder = "/content/drive/MyDrive/Report/data/image_customer"
det_model_path = "/content/drive/MyDrive/Report/model/det_10g.onnx"
annotation_file = "/content/drive/MyDrive/Report/data/annotations.xml"
output_csv = "/content/drive/MyDrive/Report/result/RetinaFace_face_detection.csv"

evaluate_face_detection(image_folder, det_model_path, annotation_file, output_csv)


Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
[INFO] Đã lưu kết quả vào /content/drive/MyDrive/Report/result/RetinaFace_face_detection.csv
