In [1]:
import cv2
import numpy as np
import os
import dlib
import torch
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from sklearn.metrics.pairwise import cosine_similarity

############################
# 1) Dlib model initialisation
############################
PREDICTOR_PATH = "C:/face/shape_predictor_68_face_landmarks.dat"
FACE_REC_MODEL_PATH = "C:/face/dlib_face_recognition_resnet_model_v1.dat"
# NOTE(review): MYFACE_DIR is not referenced anywhere else in the visible
# code; presumably the folder to pass to generate_average_embedding — confirm.
MYFACE_DIR = "C:/myface"

# Load the dlib models: face detector, landmark predictor and the
# face-recognition network that produces the descriptors.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(PREDICTOR_PATH)
face_rec_model = dlib.face_recognition_model_v1(FACE_REC_MODEL_PATH)

# Load the stored reference embedding; refuse to start without it.
if not os.path.exists("my_face_embedding.npy"):
    raise FileNotFoundError("my_face_embedding.npy 파일이 존재하지 않습니다. 먼저 얼굴을 학습시키세요!")
my_face_embedding = np.load("my_face_embedding.npy")

############################
# 2) Face enrolment helpers
############################
def get_face_embedding(image):
    """Return a dlib face descriptor for the first face found in *image*.

    The module-level ``detector`` is run on the image; landmarks for the
    first detection are fitted with ``predictor`` and passed to
    ``face_rec_model.compute_face_descriptor``.

    Args:
        image: Image array handed unchanged to the dlib detector, predictor
            and recognition model. NOTE(review): the enrolment path passes
            RGB data — confirm other callers do the same.

    Returns:
        The descriptor as a ``numpy.ndarray``, or ``None`` when the detector
        reports no face.
    """
    detections = detector(image)
    if len(detections) == 0:
        return None

    # Only the first detection is used; any further faces are ignored.
    landmarks = predictor(image, detections[0])
    descriptor = face_rec_model.compute_face_descriptor(image, landmarks)
    return np.array(descriptor)

def generate_average_embedding(folder_path, output_path="my_face_embedding.npy"):
    """Build, save and return the mean face embedding of a folder of photos.

    Every ``.jpg`` / ``.jpeg`` / ``.png`` file (case-insensitive) directly
    inside *folder_path* is loaded with OpenCV, converted from BGR to RGB and
    passed to ``get_face_embedding``. Files that cannot be read, or in which
    no face is detected, are reported on stdout and skipped.

    Args:
        folder_path: Directory containing the enrolment photos.
        output_path: Where the averaged embedding is written with
            ``numpy.save``. Defaults to the file name the rest of the script
            loads at start-up.

    Returns:
        The element-wise mean of all embeddings that were extracted.

    Raises:
        ValueError: If no embedding could be produced from the folder.
    """
    embeddings = []
    for file_name in os.listdir(folder_path):
        if not file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        img_path = os.path.join(folder_path, file_name)
        image = cv2.imread(img_path)
        if image is None:
            print(f"이미지 로드 실패: {img_path}")
            continue

        # The embedding pipeline is fed RGB, so convert OpenCV's BGR first.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        embedding = get_face_embedding(image_rgb)
        if embedding is None:
            print(f"얼굴 검출 실패: {img_path}")
            continue
        embeddings.append(embedding)

    if not embeddings:
        raise ValueError("임베딩을 하나도 생성하지 못했습니다.")

    avg_embedding = np.mean(embeddings, axis=0)
    np.save(output_path, avg_embedding)
    print("✅ 내 얼굴 평균 임베딩 저장 완료!")
    # Returned so callers can use the result without re-reading the file.
    return avg_embedding

############################
# 3) Face embedding comparison
############################
def is_my_face(face_embedding, my_face_embedding, threshold=0.6):
    """Decide whether *face_embedding* matches the reference embedding.

    The score is ``1 - ||my_face_embedding - face_embedding||`` (Euclidean
    norm), so it can be negative for distant embeddings. With the default
    threshold of 0.6 a match therefore requires a distance below 0.4.

    Args:
        face_embedding: Embedding of the candidate face, or ``None``.
        my_face_embedding: Reference embedding to compare against.
        threshold: The score must be strictly greater than this to match.

    Returns:
        A ``(is_match, score)`` pair; ``(False, 0.0)`` when *face_embedding*
        is ``None``.
    """
    if face_embedding is None:
        return False, 0.0

    distance = np.linalg.norm(my_face_embedding - face_embedding)
    score = 1 - distance  # distance-based similarity
    is_match = score > threshold
    return is_match, score

############################
# 4) YOLO + DeepSORT initialisation
############################
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the three YOLO models and move each one to the selected device.
detection_model = YOLO("C:/epoch180.pt").to(device)
segmentation_model = YOLO("yolov8n-seg.pt").to(device)
face_model = YOLO("C:/face1.pt").to(device)

# Create the DeepSORT tracker.
tracker = DeepSort(
    max_age=30,
    n_init=3,
    embedder='mobilenet',
    half=True,
    embedder_gpu=True,
)

############################
# 5) Face detection and segmentation
############################
def detect_faces(image):
    """Detect faces in *image* with the module-level YOLO ``face_model``.

    Args:
        image: Image passed unchanged to ``face_model``.

    Returns:
        A list of ``(x1, y1, x2, y2, conf)`` tuples, one per box in the first
        result: the four ``xyxy`` values converted to ``int`` and the box
        confidence converted to ``float``.
    """
    found = []
    for box in face_model(image)[0].boxes:
        left, top, right, bottom = (int(v) for v in box.xyxy[0])
        score = float(box.conf)
        found.append((left, top, right, bottom, score))
    return found

def apply_segmentation(image, bboxes):
    """Blend green segmentation masks into *image* inside each bounding box.

    Each box is clamped to the image bounds and cropped; the crop is run
    through the module-level ``segmentation_model`` and every returned mask
    is blended over the crop in the green channel. The result is written
    back into *image* in place.

    Args:
        image: Image array indexed ``[y, x, channel]``; modified in place.
        bboxes: Iterable of ``(x1, y1, x2, y2)`` integer pixel boxes.

    Returns:
        The same *image* object that was passed in.
    """
    for (x1, y1, x2, y2) in bboxes:
        # Clamp to the frame: a negative start would be read as "count from
        # the end" by NumPy slicing, and an oversized box would give a crop
        # smaller than (x2 - x1, y2 - y1), breaking the mask assignment below.
        frame_h, frame_w = image.shape[:2]
        x1 = max(0, min(x1, frame_w))
        x2 = max(0, min(x2, frame_w))
        y1 = max(0, min(y1, frame_h))
        y2 = max(0, min(y2, frame_h))

        person_crop = image[y1:y2, x1:x2]
        if person_crop.size == 0:
            continue

        segmentation_results = segmentation_model(person_crop)
        masks = segmentation_results[0].masks
        if masks is None:
            continue

        crop_h, crop_w = person_crop.shape[:2]
        for mask in masks.data:
            mask = mask.cpu().numpy()
            # NOTE(review): the mask is stretched straight onto the crop; if
            # the model returns masks at a padded inference resolution this is
            # slightly misaligned — confirm against the Ultralytics docs.
            mask_resized = cv2.resize(mask, (crop_w, crop_h))
            mask_binary = (mask_resized > 0.5).astype(np.uint8)
            color_mask = np.zeros_like(person_crop, dtype=np.uint8)
            color_mask[:, :, 1] = mask_binary * 255
            # addWeighted returns a new array, so masks accumulate on the
            # local crop rather than on the frame itself.
            person_crop = cv2.addWeighted(person_crop, 1, color_mask, 0.5, 0)

        # Write the blended crop back once all masks have been applied.
        image[y1:y2, x1:x2] = person_crop
    return image

############################
# 6) Real-time webcam loop
############################
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("웹캠을 열 수 없습니다.")
    # `exit()` is an interactive helper injected by `site`; raising
    # SystemExit ends a script the same way without depending on it.
    raise SystemExit

window_name = "YOLOv8 + DeepSORT + Segmentation + Dlib Face Recognition"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window_name, 960, 720)

# try/finally guarantees the camera and the window are released even when a
# model call raises or the loop is interrupted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("프레임 읽기 실패!")
            break

        # --- Person detection (only class id 0 is kept) ---
        detection_results = detection_model(frame)
        bboxes = []
        # Entries are ((left, top, width, height), confidence, class).
        # NOTE(review): presumably meant for tracker.update_tracks, which
        # this loop never calls — confirm whether tracking should be wired in.
        detections = []
        for box in detection_results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            class_id = int(box.cls)
            conf = float(box.conf)
            if class_id != 0:
                continue
            bboxes.append((x1, y1, x2, y2))
            detections.append(((x1, y1, x2 - x1, y2 - y1), conf, 0))

        # --- Face recognition on the clean frame ---
        # Done before anything is drawn so overlays, boxes and text do not
        # contaminate the pixels used for the embedding.
        face_labels = []
        for fx1, fy1, fx2, fy2, _face_conf in detect_faces(frame):
            face_crop = frame[fy1:fy2, fx1:fx2]
            if face_crop.size == 0:
                continue
            # The reference embedding is built from RGB images (see
            # generate_average_embedding), so the BGR webcam crop must be
            # converted before it is embedded.
            face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
            face_embedding = get_face_embedding(face_rgb)
            if face_embedding is None:
                continue
            is_me, similarity = is_my_face(face_embedding, my_face_embedding)
            face_labels.append((fx1, fy1, is_me, similarity))

        # --- Drawing ---
        # Segmentation overlay first (it modifies the frame in place).
        frame = apply_segmentation(frame, bboxes)

        for (x1, y1, x2, y2), (_ltwh, conf, _cls) in zip(bboxes, detections):
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"Person {conf:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        for fx1, fy1, is_me, similarity in face_labels:
            color = (0, 255, 0) if is_me else (0, 0, 255)
            label = f"Me: {similarity:.2f}" if is_me else "Not Me"
            cv2.putText(frame, label, (fx1, fy1 + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

        cv2.imshow(window_name, frame)
        # Press 'q' to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()



0: 480x640 1 personnnn, 123.0ms
Speed: 0.0ms preprocess, 123.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 448x640 1 person, 179.6ms
Speed: 3.8ms preprocess, 179.6ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)

0: 480x640 (no detections), 102.2ms
Speed: 3.7ms preprocess, 102.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 personnnn, 1 knifeeeee, 95.9ms
Speed: 2.0ms preprocess, 95.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 416x640 2 persons, 131.8ms
Speed: 0.0ms preprocess, 131.8ms inference, 0.0ms postprocess per image at shape (1, 3, 416, 640)

0: 480x640 (no detections), 89.1ms
Speed: 0.0ms preprocess, 89.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 personnnn, 87.3ms
Speed: 0.0ms preprocess, 87.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 416x640 2 persons, 90.7ms
Speed: 2.0ms preprocess, 90.7ms inference, 