<변경점>  
1. Dlib 약 0.5초 정도 걸림

1. 사람 + 총 + 칼 인식    
2. Arcface를 사람마다 첫 20프레임 동안 적용(Me, NotME).   
3. NotMe이면서 사람과 총 혹은 칼의 bounding box가 겹치는 경우에는 위험인으로 분류 -> 한 번 위험인으로 분류된 사람의 ID는 dangerous_ids로 관리됨 -> dangerous_ids 중 하나에 해당하는 사람은 위에 "Dangerous person"이라고 뜸
4. pose estimation을 dangerous person에 대해 적용(왼팔들기, 오른팔들기, 양팔들기)  
5. 출력화면 사이즈를 키움  
6. 버그수정: 초기에 무기와 교차하여 dangerous_ids에 들어갔더라도, ArcFace로 내 얼굴임이 확인되면(=나 자신이 무기를 소지한 상황) 그 사람을 위험 인물에서 제외  
7. Kobukki robot이 dangerous_ids 내에 있는 사람을 쫓아가도록 하면 될 듯
8. warning 해결: pip install --upgrade albumentations
9. 사람인 경우에 segmentation으로 윤곽선 치기(성공)


In [8]:
# dlib_embedding_create.py
import cv2
import numpy as np
import os
import face_recognition

my_face_folder = "C:/Users/idea0/EE101/Jongsul/myface"
embeddings = []

for file in os.listdir(my_face_folder):
    if file.lower().endswith(('.jpg', '.png', '.jpeg')):
        path = os.path.join(my_face_folder, file)
        img = cv2.imread(path)
        if img is None:
            continue
        # BGR->RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # face_recognition.face_encodings → list of 128-dim
        encs = face_recognition.face_encodings(img_rgb)
        if len(encs) > 0:
            embeddings.append(encs[0])

if len(embeddings) == 0:
    raise ValueError("No embeddings generated from dlib.")

# 평균 임베딩(128차원)
avg_embedding = np.mean(embeddings, axis=0)
print("dlib embedding shape:", avg_embedding.shape)  # (128,)

np.save("my_dlib_embedding.npy", avg_embedding)
print("Saved my_dlib_embedding.npy")

dlib embedding shape: (128,)
Saved my_dlib_embedding.npy


In [1]:
import cv2
import mediapipe as mp
import time
import torch
import numpy as np
import warnings

from ultralytics import YOLO
from sklearn.metrics.pairwise import cosine_similarity
from deep_sort_realtime.deepsort_tracker import DeepSort

############################
# 0) dlib-based face_recognition
############################
import face_recognition

def get_dlib_embedding(image_bgr):
    """
    dlib 기반 128차원 임베딩을 추출하는 함수.
    """
    if image_bgr.size == 0:
        return None
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    encodings = face_recognition.face_encodings(image_rgb)
    if len(encodings) == 0:
        return None
    return encodings[0]  # (128,)

def is_my_face(face_embedding, my_embedding, threshold=0.4):
    """
    dlib 임베딩 간의 유사도(코사인)로 '내 얼굴인지' 판별.
    threshold=0.4는 예시(실제로는 0.6~0.7로 조정할 수도 있음)
    """
    sim = cosine_similarity([face_embedding], [my_embedding])[0][0]
    return (sim > threshold), sim

############################
# 1) 모델 및 함수 초기화
############################
warnings.filterwarnings("ignore", category=FutureWarning)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = YOLO("C:/Users/idea0/EE101/Jongsul/Yolomodels/epoch180.pt").to(device)
model_seg = YOLO("yolov8n-seg.pt").to(device)

# dlib 임베딩 파일 로드
my_face_embedding = np.load("my_dlib_embedding.npy")  # (128,)

############################
# Pose: Dangerous person
############################
mp_pose = mp.solutions.pose
pose_danger = mp_pose.Pose(
    static_image_mode=True,
    model_complexity=1,
    enable_segmentation=False,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7
)
mp_drawing = mp.solutions.drawing_utils

LEFT_SHOULDER = 11
RIGHT_SHOULDER= 12
LEFT_WRIST   = 15
RIGHT_WRIST  = 16

def is_arm_raised(shoulder_y, wrist_y, threshold=0.05):
    return wrist_y < (shoulder_y - threshold)

def boxes_overlap(boxA, boxB):
    (x1A, y1A, x2A, y2A) = boxA
    (x1B, y1B, x2B, y2B) = boxB
    overlap_x = not (x2A < x1B or x2B < x1A)
    overlap_y = not (y2A < y1B or y2B < y1A)
    return (overlap_x and overlap_y)

############################
# 2) DeepSORT 초기화
############################
tracker = DeepSort(
    max_age=30,
    n_init=3,
    nms_max_overlap=1.0,
    embedder='mobilenet',
    half=True,
    embedder_gpu=True
)

############################
# 3) 메인 루프
############################
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("카메라를 열 수 없습니다.")
    exit()

dangerous_ids = set()
track_me_status = {}
track_arcface_count= {}
MAX_ARCFACE_FRAMES= 20
sim = 0

window_name = "DeepSORT + YOLO(SEG) + dlib + Pose"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window_name, 960,720)

prev_time = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        print("프레임 읽기 실패!")
        break

    # YOLO 추론
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = model(rgb_frame, conf=0.5)
    results_seg = model_seg(rgb_frame, conf=0.5)

    # YOLO detection
    det = results[0]
    boxes2 = det.boxes
    masks2 = results_seg[0].masks

    person_detections= []
    weapon_boxes= []

    # 1) 사람 & 무기 인식
    if boxes2 is not None:
        for i, box in enumerate(boxes2):
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            class_id = int(box.cls[0])
            conf = float(box.conf[0])

            cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0),2)
            label = f"{model.names[class_id]}: {conf:.2f}"
            cv2.putText(frame, label, (x1,y1-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0),2)

            # 세그(사람만 윤곽선)
            if masks2 is not None and class_id==0:
                if i < len(masks2.data):
                    single_mask = masks2.data[i].cpu().numpy()
                    mask_bin = (single_mask>0.5).astype(np.uint8)
                    contours,_= cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    cv2.drawContours(frame, contours, -1, (0,255,255), 2)

            # DeepSORT
            if class_id==0:
                w = x2 - x1
                h = y2 - y1
                person_detections.append(((x1,y1,w,h), conf, 0))
            elif class_id in [1,2]:
                weapon_boxes.append((x1,y1,x2,y2))

    # 2) DeepSORT update
    tracks = tracker.update_tracks(person_detections, frame=rgb_frame)
    tracked_boxes=[]
    for t in tracks:
        if not t.is_confirmed() or t.time_since_update>1:
            continue
        tid= t.track_id
        l,t_,r,b_ = map(int,t.to_ltrb())
        tracked_boxes.append((tid, l,t_,r,b_))
        cv2.putText(frame, f"ID:{tid}", (l-10,t_-10),
                    cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,255,255),2)

    # 3) dlib 로직(기존 facenet 위치)
    time1 = time.time()
    for (tid, px1, py1, px2, py2) in tracked_boxes:
        if tid not in track_me_status:
            track_me_status[tid] = False
        if tid not in track_arcface_count:
            track_arcface_count[tid] = 0

        #if track_me_status[tid]==False and track_arcface_count[tid]<MAX_ARCFACE_FRAMES:
        if True:
            track_arcface_count[tid]+=1
            # dlib 임베딩
            PAD= 10
            sub_face= frame[max(0,py1-PAD):py2+PAD, max(0,px1-PAD):px2+PAD]
            if sub_face.size==0:
                continue
            emb= get_dlib_embedding(sub_face)
            if emb is not None:
                same_person, sim= is_my_face(emb, my_face_embedding, threshold=0.4)
                if same_person:
                    track_me_status[tid]=True
                    # dangerous_ids에서 제거
                    if tid in dangerous_ids:
                        dangerous_ids.remove(tid)

        # 시각화
        if track_me_status[tid]:
            text_arc= f"          Me(sim={sim:.2f})"
            color=(0,255,0)
        else:
            text_arc= f"          NotMe(sim={sim:.2f})"
            color=(0,0,255)
        cv2.putText(frame, text_arc, (px1,py1-10),
                    cv2.FONT_HERSHEY_SIMPLEX,0.6,color,2)
    time2 = time.time()
    print("Dlib running time: ", time2 - time1)
    # 4) 무기 교차 & NotMe => dangerous
    for (tid, px1, py1, px2, py2) in tracked_boxes:
        if not track_me_status[tid]:
            pbox= (px1, py1, px2, py2)
            for wb in weapon_boxes:
                if boxes_overlap(pbox, wb):
                    dangerous_ids.add(tid)
                    break

    # 5) Dangerous => Mediapipe pose
    for (tid, px1, py1, px2, py2) in tracked_boxes:
        if tid in dangerous_ids:
            sub= frame[py1:py2, px1:px2]
            if sub.size==0:
                continue
            c_rgb= cv2.cvtColor(sub, cv2.COLOR_BGR2RGB)
            pose_result= pose_danger.process(c_rgb)
            if pose_result.pose_landmarks:
                lms= pose_result.pose_landmarks.landmark
                sub_w= px2 - px1
                sub_h= py2 - py1

                left_shoulder_y= lms[LEFT_SHOULDER].y
                right_shoulder_y= lms[RIGHT_SHOULDER].y
                left_wrist_y= lms[LEFT_WRIST].y
                right_wrist_y= lms[RIGHT_WRIST].y

                la_up= (left_wrist_y< (left_shoulder_y-0.05))
                ra_up= (right_wrist_y<(right_shoulder_y-0.05))
                if la_up and ra_up:
                    a_text= "both arms up"
                elif la_up:
                    a_text= "left arm up"
                elif ra_up:
                    a_text= "right arm up"
                else:
                    a_text= "do nothing"

                for lm in lms:
                    cx= px1+int(lm.x*sub_w)
                    cy= py1+int(lm.y*sub_h)
                    cv2.circle(frame,(cx,cy),3,(0,255,255),-1)
                cv2.putText(frame,f"Dangerous person: {a_text}",
                            (px1,py1+20),
                            cv2.FONT_HERSHEY_SIMPLEX,0.7,(0,0,255),2)

    # FPS
    now= time.time()
    fps= 1.0/(now - prev_time)
    prev_time= now
    cv2.putText(frame,f"FPS:{fps:.2f}",(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),2)

    cv2.imshow(window_name,frame)
    if cv2.waitKey(1)&0xFF==ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 480x640 1 personnnn, 116.0ms
Speed: 2.0ms preprocess, 116.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 171.0ms
Speed: 2.0ms preprocess, 171.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)
Dlib running time:  0.0

0: 480x640 1 personnnn, 103.0ms
Speed: 2.0ms preprocess, 103.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 143.0ms
Speed: 2.0ms preprocess, 143.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)
Dlib running time:  0.0

0: 480x640 1 personnnn, 90.0ms
Speed: 3.0ms preprocess, 90.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 102.8ms
Speed: 2.0ms preprocess, 102.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)
Dlib running time:  0.5154731273651123

0: 480x640 1 personnnn, 87.0ms
Speed: 2.0ms preprocess, 87.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x6

Dlib running time:  0.4960622787475586

0: 480x640 1 personnnn, 90.0ms
Speed: 1.0ms preprocess, 90.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 125.0ms
Speed: 2.0ms preprocess, 125.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
Dlib running time:  0.5077965259552002

0: 480x640 1 personnnn, 88.0ms
Speed: 2.0ms preprocess, 88.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 120.0ms
Speed: 2.0ms preprocess, 120.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
Dlib running time:  0.501049280166626

0: 480x640 1 personnnn, 109.0ms
Speed: 3.0ms preprocess, 109.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 128.0ms
Speed: 2.0ms preprocess, 128.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)
Dlib running time:  0.5113086700439453

0: 480x640 1 personnnn, 96.0ms
Speed: 2.0ms preprocess, 96.0ms inferenc

In [1]:
import torch
import sys

print("python version:", sys.version)
print("torch version:", torch.__version__)
print("CUDA Version:", torch.version.cuda)
print("CUDA Available:", torch.cuda.is_available())

python version: 3.8.5 (default, Sep  3 2020, 21:29:08) [MSC v.1916 64 bit (AMD64)]
torch version: 1.12.1+cu116
CUDA Version: 11.6
CUDA Available: False


In [18]:
#pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116

^C
Note: you may need to restart the kernel to use updated packages.
