1. YOLO로 사람/총/칼 인식
2. Arcface로 얼굴 인식
3. 특정인에 대해서만! MediaPipe로 포즈 인식


In [1]:
import cv2
import mediapipe as mp
import time
import torch
import numpy as np

from ultralytics import YOLO
from sklearn.metrics.pairwise import cosine_similarity
from insightface.app import FaceAnalysis

#######################
# 1) Model and helper initialization
#######################
# Run YOLO on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the YOLO model and move it to the selected device.
model = YOLO("yolov8n.pt").to(device)
# (If you have a custom model, swap it in like this)
# model = YOLO("C:/path/to/your_custom_model.pt").to(device)

# Initialize ArcFace (insightface "buffalo_l" model pack).
arc_app = FaceAnalysis(name="buffalo_l")
# ctx_id=-1 is the CPU example; note that `device` above is not used here.
# det_size is presumably the face detector's input size -- TODO confirm.
arc_app.prepare(ctx_id=-1, det_size=(640, 640))  # CPU usage example (ctx_id=-1)

# Load the stored embedding of "my" face; is_my_face() compares live
# embeddings against it. The file must exist in the working directory.
my_face_embedding = np.load("my_face_embedding.npy")

def get_face_embedding(arc_app, face_img_bgr):
    """Return the embedding of the first face found in an image.

    Args:
        arc_app: Face analysis object exposing ``get(image)``; the call must
            return a sized sequence of faces, each with an ``embedding``
            attribute.
        face_img_bgr: Image to search for a face (BGR, per the name).

    Returns:
        The ``embedding`` of the first returned face, or ``None`` when no
        face was returned.
    """
    detected = arc_app.get(face_img_bgr)
    return detected[0].embedding if len(detected) > 0 else None

def is_my_face(face_embedding, my_embedding, threshold=0.4):
    """Compare a face embedding with the reference embedding.

    Args:
        face_embedding: Embedding vector of the face under test.
        my_embedding: Reference embedding vector to compare against.
        threshold: Similarity that must be strictly exceeded for a match.

    Returns:
        A ``(matched, sim)`` pair: ``sim`` is the cosine similarity of the
        two vectors and ``matched`` is whether ``sim > threshold``.
    """
    # cosine_similarity works on 2-D inputs, so each vector is wrapped as a
    # single-row matrix and the only cell of the 1x1 result is read back.
    similarity_matrix = cosine_similarity([face_embedding], [my_embedding])
    sim = similarity_matrix[0][0]
    matched = sim > threshold
    return matched, sim

#######################
# 2) Mediapipe Pose initialization (single-person use)
#######################
mp_pose = mp.solutions.pose
# Not used by the code in this cell; only mentioned as an option for drawing
# landmarks.
mp_drawing = mp.solutions.drawing_utils

# The Pose object is created once and reused for every crop
# (static_image_mode=True recommended).
pose_estimator = mp_pose.Pose(
    static_image_mode=True,  # suited to processing cropped still images
    model_complexity=1,
    enable_segmentation=False,
    min_detection_confidence=0.5,
    # NOTE(review): MediaPipe docs say tracking confidence is ignored in
    # static image mode -- confirm before tuning this value.
    min_tracking_confidence=0.5
)

# Pose landmark indices used for the arm-raise check.
LEFT_SHOULDER = 11
RIGHT_SHOULDER = 12
LEFT_WRIST = 15
RIGHT_WRIST = 16

def is_arm_raised(shoulder_y, wrist_y, threshold=0.05):
    """Tell whether a wrist is clearly above its shoulder.

    Coordinates follow the Mediapipe Pose convention: y=0 is the top of the
    image and y=1 the bottom, so a smaller y means higher up.

    Args:
        shoulder_y: Normalized y coordinate of the shoulder.
        wrist_y: Normalized y coordinate of the wrist.
        threshold: Margin by which the wrist must be above the shoulder.

    Returns:
        ``True`` when ``wrist_y`` is strictly less than
        ``shoulder_y - threshold``.
    """
    raised_limit = shoulder_y - threshold
    return wrist_y < raised_limit

#######################
# 3) Run the webcam loop
#######################
# Per frame: YOLO detects objects; every "person" box is checked against the
# enrolled face with ArcFace; MediaPipe Pose runs only on the crop of a
# matching person, and a simple arm-raise label is drawn for it.
# Press "q" in the preview window to quit.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("카메라를 열 수 없습니다.")
    cap.release()
    # exit() is a helper installed by the `site` module and is not guaranteed
    # to exist in every interpreter; raising SystemExit always works.
    raise SystemExit(1)

# perf_counter() is monotonic, so the FPS estimate cannot be distorted by
# system clock adjustments.
prev_time = time.perf_counter()

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("프레임을 읽어올 수 없습니다.")
            break

        # All inference reads from this untouched copy; `frame` is used only
        # as the drawing canvas. Otherwise boxes, labels and landmark dots
        # already drawn (for this or an overlapping detection) would leak
        # into the crops handed to ArcFace and MediaPipe.
        clean_frame = frame.copy()
        frame_h, frame_w = clean_frame.shape[:2]

        # (1) YOLO inference -> bounding boxes of every detected object.
        # The OpenCV frame is passed as-is: Ultralytics treats numpy input as
        # BGR, so converting to RGB first would swap the red/blue channels.
        results_yolo = model(clean_frame)

        # (2) Walk over the detections
        for box in results_yolo[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            # Defensive clamp: a negative index would wrap around when
            # slicing the frame below.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            class_id = int(box.cls)
            conf = float(box.conf)
            class_name = model.names[class_id]

            # Draw the bounding box and its class label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"{class_name}: {conf:.2f}"
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

            # Only "person" detections are processed further
            if class_name.lower() != "person":
                continue

            person_crop = clean_frame[y1:y2, x1:x2]  # BGR crop, no overlays
            if person_crop.size == 0:
                continue

            # (3) ArcFace: is this "my" face?
            face_embedding = get_face_embedding(arc_app, person_crop)
            if face_embedding is None:
                continue

            same_person, sim = is_my_face(face_embedding, my_face_embedding,
                                          threshold=0.4)
            if same_person:
                color = (0, 255, 0)
                text_face = f"Me! (sim={sim:.2f})"
            else:
                color = (0, 0, 255)
                text_face = f"Not me (sim={sim:.2f})"

            cv2.putText(frame, text_face, (x1, y1 - 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

            # (4) Pose estimation only for the person recognized as "me"
            if not same_person:
                continue

            # 4-1) MediaPipe expects RGB input
            person_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)

            # 4-2) Run MediaPipe Pose on the crop (single person assumed)
            pose_result = pose_estimator.process(person_rgb)
            if not pose_result.pose_landmarks:
                continue

            # 4-3) Landmarks are normalized to [0, 1] relative to the crop
            landmarks = pose_result.pose_landmarks.landmark

            # Arm-raise check; it only needs the normalized y coordinates
            left_arm_up = is_arm_raised(landmarks[LEFT_SHOULDER].y,
                                        landmarks[LEFT_WRIST].y)
            right_arm_up = is_arm_raised(landmarks[RIGHT_SHOULDER].y,
                                         landmarks[RIGHT_WRIST].y)

            if left_arm_up and right_arm_up:
                action_text = "Both arms up"
            elif left_arm_up:
                action_text = "Left arm up"
            elif right_arm_up:
                action_text = "Right arm up"
            else:
                action_text = "Arms down"

            cv2.putText(frame, action_text, (x1, y1 - 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

            # 4-4) Landmark visualization: scale each normalized landmark to
            #      crop pixels, then offset by the crop's top-left corner
            #      (x1, y1) to land on the full frame.
            sub_h, sub_w = person_crop.shape[:2]
            for lm in landmarks:
                px = int(lm.x * sub_w) + x1
                py = int(lm.y * sub_h) + y1
                cv2.circle(frame, (px, py), 3, (255, 0, 255), -1)

        # FPS estimate; guard against a zero-length interval
        current_time = time.perf_counter()
        elapsed = current_time - prev_time
        fps = 1.0 / elapsed if elapsed > 0 else 0.0
        prev_time = current_time
        cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow("Pose(Only Me)", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

  check_for_updates()


Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\idea0/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\idea0/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\idea0/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\idea0/.insightface\models\buffalo_l\genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\idea0/.insightface\models\buffalo_l\w600k_r50.onnx recognition ['None', 3, 112, 112] 127.

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 135.0ms
Speed: 3.0ms preprocess, 135.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 93.9ms
Speed: 4.0ms preprocess, 93.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 159.0ms
Speed: 4.0ms preprocess, 159.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 91.1ms
Speed: 2.0ms preprocess, 91.1ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 97.0ms
Speed: 2.0ms preprocess, 97.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 97.0ms
Speed: 4.0ms preprocess, 97.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 92.0ms
Speed: 4.0ms preprocess, 92.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 100.0ms
Speed: 2.0ms preprocess, 100.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 82.0ms
Speed: 3.0ms preprocess, 82.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 109.0ms
Speed: 2.0ms preprocess, 109.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 84.0ms
Speed: 1.0ms preprocess, 84.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 87.0ms
Speed: 3.0ms preprocess, 87.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 86.0ms
Speed: 3.0ms preprocess, 86.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 85.0ms
Speed: 2.0ms preprocess, 85.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 96.0ms
Speed: 2.0ms preprocess, 96.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 75.0ms
Speed: 11.0ms preprocess, 75.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 78.0ms
Speed: 15.0ms preprocess, 78.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 79.0ms
Speed: 13.0ms preprocess, 79.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 75.9ms
Speed: 2.0ms preprocess, 75.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 85.0ms
Speed: 2.0ms preprocess, 85.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 82.0ms
Speed: 2.0ms preprocess, 82.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 83.3ms
Speed: 2.0ms preprocess, 83.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 88.0ms
Speed: 2.0ms preprocess, 88.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 91.0ms
Speed: 2.0ms preprocess, 91.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 80.0ms
Speed: 9.0ms preprocess, 80.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 78.0ms
Speed: 11.0ms preprocess, 78.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 85.0ms
Speed: 3.0ms preprocess, 85.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 103.0ms
Speed: 3.0ms preprocess, 103.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 113.0ms
Speed: 3.0ms preprocess, 113.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 127.0ms
Speed: 1.0ms preprocess, 127.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 90.0ms
Speed: 7.0ms preprocess, 90.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 97.0ms
Speed: 2.0ms preprocess, 97.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 98.0ms
Speed: 3.0ms preprocess, 98.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 104.0ms
Speed: 2.0ms preprocess, 104.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 115.0ms
Speed: 9.0ms preprocess, 115.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 92.0ms
Speed: 2.0ms preprocess, 92.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 101.0ms
Speed: 2.0ms preprocess, 101.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 96.0ms
Speed: 3.0ms preprocess, 96.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 80.0ms
Speed: 2.0ms preprocess, 80.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 96.0ms
Speed: 14.0ms preprocess, 96.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 94.0ms
Speed: 6.0ms preprocess, 94.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 94.0ms
Speed: 4.0ms preprocess, 94.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 91.6ms
Speed: 12.0ms preprocess, 91.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 76.0ms
Speed: 10.0ms preprocess, 76.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 89.0ms
Speed: 14.0ms preprocess, 89.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 108.0ms
Speed: 16.0ms preprocess, 108.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 91.0ms
Speed: 7.0ms preprocess, 91.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 79.0ms
Speed: 5.0ms preprocess, 79.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 94.0ms
Speed: 21.0ms preprocess, 94.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 85.0ms
Speed: 11.0ms preprocess, 85.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 2 persons, 2 toothbrushs, 97.0ms
Speed: 9.0ms preprocess, 97.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 1 toothbrush, 79.0ms
Speed: 3.0ms preprocess, 79.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 1 toothbrush, 114.0ms
Speed: 15.0ms preprocess, 114.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 3 toothbrushs, 92.0ms
Speed: 8.0ms preprocess, 92.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 2 toothbrushs, 96.0ms
Speed: 17.0ms preprocess, 96.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 5 toothbrushs, 87.0ms
Speed: 18.0ms preprocess, 87.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 3 toothbrushs, 96.0ms
Speed: 21.0ms preprocess, 96.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 3 toothbrushs, 84.0ms
Speed: 16.0ms preprocess, 84.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 4 toothbrushs, 81.0ms
Speed: 19.0ms preprocess, 81.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 2 toothbrushs, 90.0ms
Speed: 1.0ms preprocess, 90.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 91.0ms
Speed: 9.0ms preprocess, 91.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 87.0ms
Speed: 3.0ms preprocess, 87.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 2 persons, 93.1ms
Speed: 15.0ms preprocess, 93.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4





  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4


0: 480x640 1 person, 111.0ms
Speed: 3.0ms preprocess, 111.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4



0: 480x640 1 person, 143.0ms
Speed: 4.0ms preprocess, 143.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4
