In [1]:
import openvino as ov
import numpy as np
import cv2

from typing import Tuple, Dict
import torch
from ultralytics import YOLO
from ultralytics.utils import ops
from ultralytics.utils.plotting import colors
from pathlib import Path
import random, time

In [2]:
# Location of the OpenVINO IR file produced by a previous export.
ov_model = Path("model/yolov8n-pose_openvino_model/yolov8n-pose.xml")

if not ov_model.is_file():
    # No IR on disk yet: load the YOLOv8n pose PyTorch weights ...
    model = YOLO("model/yolov8n-pose.pt")

    # ... and export them to the OpenVINO format.
    model.export(format="openvino")
else:
    # The export is skipped when the IR file is already present.
    print("OpenVINO model already exists.")

OpenVINO model already exists.


In [3]:
# Create the OpenVINO runtime entry point.
core = ov.Core()

# Read the exported IR and compile it for CPU inference.
model = core.read_model("model/yolov8n-pose_openvino_model/yolov8n-pose.xml")
pose_compiled_model = core.compile_model(model, "CPU")

# Report the shape of the first input and the first output port.
input_layer = pose_compiled_model.input(0)
print("Input layer shape: ", input_layer.shape)
output_layer = pose_compiled_model.output(0)
print("Output layer shape:", output_layer.shape)

Input layer shape:  [1,3,640,640]
Output layer shape: [1,56,8400]


### PreProcess

In [4]:
def letterbox(img: np.ndarray, new_shape:Tuple[int, int] = (640, 640),
              color:Tuple[int, int, int] = (114, 114, 114), auto:bool = False,
              scale_fill:bool = False, scaleup:bool = False, stride:int = 32):
    """Resize ``img`` to fit ``new_shape`` and pad the remainder with ``color``.

    Args:
        img: Image array whose first two axes are (height, width).
        new_shape: Target (height, width); a single int means a square target.
        color: Constant border value handed to ``cv2.copyMakeBorder``.
        auto: When True, the padding is reduced modulo ``stride``.
        scale_fill: When True (and ``auto`` is False), the image is stretched
            to ``new_shape`` and no padding is added.
        scaleup: When False, the scale factor is capped at 1.0, so the image
            is never enlarged.
        stride: Modulus used by the ``auto`` padding mode.

    Returns:
        ``(image, ratio, (dw, dh))`` where ``ratio`` is the (width, height)
        scale pair and ``dw``/``dh`` are the per-side paddings (half of the
        total padding) along width and height.
    """
    src_h, src_w = img.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    target_h, target_w = new_shape[0], new_shape[1]

    # Single scale factor that makes the image fit inside the target.
    scale = min(target_h / src_h, target_w / src_w)
    if not scaleup:
        scale = min(scale, 1.0)

    ratio = (scale, scale)
    resized_wh = (int(round(src_w * scale)), int(round(src_h * scale)))
    pad_w = target_w - resized_wh[0]
    pad_h = target_h - resized_wh[1]

    if auto:
        # Keep only the padding remainder with respect to the stride.
        pad_w, pad_h = np.mod(pad_w, stride), np.mod(pad_h, stride)
    elif scale_fill:
        # Stretch to the full target: no padding, independent axis ratios.
        pad_w, pad_h = 0.0, 0.0
        resized_wh = (target_w, target_h)
        ratio = (target_w / src_w, target_h / src_h)  # width, height ratios

    # The total padding is shared between the two opposite sides.
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    if (src_w, src_h) != resized_wh:
        img = cv2.resize(img, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets decide which side receives the extra pixel
    # when the total padding is odd.
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, ratio, (pad_w, pad_h)


def preprocess_image(img0: np.ndarray):
    """Letterbox ``img0`` and return it in channel-first, C-contiguous layout.

    Args:
        img0: Source image in HWC (height, width, channel) order.

    Returns:
        The letterboxed image rearranged to CHW (channel, height, width).
    """
    # Only the padded image is needed; ratio and padding are discarded.
    padded = letterbox(img0)[0]

    # HWC -> CHW: axes (0, 1, 2) become (2, 0, 1). The transpose is only a
    # view, so force a C-style contiguous memory layout before returning.
    return np.ascontiguousarray(np.transpose(padded, (2, 0, 1)))


def image_to_tensor(image:np.ndarray):
    """Convert an image array into a float32 tensor scaled by 1/255.

    Args:
        image: Array of pixel values (e.g. uint8 in 0-255), either a single
            3-D image or an already batched array.

    Returns:
        A float32 array; a 3-D input gains a leading batch axis of size 1,
        any other rank is returned with its shape unchanged.
    """
    # uint8 -> float32, then map 0-255 onto 0.0-1.0.
    scaled = image.astype(np.float32) / 255.0

    # Prepend the batch dimension only when it is missing.
    return scaled[np.newaxis, ...] if scaled.ndim == 3 else scaled

In [5]:
def postprocess(
    pred_boxes:np.ndarray,
    input_hw:Tuple[int, int],
    orig_img:np.ndarray,
    min_conf_threshold:float = 0.25,
    nms_iou_threshold:float = 0.45,
    agnosting_nms:bool = False,
    max_detections:int = 80,
):
    """Run NMS on the raw model output and rescale boxes/keypoints.

    Args:
        pred_boxes: Raw prediction array produced by the model.
        input_hw: (height, width) of the tensor that was fed to the model.
        orig_img: Original image, or a list with one image per batch item;
            only its ``shape`` is used, as the rescaling target.
        min_conf_threshold: Confidence threshold passed to NMS.
        nms_iou_threshold: IoU threshold passed to NMS.
        agnosting_nms: Forwarded to NMS as ``agnostic``.
        max_detections: Forwarded to NMS as ``max_det``.

    Returns:
        One dict per batch item with keys ``"box"`` (the first six columns of
        each detection) and ``"kpt"`` (the remaining columns, viewed as
        17 x 3 per detection when there is at least one detection).
    """
    detections_per_image = ops.non_max_suppression(
        torch.from_numpy(pred_boxes),
        min_conf_threshold,
        nms_iou_threshold,
        nc=1,
        agnostic=agnosting_nms,
        max_det=max_detections,
    )

    keypoint_layout = (17, 3)  # keypoints per detection x values per keypoint
    is_batched = isinstance(orig_img, list)

    results = []
    for index, det in enumerate(detections_per_image):
        target_shape = orig_img[index].shape if is_batched else orig_img.shape

        # Map the box coordinates from the network input back to the image.
        det[:, :4] = ops.scale_boxes(input_hw, det[:, :4], target_shape).round()

        # Everything after column 5 holds the keypoints.
        raw_kpts = det[:, 6:]
        if len(det):
            raw_kpts = raw_kpts.view(len(det), *keypoint_layout)
        scaled_kpts = ops.scale_coords(input_hw, raw_kpts, target_shape)

        results.append({"box": det[:, :6].numpy(), 'kpt': scaled_kpts.numpy()})

    return results

# <img src="https://learnopencv.com/wp-content/uploads/2021/05/fix-overlay-issue.jpg">

In [6]:
def angle_between_points(a, b, c):
    """Return the angle at vertex ``b`` formed by points ``a`` and ``c``.

    Args:
        a, b, c: Point coordinates (sequences or arrays of equal length).

    Returns:
        The angle a-b-c in whole degrees (fractional part truncated), in the
        range 0-180. When the angle is undefined -- ``a`` or ``c`` coincides
        with ``b``, or a coordinate is NaN/inf -- 180 is returned, the same
        neutral value the drawing code in this file uses when no angle is
        measured.
    """
    vertex = np.asarray(b, dtype=float)
    ba = np.asarray(a, dtype=float) - vertex
    bc = np.asarray(c, dtype=float) - vertex

    norm_product = np.linalg.norm(ba) * np.linalg.norm(bc)
    if not np.isfinite(norm_product) or norm_product == 0:
        # A zero-length arm would give 0/0 = NaN, and int(NaN) raises.
        return 180

    # Floating-point round-off can push the cosine slightly outside [-1, 1],
    # which would make arccos return NaN; clamp it back into the valid domain.
    cosine_angle = np.clip(np.dot(ba, bc) / norm_product, -1.0, 1.0)
    angle = np.arccos(cosine_angle)

    return int(np.degrees(angle))

In [27]:
def plot_one_box(box:np.ndarray, img:np.ndarray, color:Tuple[int, int, int] = None,
                 keypoints:np.ndarray = None, label:str = None, line_thickness:int = 5):
    """Draw one detection box and, optionally, one arm with its joint angle.

    Args:
        box: Box corners; the first four values are used as x1, y1, x2, y2.
        img: Image to draw on (modified in place).
        color: Box color; a random color is picked when omitted.
        keypoints: Per-detection keypoint array (rows of x, y and optionally
            confidence). Only rows 5, 7 and 9 are drawn -- presumably left
            shoulder, elbow and wrist in COCO order; confirm against the model.
        label: Accepted but not drawn by this function.
        line_thickness: Box line thickness; a falsy value derives it from the
            image size.

    Returns:
        ``(img, angle)`` where ``angle`` is the angle at keypoint 7 between
        keypoints 5 and 9, or 180 when it was not measured.
    """
    thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    top_left = (int(box[0]), int(box[1]))
    bottom_right = (int(box[2]), int(box[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    angle = 180
    if keypoints is None:
        return img, angle

    img_h, img_w = img.shape[:2]

    # keypoint index -> (marker radius, marker color)
    marker_style = {
        5: (5, (255, 51, 255)),
        7: (10, (127, 0, 255)),
        9: (5, (255, 51, 255)),
    }

    # Draw the joint markers and remember the joints that were accepted.
    joints = {}
    for kpt_index, kpt in enumerate(keypoints):
        if kpt_index not in marker_style:
            continue
        x_coord, y_coord = kpt[0], kpt[1]

        # Skip points whose coordinate is a multiple of the image size
        # (this includes the 0 coordinate).
        if x_coord % img_w == 0 or y_coord % img_h == 0:
            continue
        # Skip low-confidence points when a confidence value is present.
        if len(kpt) == 3 and kpt[2] < 0.5:
            continue

        joints[kpt_index] = (x_coord, y_coord)
        radius, fill = marker_style[kpt_index]
        cv2.circle(img, (int(x_coord), int(y_coord)), radius, fill, -1, cv2.LINE_AA)

    # Write the angle next to keypoint 7 once all three joints are available.
    if len(joints) == len(marker_style):
        vertex_x, vertex_y = joints[7]
        text_origin = (int(vertex_x) - 70, int(vertex_y) - 20)
        angle = angle_between_points(joints[5], joints[7], joints[9])
        cv2.putText(img, str(angle), text_origin, cv2.FONT_HERSHEY_PLAIN, 1, (255, 153, 51), 3, cv2.LINE_8)

    # Draw the two limb segments 9-7 and 7-5.
    has_confidence = keypoints.shape[-1] == 3
    for start_index, end_index in ((9, 7), (7, 5)):
        start = (int(keypoints[start_index, 0]), int(keypoints[start_index, 1]))
        end = (int(keypoints[end_index, 0]), int(keypoints[end_index, 1]))

        if has_confidence and (keypoints[start_index, 2] < 0.5 or keypoints[end_index, 2] < 0.5):
            continue

        # Both end points must be usable: not on a multiple of the image
        # size and not negative.
        start_ok = start[0] % img_w != 0 and start[1] % img_h != 0 and start[0] >= 0 and start[1] >= 0
        if not start_ok:
            continue
        end_ok = end[0] % img_w != 0 and end[1] % img_h != 0 and end[0] >= 0 and end[1] >= 0
        if not end_ok:
            continue

        cv2.line(img, start, end, (0, 255, 0), thickness=2, lineType=cv2.LINE_AA)

    return img, angle

In [28]:
def draw_results(results:Dict, source_image:np.ndarray, min_conf:float = 0.4):
    """Draw every sufficiently confident detection onto ``source_image``.

    Args:
        results: Dict with ``"box"`` (rows unpacked as x1, y1, x2, y2,
            confidence, class label) and ``"kpt"`` (per-detection keypoints,
            or None).
        source_image: Image to draw on; it is passed to ``plot_one_box`` and
            replaced by the image that function returns.
        min_conf: Detections with a confidence below this value are skipped.
            The default keeps the previous hard-coded cut-off of 0.4.

    Returns:
        ``(image, angle)`` where ``angle`` is the value returned by
        ``plot_one_box`` for the last drawn detection, or 180 when nothing
        was drawn.
    """
    boxes = results["box"]
    keypoints = results["kpt"]

    angle = 180
    for idx, (*xyxy, conf, lbl) in enumerate(boxes):
        if conf < min_conf:
            continue
        label = f'{"person"} {conf:.2f}'
        kp = keypoints[idx] if keypoints is not None else None
        source_image, angle = plot_one_box(xyxy, source_image, keypoints=kp, label=label, color=colors(int(lbl)), line_thickness=1)
    return source_image, angle

In [29]:
def detect(image:np.ndarray, model:ov.CompiledModel):
    """Run the pose model on one image and return post-processed detections.

    Args:
        image: Source image in HWC layout, as accepted by ``preprocess_image``.
        model: Compiled OpenVINO model. It is called directly on the input
            tensor and its first output port is looked up with
            ``model.output(0)``, so it must be a compiled model rather than
            the ``ov.Model`` returned by ``read_model``.

    Returns:
        The list produced by ``postprocess``: one dict per batch item.

    NOTE(review): no channel reordering happens here. Frames read with OpenCV
    are BGR, while Ultralytics models are normally fed RGB -- confirm whether
    a BGR->RGB conversion is wanted before inference.
    """
    preprocessed_image = preprocess_image(image)
    input_tensor = image_to_tensor(preprocessed_image)

    result = model(input_tensor)

    # Post-processing
    boxes = result[model.output(0)]       # raw predictions from the first output port
    input_hw = input_tensor.shape[2:]     # (height, width) of the network input, needed to
                                          # rescale predictions back to the original image size
    detections = postprocess(pred_boxes=boxes, input_hw=input_hw, orig_img=image)

    return detections

In [30]:
def AddBackground(combined_image, bg):
    """Paste ``combined_image`` onto a copy of the background image.

    The image is resized to a width of 640 pixels (height follows the aspect
    ratio, truncated to an int) and written into the background copy with its
    top-left corner at x=270, y=50. ``bg`` itself is left unmodified.

    Args:
        combined_image: Image to place on the background.
        bg: Background image; must be large enough to hold the pasted region.

    Returns:
        The background copy with the resized image pasted in.
    """
    canvas = bg.copy()

    target_w = 640
    left, top = 270, 50

    src_h, src_w = combined_image.shape[0], combined_image.shape[1]
    target_h = int((target_w / src_w) * src_h)
    resized = cv2.resize(combined_image, (target_w, target_h))

    bottom = top + resized.shape[0]
    right = left + resized.shape[1]
    canvas[top:bottom, left:right] = resized

    return canvas

In [36]:
def _draw_quiz_panel(image, quiz_angle, pose_angle, score):
    """Draw the quiz / pose-angle / score panel in the top-left corner of ``image`` (in place)."""
    cv2.rectangle(image, (0,0), (380, 73), (245,117,16), -1)

    # (title, title position, value, value position, value color)
    panel_fields = (
        ('angle Quiz', (10, 12), quiz_angle, (20, 60), (255,255,255)),
        ('Pose Angle', (150, 12), pose_angle, (155, 60), (255,255,0)),
        ('Score', (290, 12), score, (290, 60), (0,0, 255)),
    )
    for title, title_org, value, value_org, value_color in panel_fields:
        cv2.putText(image, title, title_org,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1, cv2.LINE_AA)
        cv2.putText(image, str(value), value_org,
                    cv2.FONT_HERSHEY_SIMPLEX, 1, value_color, 2, cv2.LINE_AA)


def Main():
    """Run the webcam angle quiz until the space bar is pressed.

    Each frame is run through ``detect`` with the module-level
    ``pose_compiled_model``, annotated by ``draw_results`` and shown on top
    of the background image. When the measured angle equals the quiz angle
    the score grows by 10, a new quiz angle (30-150 in steps of 10) is drawn
    and the loop pauses until any key is pressed.

    Raises:
        FileNotFoundError: If the background image cannot be read.
    """
    bg = cv2.imread("./data/background.jpg")
    if bg is None:
        # cv2.imread returns None instead of raising; fail early with a clear
        # message rather than crashing later inside AddBackground.
        raise FileNotFoundError("Could not read background image: ./data/background.jpg")

    camera = cv2.VideoCapture(0)

    score = 0
    quizAngle = random.randint(3, 15) * 10
    frame_delay_ms = 20

    try:
        while True:
            ret, frame = camera.read()
            if not ret:
                break

            detection = detect(frame, pose_compiled_model)[0]

            image_with_boxes, angle = draw_results(detection, frame)

            # Quiz Box
            _draw_quiz_panel(image_with_boxes, quizAngle, angle, score)

            answered = abs(quizAngle - angle) == 0
            if answered:
                score += 10
                quizAngle = random.randint(3, 15) * 10
                msg1 = "Correct"
                cv2.putText(image_with_boxes, msg1, (150, 200),
                        cv2.FONT_HERSHEY_SIMPLEX, 3, (0,255,255), 4, cv2.LINE_AA)
                msg2 = "Press any key to continue..."
                cv2.putText(image_with_boxes, msg2, (100, 300),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv2.LINE_AA)

            deployment = AddBackground(image_with_boxes, bg)
            cv2.imshow("Angle Quiz!!", deployment)

            if answered:
                # Block until any key is pressed so the player can read the message.
                cv2.waitKey(0)

            # Poll the keyboard exactly once per frame. A second, discarded
            # waitKey call would randomly swallow the space-bar press.
            if cv2.waitKey(frame_delay_ms) & 0xFF == ord(' '):
                break
    finally:
        # Always free the camera and close the window, even on an error.
        camera.release()
        cv2.destroyAllWindows()

In [38]:
# Start the quiz loop when this code runs as the main module
# (script or notebook cell); nothing happens on import.
if __name__ == '__main__':
    Main() 