# mp.solutions.pose를 활용한 구현
- 주의점. 해당 파츠를 실행 시 이유는 모르겠지만, detector를 통한 구현이 경로 오류가 발생함

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt


# Initializing mediapipe pose class.
mp_pose = mp.solutions.pose
# Setting up the Pose model for images.
pose_img = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5, model_complexity=1)
# Setting up the Pose model for videos.
pose_video = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, 
                          min_tracking_confidence=0.5, model_complexity=1)

# Initializing mediapipe drawing class to draw landmarks on specified image.
mp_drawing = mp.solutions.drawing_utils


def estimPose_img(input_file, pose=pose_img, landmarks_c=(234,63,247), connection_c=(117,249,77), 
                   thickness=20, circle_r=10, display=True):
    
    # Read the input image
    if isinstance(input_file, str) :
        input_img = cv2.imread(input_file)
    else :
        input_img = input_file
    
    # Create a copy of the input image
    output_img = input_img.copy()
    
    # Perform the Pose Detection.
    results = pose.process(output_img) # 이거 이렇게 하면 트래킹은 안되자나..
    # Retrieve the height and width of the input image.
    height, width, _ = input_img.shape
    
    # Initialize a list to store the detected landmarks.
    landmarks = []
    
    # Check if any landmarks are detected.
    if results.pose_landmarks:
        skeleton = np.zeros_like(input_img)
        mp_drawing.draw_landmarks(skeleton, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, 
                                  mp_drawing.DrawingSpec(landmarks_c, thickness, circle_r),
                                  mp_drawing.DrawingSpec(connection_c, thickness, circle_r))
    
        # Draw Pose landmarks on the output image.
        mp_drawing.draw_landmarks(output_img, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, 
                                  mp_drawing.DrawingSpec(landmarks_c, thickness, circle_r),
                                  mp_drawing.DrawingSpec(connection_c, thickness, circle_r))
        
        # Iterate over the detected landmarks.
        for landmark in results.pose_world_landmarks.landmark:
            landmarks.append((landmark.x, landmark.y,
                                  landmark.z, landmark.visibility))
            
    # print(results.pose_landmarks)
    # Check if we want to display.
    if display:
        # Display the original input image and the resulting image.
        plt.figure(figsize=[15,15])
        plt.subplot(121);plt.imshow(input_img[:,:,::-1]);plt.title("Original image");plt.axis('off')
        plt.subplot(122);plt.imshow(output_img[:,:,::-1]);plt.title("Output image");plt.axis('off')
        
        # Plot the Pose landmarks in 3D.
        mp_drawing.plot_landmarks(results.pose_world_landmarks, mp_pose.POSE_CONNECTIONS)
        return output_img, skeleton, landmarks
        
    # Just get output_img and landmarks
    else:
        # Return the output image and the found landmarks.
        return output_img, skeleton, landmarks

def estimPose_video(input_file, pose_video=pose_video, landmarks_c=(234,63,247), connection_c=(117,249,77), 
                 thickness=1, circle_r=1, nrows_frames=4, ncols_frames=3):
    
    # Initialize the VideoCapture object to read from a video stored in the disk.
    video = cv2.VideoCapture(input_file)
    
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(video.get(cv2.CAP_PROP_FPS))
    frames = []
    original_video_frames = []
    only_skeleton_frames = []
    
    all_landmarks = []
    for i in range(total_frames):
        # Read a frame.
        ok, frame = video.read()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        original_video_frames.append(frame.copy())
    
        # Check if frame is not read properly.
        if not ok:
            # Break the loop.
            break
        
        # Get the width and height of the frame
        frame_height, frame_width, _ =  frame.shape
        # Resize the frame while keeping the aspect ratio.
        frame = cv2.resize(frame, (int(frame_width * (640 / frame_height)), 640))
        frame, skeleton, landmarks = estimPose_img(frame, pose_video, landmarks_c, connection_c, thickness, 
                              circle_r, display=False)
        frames.append(frame)
        all_landmarks.append(landmarks)
        only_skeleton_frames.append(skeleton)
    return original_video_frames, only_skeleton_frames, frames, all_landmarks

# image similarity

In [1]:
from scoring import evaluate_everything, refine_landmarks
from detector import PoseDetector
import matplotlib.pyplot as plt

# 비교할 이미지 경로 설정
p1 = "../images/jun_v.jpg"
p2 = "../images/wrong_pose_img.jpg"

# detector 정의. model_size로 크기 조정(0 ~ 2)
image_detector = PoseDetector(model_size=2, mode='IMAGE')

# 예측 수행
l1, seg1, ann_img1, bs1 = image_detector.get_detection(p1)
l2, seg2, ann_img2, bs2 = image_detector.get_detection(p2)
np_l1 = refine_landmarks(l1)
np_l2 = refine_landmarks(l2)


# 예측 landmark를 통한 평가
print("normalized result: ")
r = evaluate_everything(np_l1, bs1, np_l2, bs2, normalize=True)
print('\n\n')

print("not normalized result: ")
r = evaluate_everything(np_l1, bs1, np_l2, bs2, normalize=False)

d:\naver_boostcamp\project\level4-cv-finalproject-hackathon-cv-19-lv3\streamlit\models\pose_landmarker_heavy.task 파일이 이미 존재합니다.
normalized result: 
L1_score: 0.899100075331185
L2_distance: 0.6153559122962446
cos_similarity: 0.9820466798958206
PCK(thres=0.10): 0.5238095238095238
oks:: 0.725824407828108
matched: {'nose': False, 'left_ear': True, 'right_ear': False, 'left_shoulder': True, 'right_shoulder': False, 'left_elbow': False, 'right_elbow': False, 'left_wrist': False, 'right_wrist': True, 'left_pinky': False, 'right_pinky': False, 'left_hip': True, 'right_hip': False, 'left_knee': False, 'right_knee': True, 'left_ankle': True, 'right_ankle': True, 'left_heel': True, 'right_heel': True, 'left_foot_index': True, 'right_foot_index': True}



not normalized result: 
L1_score: 0.7728069701652974
L2_distance: 1.5183486853623762
cos_similarity: 0.9792274390460615
PCK(thres=0.10): 0.047619047619047616
oks:: 0.3294177896409596
matched: {'nose': False, 'left_ear': False, 'right_ear': False,

## Video Similarity without DTW

In [2]:
video_path_1 = "../videos/마라탕후루1.mp4"
video_path_2 = "../videos/마라탕후루2.mp4"
wrong_video = "../videos/케이춤.mp4"

video_detector = PoseDetector(model_size=2, mode='VIDEO')
original_frames_1, skeleton_1, ann_1, all_landmarks_1 = video_detector.estimPose_video(video_path_1)
video_detector.reset_detector()
original_frames_2, skeleton_2, ann_2, all_landmarks_2 = video_detector.estimPose_video(video_path_2)
video_detector.reset_detector()
_, _, _, wrong_landmarks = video_detector.estimPose_video(wrong_video)

d:\naver_boostcamp\project\level4-cv-finalproject-hackathon-cv-19-lv3\streamlit\models\pose_landmarker_heavy.task 파일이 이미 존재합니다.
video information!!
FPS:  30
total frame length:  845


100%|██████████| 845/845 [01:14<00:00, 11.36it/s]


video information!!
FPS:  30
total frame length:  863


100%|██████████| 863/863 [01:16<00:00, 11.32it/s]


video information!!
FPS:  29
total frame length:  823


100%|██████████| 823/823 [01:13<00:00, 11.23it/s]


In [21]:
import numpy as np
from collections import defaultdict

# 본 코드에서는 단순히 짧은 영상에 맞춰서 스코어를 계산하는 로직을 활용할 예정
def get_score_from_frames(all_landmarks1, all_landmarks2, score_target='PCK', pck_thres=0.1, thres=0.4, ignore_z=False):
    total_results = defaultdict(list)
    low_score_frames = []
    bs1 = np.array([1, 0, 0])
    bs2 = np.array([1, 0, 0])

    for frame_num, (landmarks1, landmarks2) in enumerate(zip(all_landmarks1, all_landmarks2)):
        np_l1 = refine_landmarks(landmarks1)
        np_l2 = refine_landmarks(landmarks2)
        results = evaluate_everything(np_l1, bs1, np_l2, bs2, pck_thres=pck_thres, verbose=False, ignore_z=ignore_z)
        for k, v in results.items():
            total_results[k].append(v)
            if score_target in k and results[k] < thres:
                low_score_frames.append(frame_num)
    
    for k in results.keys():
        if k=="matched":
            continue
        total_results[k] = np.mean(total_results[k])
    
    return total_results, low_score_frames

In [27]:
print("마라탕후루(28초) vs 마라탕후루(28초)")
total_results, low_score_frames = get_score_from_frames(all_landmarks_1, all_landmarks_2, pck_thres=0.05, thres=0.4, ignore_z=True)
for k, v in total_results.items():
    print(f"{k}: {v}")
print("low score frame rate: ", len(low_score_frames) / min(len(all_landmarks_1), len(all_landmarks_2)))
print('\n\n')

마라탕후루(28초) vs 마라탕후루(28초)
L1_score: 0.9471806770847617
L2_distance: 0.32746017876379346
cos_similarity: 0.9974753963735945
PCK(thres=0.05): 0.6082840236686391
oks:: 0.8870574904497737
matched: nan
low score frame rate:  0.17041420118343195





In [28]:
print("마라탕후루(28초) vs 케이춤(27초): ")
total_results, low_score_frames = get_score_from_frames(all_landmarks_1, wrong_landmarks, pck_thres=0.05, thres=0.4, ignore_z=True)
for k, v in total_results.items():
    print(f"{k}: {v}")
print("low score frame rate: ", len(low_score_frames) / min(len(all_landmarks_1), len(wrong_landmarks)))

마라탕후루(28초) vs 케이춤(27초): 
L1_score: 0.9321607998192717
L2_distance: 0.41872090618002156
cos_similarity: 0.9964980324233556
PCK(thres=0.05): 0.4621304171729445
oks:: 0.8331294437010619
matched: nan
low score frame rate:  0.362089914945322


## Video Similarity with DTW

In [16]:
import scoring
import importlib
import detector

importlib.reload(detector)

<module 'detector' from 'd:\\naver_boostcamp\\project\\level4-cv-finalproject-hackathon-cv-19-lv3\\streamlit\\detector.py'>

In [17]:
importlib.reload(scoring)

<module 'scoring' from 'd:\\naver_boostcamp\\project\\level4-cv-finalproject-hackathon-cv-19-lv3\\streamlit\\scoring.py'>

In [36]:
import numpy as np
from scipy.spatial.distance import euclidean, cosine
from fastdtw import fastdtw
from scoring import normalize_landmarks_to_range_by_mean, normalize_landmarks_to_range
from copy import deepcopy

def cosine_similarity(a, b):
    #return np.inner(a, b) / (np.linalg.norm(a) * (np.linalg.norm(b)))
    return np.dot(a, b) / (np.linalg.norm(a) * (np.linalg.norm(b)))

def pck(a, b, threshold=0.1):
    a = a.reshape(-1, 3)
    b = b.reshape(-1, 3)
    distance = np.linalg.norm(a[:, :3] - b[:, :3], axis=1)
    pck_score = np.mean(distance < threshold)
    return pck_score

def evaluate(all_landmarks_1, all_landmarks_2):
    all_landmarks_np_1 = np.array([refine_landmarks(l) for l in all_landmarks_1])
    all_landmarks_np_2 = np.array([refine_landmarks(l) for l in all_landmarks_2])
    all_landmarks_np_N_2 = normalize_landmarks_to_range_by_mean(all_landmarks_np_1, all_landmarks_np_2)

    # flattening to compare
    all_landmarks_np_flatten_1 = all_landmarks_np_1[..., :3].reshape(all_landmarks_np_1.shape[0], -1)
    all_landmarks_np_flatten_2 = all_landmarks_np_N_2[..., :3].reshape(all_landmarks_np_N_2.shape[0], -1)

    distance, path = fastdtw(all_landmarks_np_flatten_1, all_landmarks_np_flatten_2, dist=euclidean)
    print("euclidean distance: ", distance)

    # 유클리디안 거리로 찾아낸 path를 통해 정규화를 다시 수행
    all_landmarks_np_flatten_2 = np.zeros_like(all_landmarks_np_2)
    for idx1, idx2 in path:
        all_landmarks_np_flatten_2[idx2, ...] = normalize_landmarks_to_range(
            all_landmarks_np_1[idx1, ...],
            all_landmarks_np_N_2[idx2, ...]
        )
    all_landmarks_np_flatten_2 = all_landmarks_np_flatten_2[..., :3].reshape(all_landmarks_np_2.shape[0], -1)

    distance, path = fastdtw(all_landmarks_np_flatten_1, all_landmarks_np_flatten_2, dist=cosine_similarity)
    print("cosine similiarity: ", distance)

    distance, path = fastdtw(all_landmarks_np_flatten_1, all_landmarks_np_flatten_2, dist=pck)
    print("pck: ", distance)
    


print('right')
evaluate(all_landmarks_1, all_landmarks_2)

print('\n\nwrong')
evaluate(all_landmarks_1, wrong_landmarks)

right
euclidean distance:  973.058483884846
cosine similiarity:  813.8465076092934
pck:  101.19047619047778


wrong
euclidean distance:  992.4159004334344
cosine similiarity:  791.3661532840445
pck:  105.190476190477


# skeleton vector활용 1

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

keypoint_names = [
    "nose", "left eye (inner)", "left eye", "left eye (outer)", "right eye (inner)",
    "right eye", "right eye (outer)", "left ear", "right ear", "mouth (left)",
    "mouth (right)", "left shoulder", "right shoulder", "left elbow", "right elbow",
    "left wrist", "right wrist", "left pinky", "right pinky", "left index",
    "right index", "left thumb", "right thumb", "left hip", "right hip",
    "left knee", "right knee", "left ankle", "right ankle", "left heel",
    "right heel", "left foot index", "right foot index"
]

# select for oks calculation
selected_keypoints = [0,7,8,11,12,13,14,15,16,23,24,25,26,27,28]

connections = [
    (0,1), (0,2), # Nose to Ears
    (3,5), (4,6), # Shoulders to Elbows
    (5,7), (6,8), # Elbows to Wrists
    (9,11), (10,12), # Hips to Knees
    (11,13), (12,14), # Knees to Ankles
    (3, 4), (4, 10), (10, 9), (9, 3) # body
]

# for cosine similarity
vector_list = [
    (1, 2),
    (3, 5),
    (4, 6),
    (5, 7),
    (6, 8),
    (9, 11),
    (10, 12),
    (11, 13),
    (12, 14)
]


# Get keypoints data & bounded box size from 1 frame
def get_keypoints_and_boxsize(image):
    # return 
    # keypoints : list([x, y, z, visibility], ...)
    # boxsize : detection box length[가로, 세로, 높이]
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(image_rgb)

    keypoints = []
    if results.pose_landmarks:
        for idx, landmark in enumerate(results.pose_landmarks.landmark):
            if idx in selected_keypoints:
                keypoints.append([landmark.x, landmark.y, landmark.z, landmark.visibility])

    xmin, xmax, ymin, ymax, zmin, zmax = 0, 0, 0, 0, 0, 0
    if results.pose_landmarks:
        for landmark in results.pose_landmarks.landmark:
            if xmin == 0:
                xmin, ymin, zmin = landmark.x, landmark.y, landmark.z
            
            else:
                xmin, xmax, ymin, ymax, zmin, zmax = min(xmin, landmark.x), max(xmax, landmark.x), min(ymin, landmark.y), max(ymax, landmark.y), min(zmin, landmark.z), max(zmax, landmark.z)
    
    boxsize = (xmin, xmax, ymin, ymax, zmin, zmax)
    boxsize = [boxsize[2 * i + 1] - boxsize[2 * i] for i in range(3)]

    return keypoints, boxsize


# Calculate OKS value from 2 keypoints data from each data
def oks(gt, preds, idx, boxsize):
    sigmas = np.array([.026, .035, .035, .079, .079, .072, .072, .062, .062, .107, .107, .087, .087, .089, .089])
    dx = gt[0] - preds[0]
    dy = gt[1] - preds[1]
    bbox_gt = boxsize[0] ** 2 + boxsize[1] ** 2
    kp_c = sigmas[idx]
    return np.exp(-(dx ** 2 + dy ** 2) / (2 * (bbox_gt) * (kp_c**2)))


# Make cosine similarity to percent form
def cosine_similarity_to_percentage(similarity_list):
    similarity = np.mean(similarity_list)
    return (similarity + 1) * 50


# Calculate cosine similarity from each keypoint data
def cos_sim_w_keypoint(keypoints1, keypoints2):
    global vector_list
    cos_sim_list = []

    for vector in vector_list:
        z_num = 2
        idx1, idx2 = vector
        vec1 = (keypoints1[idx2][:z_num] - keypoints1[idx1][:z_num]).reshape(1, -1)
        vec2 = (keypoints2[idx2][:z_num] - keypoints2[idx1][:z_num]).reshape(1, -1)
        sim_value = cosine_similarity(vec1, vec2)
        cos_sim_list.append(sim_value)
    
    return cos_sim_list


# Calculate OKS & Cosine similarity from each keypoint data
def weighted_similarity(keypoints1, keypoints2, boxsize):
    keypoints1 = np.array(keypoints1)
    keypoints2 = np.array(keypoints2)

    if keypoints1.shape != keypoints2.shape:
        print(keypoints1.shape, keypoints2.shape)
        raise ValueError("Keypoint shapes do not match!")
    
    oks_list = []
    for i in range(len(keypoints1)):
        oks_list.append(oks(keypoints1[i][:3], keypoints2[i][:3], i, boxsize))

    cos_sim_list = cos_sim_w_keypoint(keypoints1, keypoints2)

    return cosine_similarity_to_percentage(np.mean(cos_sim_list)), (np.mean(oks_list)) * 100


# Make mean coordinate data from keypoints list
def mean_value_of_keypoints(keypoints):
    mean_of_keypoints = np.zeros_like(keypoints[0])
    for key in keypoints:
        mean_of_keypoints += key

    mean_of_keypoints /= len(keypoints)
    return mean_of_keypoints


def Scoring(video_path1, video_path2):
    cap1 = cv2.VideoCapture(video_path1)
    cap2 = cv2.VideoCapture(video_path2)

    frame_count = -1

    # List of OKS & Cosine similarity from each frame
    okslist = []
    cos_list = []

    # List of OKS & Cosine similarity from every 15 frame
    okslist_mean = []
    cos_list_mean = []

    # Make keypoint list
    list_keypoints1 = []
    list_keypoints2 = []

    while cap1.isOpened() and cap2.isOpened():
        frame_count += 1
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()

        if ret1 and ret2:
            keypoints1, boxsize = get_keypoints_and_boxsize(frame1)
            keypoints2, _ = get_keypoints_and_boxsize(frame2)

            list_keypoints1.append(keypoints1)
            list_keypoints2.append(keypoints2)

            similarity, oks_percent = weighted_similarity(keypoints1, keypoints2, boxsize) # Calculate Scores from each frame
            okslist.append(oks_percent)
            cos_list.append(similarity)
            print(f"Frame {frame_count+1}: Weighted similarity between keypoints1 and video: {similarity}")
            print(f"Frame {frame_count+1}: Weighted similarity between keypoints1 and video: {oks_percent}")

            if len(list_keypoints1) == 15:
                mean_keypoints1 = mean_value_of_keypoints(list_keypoints1)
                mean_keypoints2 = mean_value_of_keypoints(list_keypoints2)

                similarity_mean, oks_percent_mean = weighted_similarity(mean_keypoints1, mean_keypoints2, boxsize) # Calculate Scores from each mean frame
                okslist_mean.append(oks_percent_mean)
                cos_list_mean.append(similarity_mean)
                print(f"Frame {frame_count+1}: Weighted similarity between mean keypoints1 and video: {similarity_mean}")
                print(f"Frame {frame_count+1}: Weighted similarity between mean keypoints1 and video: {oks_percent_mean}")

                list_keypoints1 = []
                list_keypoints2 = []
            
            # Press 'q' to exit the loop and close the video window
            if cv2.waitKey(100) & 0xFF == ord('q'):
                break

        else:
            break


    print(f'oks = {np.mean(okslist)}, cos = {np.mean(cos_list)}')           # Print the score from each frame
    print(f'oks = {np.mean(okslist_mean)}, cos = {np.mean(cos_list_mean)}') # Print the score from every 15 frame
    cap2.release()
    cv2.destroyAllWindows()

In [None]:
Scoring("../마라탕후루1.mp4", "../마라탕후루2.mp4")

In [None]:
Scoring("../마라탕후루1.mp4", "../엔터테이먼트.mp4")

In [None]:
Scoring("../엔터테이먼트.mp4", "../마라탕후루2.mp4")

# OKS(거리기반 스코어), PCK(거리기반 정확도)

In [None]:
import mediapipe as mp # Mediapipe 라이브러리를 임포트합니다.
import numpy as np # 배열 및 수학 연산을 위한 NumPy 라이브러리를 임포트합니다.
import cv2 # OpenCV 라이브러리를 임포트합니다.
import shutil # 파일 복사 및 삭제를 위한 shutil 모듈을 임포트합니다.
import os # 운영 체제 관련 작업을 위한 os 모듈을 임포트합니다.
import json # JSON 데이터 처리를 위한 모듈을 임포트합니다.


# Mediapipe 라이브러리를 이용한 포즈 추출 설정
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

# 사용할 키포인트 이름 및 선택된 키포인트 인덱스 목록 설정
keypoint_names = [
    "nose", "left eye (inner)", "left eye", "left eye (outer)", "right eye (inner)",
    "right eye", "right eye (outer)", "left ear", "right ear", "mouth (left)",
    "mouth (right)", "left shoulder", "right shoulder", "left elbow", "right elbow",
    "left wrist", "right wrist", "left pinky", "right pinky", "left index",
    "right index", "left thumb", "right thumb", "left hip", "right hip",
    "left knee", "right knee", "left ankle", "right ankle", "left heel",
    "right heel", "left foot index", "right foot index"
]

selected_keypoints = [0,7,8,11,12,13,14,15,16,23,24,25,26,27,28]

connections = [
    (0,1), (0,2), # Nose to Ears
    (3,5), (4,6), # Shoulders to Elbows
    (5,7), (6,8), # Elbows to Wrists
    (9,11), (10,12), # Hips to Knees
    (11,13), (12,14), # Knees to Ankles
    (3, 4), (4, 10), (10, 9), (9, 3) # body
]

oks_cnt = [[] for _ in range(11)]
pck_cnt = [[] for _ in range(11)]

def delete_file_or_folder(path):
    try:
        if os.path.exists(path):
            if os.path.isfile(path):
                os.remove(path)
                print(f"File {path} deleted successfully.")
            elif os.path.isdir(path):
                shutil.rmtree(path)
                print(f"Folder {path} and its contents deleted successfully.")
        else:
            print(f"Path {path} not found. Skipping deletion.")
    except Exception as e:
        print(f"An error occurred while deleting: {e}")


# 정답 프레임에서 키포인트 데이터 및 바운딩 박스 크기를 가져오는 함수
def get_keypoints_and_boxsize(image):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(image_rgb)

    keypoints = []
    if results.pose_landmarks:
        for idx, landmark in enumerate(results.pose_landmarks.landmark):
            if idx in selected_keypoints:
                keypoints.append([landmark.x, landmark.y, landmark.z, landmark.visibility])

    xmin, xmax, ymin, ymax, zmin, zmax = 0, 0, 0, 0, 0, 0
    if results.pose_landmarks:
        for landmark in results.pose_landmarks.landmark:
            if xmin == 0:
                xmin, ymin, zmin = landmark.x, landmark.y, landmark.z

            else:
                xmin, xmax, ymin, ymax, zmin, zmax = min(xmin, landmark.x), max(xmax, landmark.x), min(ymin,
                                                                                                       landmark.y), max(
                    ymax, landmark.y), min(zmin, landmark.z), max(zmax, landmark.z)

    boxsize = (xmin, xmax, ymin, ymax, zmin, zmax)
    boxsize = [boxsize[2 * i + 1] - boxsize[2 * i] for i in range(3)]

    return keypoints, boxsize


# OKS 값 계산 함수
def oks(gt, preds, idx, boxsize):
    sigmas = np.array([.026, .035, .035, .079, .079, .072, .072, .062, .062, .107, .107, .087, .087, .089, .089])
    dx = gt[0] - preds[0]
    dy = gt[1] - preds[1]
    bbox_gt = boxsize[0] ** 2 + boxsize[1] ** 2
    kp_c = sigmas[idx]
    return np.exp(-(dx ** 2 + dy ** 2) / (2 * (bbox_gt) * (kp_c ** 2)))


# PCK 값 계산 함수
def pck(gt, preds, threshold):
    dx = gt[0] - preds[0]
    dy = gt[1] - preds[1]
    distance = np.sqrt(dx ** 2 + dy ** 2)
    return 1.0 if distance < threshold else 0.0


# 가중치가 적용된 유사도 계산 함수
def weighted_similarity(keypoints1, keypoints2, boxsize):

    oks_list = []
    pck_list = []
    for i in range(len(keypoints1)):
        oks_list.append(oks(keypoints1[i][:3], keypoints2[i][:3], i, boxsize))
        pck_list.append(pck(keypoints1[i][:3], keypoints2[i][:3], 0.1))

    return (np.mean(oks_list)) * 100, (np.mean(pck_list)) * 100


# 키포인트 리스트의 평균 좌표 계산 함수
def mean_value_of_keypoints(keypoints):
    mean_of_keypoints = np.zeros_like(keypoints[0])
    for key in keypoints:
        mean_of_keypoints += key

    mean_of_keypoints /= len(keypoints)
    return mean_of_keypoints


def Scoring(video_path1, video_path2):
    frame_cnt = 0
    # 업로드된 동영상 파일을 열기
    cap1 = cv2.VideoCapture(video_path1)
    cap2 = cv2.VideoCapture(video_path2)

    # 각 프레임에서의 OKS 및 pck의 리스트
    oks_list = []
    pck_list = []

    # 사용자의 키포인트 리스트
    user_keypoints = []

    # 동영상의 모든 프레임을 처리
    while cap1.isOpened() and cap2.isOpened():
        ret1, frame1 = cap1.read()
        frame1 = cv2.flip(frame1, 1)

        ret2, frame2 = cap2.read()
        frame2 = cv2.flip(frame2, 1)

        if ret1 and ret2:
            # 현재 프레임에서 사용자의 키포인트 및 바운딩 박스 크기 가져오기
            user_key, _ = get_keypoints_and_boxsize(frame1)
            user_keypoints.append(user_key)

            answer_key, _ = get_keypoints_and_boxsize(frame2)

            # 만약 정답 키포인트와 사용자 키포인트가 존재하면 점수 계산
            if len(answer_key) > 0 and len(user_key) > 0:
                oks_percent, pck_percent = weighted_similarity(np.array(answer_key), np.array(user_key),
                                                               _)  # Calculate Scores from each frame
                oks_cnt[int(oks_percent / 10)].append(frame_cnt)
                pck_cnt[int(pck_percent / 10)].append(frame_cnt)

                oks_list.append(oks_percent)
                pck_list.append(pck_percent)
        else:
            break
        frame_cnt = frame_cnt + 1

    oks_answer = np.mean(oks_list)
    pck_answer = np.mean(pck_list)
    print("oks =", oks_answer, "pck =", pck_answer)

    # JSON 응답에 넣을 데이터를 딕셔너리로 만듦 (값들을 float로 변환)
    response_data = {
        "oks_30": oks_answer,
        "pck_30": pck_answer,
        "oks_frame_score": oks_list,
        "pck_frame_score": pck_list
    }
    cap1.release()
    cap2.release()

    return response_data

In [None]:
landmarks, _ = get_keypoints_and_boxsize(cv2.imread('../images/right_pose_img.jpg'))

In [None]:
print(len(landmarks))

In [None]:
Scoring("../마라탕후루1.mp4", "../마라탕후루2.mp4")

In [None]:
Scoring("../마라탕후루1.mp4", "../엔터테이먼트.mp4")

In [None]:
Scoring("../마라탕후루2.mp4", "../엔터테이먼트.mp4")

# 거리기반 OKS, PCK and normalization

In [None]:
import mediapipe as mp # Mediapipe 라이브러리를 임포트합니다.
import numpy as np # 배열 및 수학 연산을 위한 NumPy 라이브러리를 임포트합니다.
import cv2 # OpenCV 라이브러리를 임포트합니다.
import shutil # 파일 복사 및 삭제를 위한 shutil 모듈을 임포트합니다.
import os # 운영 체제 관련 작업을 위한 os 모듈을 임포트합니다.
import json # JSON 데이터 처리를 위한 모듈을 임포트합니다.


# Mediapipe 라이브러리를 이용한 포즈 추출 설정
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

# 사용할 키포인트 이름 및 선택된 키포인트 인덱스 목록 설정
keypoint_names = [
    "nose", "left eye (inner)", "left eye", "left eye (outer)", "right eye (inner)",
    "right eye", "right eye (outer)", "left ear", "right ear", "mouth (left)",
    "mouth (right)", "left shoulder", "right shoulder", "left elbow", "right elbow",
    "left wrist", "right wrist", "left pinky", "right pinky", "left index",
    "right index", "left thumb", "right thumb", "left hip", "right hip",
    "left knee", "right knee", "left ankle", "right ankle", "left heel",
    "right heel", "left foot index", "right foot index"
]

selected_keypoints = [0,7,8,11,12,13,14,15,16,23,24,25,26,27,28]

connections = [
    (0,1), (0,2), # Nose to Ears
    (3,5), (4,6), # Shoulders to Elbows
    (5,7), (6,8), # Elbows to Wrists
    (9,11), (10,12), # Hips to Knees
    (11,13), (12,14), # Knees to Ankles
    (3, 4), (4, 10), (10, 9), (9, 3) # body
]

oks_cnt = [[] for _ in range(11)]
pck_cnt = [[] for _ in range(11)]

def delete_file_or_folder(path):
    try:
        if os.path.exists(path):
            if os.path.isfile(path):
                os.remove(path)
                print(f"File {path} deleted successfully.")
            elif os.path.isdir(path):
                shutil.rmtree(path)
                print(f"Folder {path} and its contents deleted successfully.")
        else:
            print(f"Path {path} not found. Skipping deletion.")
    except Exception as e:
        print(f"An error occurred while deleting: {e}")


# 정답 프레임에서 키포인트 데이터 및 바운딩 박스 크기를 가져오는 함수
def get_keypoints_and_boxsize(image):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(image_rgb)

    keypoints = []
    if results.pose_landmarks:
        for idx, landmark in enumerate(results.pose_landmarks.landmark):
            if idx in selected_keypoints:
                keypoints.append([landmark.x, landmark.y, landmark.z, landmark.visibility])

    xmin, xmax, ymin, ymax, zmin, zmax = 0, 0, 0, 0, 0, 0
    if results.pose_landmarks:
        for landmark in results.pose_landmarks.landmark:
            if xmin == 0:
                xmin, ymin, zmin = landmark.x, landmark.y, landmark.z

            else:
                xmin, xmax, ymin, ymax, zmin, zmax = min(xmin, landmark.x), max(xmax, landmark.x), min(ymin,
                                                                                                       landmark.y), max(
                    ymax, landmark.y), min(zmin, landmark.z), max(zmax, landmark.z)

    boxsize = (xmin, xmax, ymin, ymax, zmin, zmax)
    boxsize = [boxsize[2 * i + 1] - boxsize[2 * i] for i in range(3)]

    return keypoints, boxsize


# OKS 값 계산 함수
def oks(gt, preds, idx, boxsize):
    sigmas = np.array([.026, .035, .035, .079, .079, .072, .072, .062, .062, .107, .107, .087, .087, .089, .089])
    dx = gt[0] - preds[0]
    dy = gt[1] - preds[1]
    bbox_gt = boxsize[0] ** 2 + boxsize[1] ** 2
    kp_c = sigmas[idx]
    return np.exp(-(dx ** 2 + dy ** 2) / (2 * (bbox_gt) * (kp_c ** 2)))


# PCK 값 계산 함수
def pck(gt, preds, threshold):
    dx = gt[0] - preds[0]
    dy = gt[1] - preds[1]
    distance = np.sqrt(dx ** 2 + dy ** 2)
    return 1.0 if distance < threshold else 0.0


# 가중치가 적용된 유사도 계산 함수
def weighted_similarity(keypoints1, keypoints2, boxsize):

    oks_list = []
    pck_list = []
    for i in range(len(keypoints1)):
        oks_list.append(oks(keypoints1[i][:3], keypoints2[i][:3], i, boxsize))
        pck_list.append(pck(keypoints1[i][:3], keypoints2[i][:3], 0.1))

    return (np.mean(oks_list)) * 100, (np.mean(pck_list)) * 100


# 키포인트 리스트의 평균 좌표 계산 함수
def mean_value_of_keypoints(keypoints):
    mean_of_keypoints = np.zeros_like(keypoints[0])
    for key in keypoints:
        mean_of_keypoints += key

    mean_of_keypoints /= len(keypoints)
    return mean_of_keypoints


def Scoring(video_path1, video_path2):
    frame_cnt = 0
    # 업로드된 동영상 파일을 열기
    cap1 = cv2.VideoCapture(video_path1)
    cap2 = cv2.VideoCapture(video_path2)

    # 각 프레임에서의 OKS 및 pck의 리스트
    oks_list = []
    pck_list = []

    # 사용자의 키포인트 리스트
    user_keypoints = []

    # 동영상의 모든 프레임을 처리
    while cap1.isOpened() and cap2.isOpened():
        ret1, frame1 = cap1.read()
        frame1 = cv2.flip(frame1, 1)

        ret2, frame2 = cap2.read()
        frame2 = cv2.flip(frame2, 1)

        if ret1 and ret2:
            # 현재 프레임에서 사용자의 키포인트 및 바운딩 박스 크기 가져오기
            user_key, _ = get_keypoints_and_boxsize(frame1)
            user_keypoints.append(user_key)

            answer_key, _ = get_keypoints_and_boxsize(frame2)

            # 만약 정답 키포인트와 사용자 키포인트가 존재하면 점수 계산
            if len(answer_key) > 0 and len(user_key) > 0:
                oks_percent, pck_percent = weighted_similarity(np.array(answer_key), np.array(user_key),
                                                               _)  # Calculate Scores from each frame
                oks_cnt[int(oks_percent / 10)].append(frame_cnt)
                pck_cnt[int(pck_percent / 10)].append(frame_cnt)

                oks_list.append(oks_percent)
                pck_list.append(pck_percent)
        else:
            break
        frame_cnt = frame_cnt + 1

    oks_answer = np.mean(oks_list)
    pck_answer = np.mean(pck_list)
    print("oks =", oks_answer, "pck =", pck_answer)

    # JSON 응답에 넣을 데이터를 딕셔너리로 만듦 (값들을 float로 변환)
    response_data = {
        "oks_30": oks_answer,
        "pck_30": pck_answer,
        "oks_frame_score": oks_list,
        "pck_frame_score": pck_list
    }
    cap1.release()
    cap2.release()

    return response_data

# 시각화

In [None]:
from mediapipe_inference import get_detection
import matplotlib.pyplot as plt
import cv2
import numpy as np
from scoring import refine_landmarks


def normalize_landmarks_to_range(landmarks1, landmarks2):
    """
    Normalize landmarks2 to match the coordinate range of landmarks1.

    Parameters:
        landmarks1 (numpy array): Keypoints array for the first pose (num_selected_point, 4).
        landmarks2 (numpy array): Keypoints array for the second pose (num_selected_point, 4).

    Returns:
        numpy array: Normalized landmarks2 matching the range of landmarks1.
    """
    # Calculate min and max for landmarks1 and landmarks2
    min1 = np.min(landmarks1[:, :3], axis=0)  # (x_min, y_min, z_min) for landmarks1
    max1 = np.max(landmarks1[:, :3], axis=0)  # (x_max, y_max, z_max) for landmarks1

    min2 = np.min(landmarks2[:, :3], axis=0)  # (x_min, y_min, z_min) for landmarks2
    max2 = np.max(landmarks2[:, :3], axis=0)  # (x_max, y_max, z_max) for landmarks2

    # Normalize landmarks2 to the range of landmarks1
    normalized_landmarks2 = (landmarks2[:, :3] - min2) / (max2 - min2) * (max1 - min1) + min1

    # Combine normalized coordinates with the original visibility values
    normalized_landmarks2 = np.hstack((normalized_landmarks2, landmarks2[:, 3:4]))

    return normalized_landmarks2


def draw_landmarks_on_blank_image(landmarks1, landmarks2, image_size=(640, 640)):
    """
    Draw two sets of landmarks on a blank image.

    Parameters:
        landmarks1 (numpy array): Keypoints array for the first pose (num_selected_point, 4).
        landmarks2 (numpy array): Keypoints array for the second pose (num_selected_point, 4).
        image_size (tuple): Size of the blank image (height, width).

    Returns:
        numpy array: Image with landmarks visualized.
    """
    # Create a blank image
    blank_image = np.zeros((image_size[0], image_size[1], 3), dtype=np.uint8)

    # Normalize coordinates to fit within the image size
    def normalize_coordinates(landmarks, image_size):
        height, width = image_size
        normalized_landmarks = []
        for landmark in landmarks:
            x, y = int(landmark[0] * width), int(landmark[1] * height)
            normalized_landmarks.append((x, y))
        return normalized_landmarks

    # Normalize landmarks
    normalized_landmarks1 = normalize_coordinates(landmarks1, image_size)
    normalized_landmarks2 = normalize_coordinates(landmarks2, image_size)

    # Draw landmarks1 (red)
    for x, y in normalized_landmarks1:
        cv2.circle(blank_image, (x, y), radius=5, color=(0, 0, 255), thickness=-1)

    # Draw landmarks2 (blue)
    for x, y in normalized_landmarks2:
        cv2.circle(blank_image, (x, y), radius=5, color=(255, 0, 0), thickness=-1)

    # Optionally connect landmarks (example: skeletal connections can be added here)
    # You can define a list of connections (e.g., POSE_CONNECTIONS) to draw lines between keypoints.

    return blank_image

p1 = "../images/승윤팔짱1.jpg"
p2= "../images/준일팔짱.jpg"
l1, seg1, ann_img1, bs1 = get_detection(p1)
l2, seg2, ann_img2, bs2 = get_detection(p2)
np_l1 = refine_landmarks(l1)
np_l2 = refine_landmarks(l2)
np_l2 = normalize_landmarks_to_range(np_l1, np_l2)

from keypoint_map import KEYPOINT_MAPPING, SELECTED_KEYPOINTS
for i in range(np_l1.shape[0]):
    print(f"difference of keypoint {KEYPOINT_MAPPING[SELECTED_KEYPOINTS[i]]}: {np.linalg.norm(np_l2[i] - np_l1[i])}")

plt.imshow(draw_landmarks_on_blank_image(np_l1, np_l2))
plt.show()