In [2]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt

import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")


In [3]:
# MediaPipe handles shared by the cells below:
#   mp_pose    -> pose solution (Pose estimator and PoseLandmark enum)
#   mp_drawing -> helpers used to draw landmarks onto frames
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# The distance / angle helpers are defined in the next cell

In [4]:
from math import sqrt

# Shrink (or enlarge) a frame; the default halves both dimensions
def rescale_frame(frame, percent=50):
    '''
    Resize a frame to ``percent`` % of its original width and height.

    The target width and height are truncated to integers and the frame is
    resized with OpenCV's INTER_AREA interpolation.
    '''
    src_height, src_width = frame.shape[0], frame.shape[1]
    # cv2.resize expects the size as (width, height)
    target_size = (
        int(src_width * percent / 100),
        int(src_height * percent / 100),
    )
    return cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)

# Euclidean distance between two points in 2D
def calculate_distance(pointX, pointY) -> float:
    '''
    Return the straight-line distance between two 2D points.

    Each argument must unpack into exactly two coordinates (x, y).
    '''
    (start_x, start_y), (end_x, end_y) = pointX, pointY

    delta_x = end_x - start_x
    delta_y = end_y - start_y

    return sqrt(delta_x ** 2 + delta_y ** 2)

# Angle at vertex B formed by the points A-B-C
def calculate_angle(pointA, pointB, pointC) -> float:
    '''
    Calculate the angle ABC (in degrees) using the dot product.

    The points may have any dimensionality (2D, 3D, ...) as long as all
    three have the same length.

    Args:
        pointA: coordinates of the first end point.
        pointB: coordinates of the vertex where the angle is measured.
        pointC: coordinates of the second end point.

    Returns:
        The angle between vectors BA and BC, in the range [0, 180], as a
        built-in float. If A or C coincides with B the angle is undefined
        and ``nan`` is returned (without emitting a division warning).
    '''
    vertex = np.asarray(pointB, dtype=float)

    # Vectors from the vertex to each end point
    BA = np.asarray(pointA, dtype=float) - vertex
    BC = np.asarray(pointC, dtype=float) - vertex

    norm_product = np.linalg.norm(BA) * np.linalg.norm(BC)
    if norm_product == 0:
        # A zero-length vector has no direction, so the angle is undefined
        return float("nan")

    # cos(theta) = (BA . BC) / (|BA| * |BC|)
    cos_theta = np.dot(BA, BC) / norm_product

    # Clip to guard against rounding pushing the cosine slightly outside [-1, 1]
    return float(np.degrees(np.arccos(np.clip(cos_theta, -1.0, 1.0))))


## 1. Lỗi chân quá rộng hoặc quá hẹp
Vì vị trí bàn chân không di chuyển trong quá trình tập nên chỉ cần so sánh tỉ lệ giữa khoảng cách 2 chân và khoảng cách 2 vai để đưa ra dự đoán.


Dataframe cho khảo sát chân rộng, hẹp

In [5]:
# Pose landmarks to keep (names of mp_pose.PoseLandmark members)
Important_kp = [
    "NOSE",
    "LEFT_SHOULDER", "RIGHT_SHOULDER",
    "LEFT_HIP", "RIGHT_HIP",
    "LEFT_KNEE", "RIGHT_KNEE",
    "LEFT_ANKLE", "RIGHT_ANKLE",
]

# DataFrame header: "label" followed by x / y / z / visibility for every landmark
header = ["label"]

for kp in Important_kp:
    header += [f"{kp}_{field}" for field in ("x", "y", "z", "visibility")]

header

['label',
 'NOSE_x',
 'NOSE_y',
 'NOSE_z',
 'NOSE_visibility',
 'LEFT_SHOULDER_x',
 'LEFT_SHOULDER_y',
 'LEFT_SHOULDER_z',
 'LEFT_SHOULDER_visibility',
 'RIGHT_SHOULDER_x',
 'RIGHT_SHOULDER_y',
 'RIGHT_SHOULDER_z',
 'RIGHT_SHOULDER_visibility',
 'LEFT_HIP_x',
 'LEFT_HIP_y',
 'LEFT_HIP_z',
 'LEFT_HIP_visibility',
 'RIGHT_HIP_x',
 'RIGHT_HIP_y',
 'RIGHT_HIP_z',
 'RIGHT_HIP_visibility',
 'LEFT_KNEE_x',
 'LEFT_KNEE_y',
 'LEFT_KNEE_z',
 'LEFT_KNEE_visibility',
 'RIGHT_KNEE_x',
 'RIGHT_KNEE_y',
 'RIGHT_KNEE_z',
 'RIGHT_KNEE_visibility',
 'LEFT_ANKLE_x',
 'LEFT_ANKLE_y',
 'LEFT_ANKLE_z',
 'LEFT_ANKLE_visibility',
 'RIGHT_ANKLE_x',
 'RIGHT_ANKLE_y',
 'RIGHT_ANKLE_z',
 'RIGHT_ANKLE_visibility']

In [6]:
# Turn one MediaPipe pose result into a flat list of keypoint values
def extract_important_keypoints(results) -> list:
    '''
    Flatten the landmarks named in ``Important_kp`` into a single list.

    For each name, in order, the landmark's x, y, z and visibility are
    emitted, so the result has 4 values per entry of ``Important_kp``.
    '''
    pose_points = results.pose_landmarks.landmark

    selected = [
        pose_points[mp_pose.PoseLandmark[name].value]
        for name in Important_kp
    ]
    rows = [[point.x, point.y, point.z, point.visibility] for point in selected]

    return np.array(rows).flatten().tolist()

Xem data chuẩn

In [7]:
def _landmark_xy(landmarks, landmark):
    '''Return the [x, y] coordinates of one pose landmark.'''
    point = landmarks[landmark.value]
    return [point.x, point.y]


# Process one video and log the shoulder / feet width of every frame
def process_frame(Video_folder, Video_name):
    '''
    Measure shoulder width and feet width on every frame of one video.

    Each frame is converted to RGB, downscaled to 50 % and passed to
    MediaPipe Pose. For every frame in which a pose is detected, one row
    ``[Video_name, shoulder_width, feet_width]`` is appended to the
    module-level DataFrame ``df``, which must exist before this function is
    called. The widths are 2D distances between the x/y coordinates reported
    for the left/right shoulders and the left/right ankles.

    Frames without a detected pose are skipped. If measuring or annotating a
    frame raises, the error is printed and the row is still appended, with
    ``None`` for any width that was not computed.

    Args:
        Video_folder: directory that contains the video.
        Video_name: file name of the video inside ``Video_folder``; also
            stored as the first value of each logged row.
    '''
    Cap = cv2.VideoCapture(f"{Video_folder}/{Video_name}")

    try:
        with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
            while Cap.isOpened():
                ret, image = Cap.read()

                if not ret:
                    break

                # Convert to RGB before pose detection, then downscale
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = rescale_frame(image, percent=50)
                image.flags.writeable = True  # the overlay calls below draw into this array

                # Extract the pose keypoints
                results = pose.process(image)

                # Skip frames where no pose was detected
                if not results.pose_landmarks:
                    continue

                # Stay None if the measurement below fails
                shoulder_width = feet_width = None

                try:
                    landmarks = results.pose_landmarks.landmark

                    # Distance between the two shoulders
                    shoulder_width = calculate_distance(
                        _landmark_xy(landmarks, mp_pose.PoseLandmark.LEFT_SHOULDER),
                        _landmark_xy(landmarks, mp_pose.PoseLandmark.RIGHT_SHOULDER),
                    )

                    # Distance between the two ankles
                    feet_width = calculate_distance(
                        _landmark_xy(landmarks, mp_pose.PoseLandmark.LEFT_ANKLE),
                        _landmark_xy(landmarks, mp_pose.PoseLandmark.RIGHT_ANKLE),
                    )

                    # Background box for the overlay text
                    cv2.rectangle(image, (0, 0), (500, 60), (245, 117, 16), -1)

                    # Display feet distance
                    cv2.putText(image, "FEET", (15, 12), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(feet_width, 2)), (10, 40), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Display shoulder distance
                    cv2.putText(image, "SHOULDER", (95, 12), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(shoulder_width, 2)), (90, 40), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                except Exception as e:
                    # Best effort: report the problem and still log the frame
                    print(f"Error: {e}")

                # Draw landmarks and connections
                mp_drawing.draw_landmarks(
                    image,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(244, 117, 66), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
                )

                # Appends to the global frame log created by the calling cell
                df.loc[len(df)] = [Video_name, shoulder_width, feet_width]

                # To inspect the annotated frame while debugging:
                #   plt.imshow(image); plt.axis("off"); plt.show()
    finally:
        # Always free the capture handle, even if processing a frame raised
        Cap.release()


In [8]:
import os

# Start from an empty frame log so re-running this cell does not duplicate rows
df = pd.DataFrame(columns=["video", "shoulder_width", "feet_width"])

# Folder with the videos to analyse. Other folders under Data/Train:
#   "Data/Train/Xuong_qua_xau", "Data/Train/Gap_lung",
#   "Data/Train/Chan_qua_rong", "Data/Train/Chan_qua_hep"
video_folder = "Data/Train/Correct"

# os.listdir returns entries in arbitrary order; sort so the rows are logged
# in the same order on every run
video_files = sorted(f for f in os.listdir(video_folder) if f.endswith(".mp4"))

for video in video_files:
    process_frame(video_folder, video)

In [9]:
# Preview the per-frame measurements collected by the cell above
df 

Unnamed: 0,video,shoulder_width,feet_width
0,20250228_080823000_iOS (video-converter.com).mp4,0.098759,0.072577
1,20250228_080823000_iOS (video-converter.com).mp4,0.098721,0.074630
2,20250228_080823000_iOS (video-converter.com).mp4,0.098385,0.075260
3,20250228_080823000_iOS (video-converter.com).mp4,0.097459,0.075738
4,20250228_080823000_iOS (video-converter.com).mp4,0.096742,0.075840
...,...,...,...
6660,VID_20250307_091538.mp4,0.096830,0.089440
6661,VID_20250307_091538.mp4,0.096469,0.089385
6662,VID_20250307_091538.mp4,0.096793,0.089124
6663,VID_20250307_091538.mp4,0.096223,0.089858


In [10]:
# Feet-to-shoulder width ratio for every frame.
# process_frame logs None when a measurement fails, which makes the column
# object dtype and plain division raise TypeError; coerce to numeric so those
# frames get NaN instead.
df["ratio"] = (
    pd.to_numeric(df["feet_width"], errors="coerce")
    / pd.to_numeric(df["shoulder_width"], errors="coerce")
)
df

Unnamed: 0,video,shoulder_width,feet_width,ratio
0,20250228_080823000_iOS (video-converter.com).mp4,0.098759,0.072577,0.734887
1,20250228_080823000_iOS (video-converter.com).mp4,0.098721,0.074630,0.755966
2,20250228_080823000_iOS (video-converter.com).mp4,0.098385,0.075260,0.764950
3,20250228_080823000_iOS (video-converter.com).mp4,0.097459,0.075738,0.777133
4,20250228_080823000_iOS (video-converter.com).mp4,0.096742,0.075840,0.783939
...,...,...,...,...
6660,VID_20250307_091538.mp4,0.096830,0.089440,0.923685
6661,VID_20250307_091538.mp4,0.096469,0.089385,0.926569
6662,VID_20250307_091538.mp4,0.096793,0.089124,0.920774
6663,VID_20250307_091538.mp4,0.096223,0.089858,0.933850


In [11]:
# Summary statistics of the collected measurements and the ratio column
df.describe()

Unnamed: 0,shoulder_width,feet_width,ratio
count,6665.0,6665.0,6665.0
mean,0.096185,0.083392,0.870304
std,0.0076,0.01168,0.127178
min,0.073628,0.055971,0.598366
25%,0.092032,0.072032,0.755579
50%,0.095403,0.087403,0.889804
75%,0.099147,0.091515,0.975125
max,0.116609,0.106113,1.113975


# Kết luận 
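Ngưỡng được lấy theo giá trị nhỏ nhất (≈ 0.598) và lớn nhất (≈ 1.114) của tỉ lệ feet/shoulder trên bộ video tập đúng: <br>
Ratio feet/shoulder > 1.11 : rộng <br>
Ratio feet/shoulder < 0.59 : hẹp <br>
Còn lại là đúng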