In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import numpy as np
import os


In [2]:
# Load the MoveNet single-pose "lightning" model from TensorFlow Hub.
model = hub.load('https://tfhub.dev/google/movenet/singlepose/lightning/4')

# Output folder for the annotated video (created if it does not exist).
output_dir = r'C:\data\result\bone_recognition\movenet'
os.makedirs(output_dir, exist_ok=True)

# Open the input video and fail loudly if it cannot be read; otherwise the
# processing loop would silently do nothing and still report success.
video_path = r'C:\data\video\0-两手托天理三焦（八段锦）\standard_0.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Cannot open input video: {video_path}")

# Read the video properties needed to configure the writer.
fourcc = cv2.VideoWriter_fourcc(*'XVID')
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
# output play at the wrong speed / duration.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # Some containers report 0 (or NaN) fps; fall back to an assumed default
    # so that the writer can still be opened.
    fps = 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create the output video writer with the same frame size as the input.
output_path = os.path.join(output_dir, 'output_video.avi')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    raise IOError(f"Cannot open output video for writing: {output_path}")

In [3]:
# Keypoint index -> body-part name (17 entries, indices 0..16).
KEYPOINTS = dict(enumerate((
    'nose',
    'left_eye', 'right_eye',
    'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
)))

# Skeleton connections: (keypoint index, keypoint index) -> colour code.
# NOTE(review): 'm' / 'y' look like matplotlib-style colour codes
# (magenta / yellow) — confirm with whoever consumes the values.
EDGES = {
    # head
    (0, 1): 'm',
    (0, 2): 'm',
    (1, 3): 'm',
    (2, 4): 'm',
    # nose to shoulders
    (0, 5): 'm',
    (0, 6): 'm',
    # arms
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'm',
    (8, 10): 'm',
    # torso
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'm',
    (11, 12): 'y',
    # legs
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'm',
    (14, 16): 'm',
}

In [4]:
# Process every frame of the video: run MoveNet, adjust the eye keypoints,
# draw the skeleton, write the annotated frame and show a live preview.
CONFIDENCE_THRESHOLD = 0.3  # minimum keypoint score for a point to be used/drawn
INPUT_SIZE = 192            # side length of the square model input

movenet = model.signatures['serving_default']

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Actual size of this frame (used to map keypoints back to pixels).
        height, width, _ = frame.shape

        # OpenCV decodes frames as BGR, while MoveNet expects RGB input.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Letterbox to the square model input and cast to int32.
        input_image = tf.image.resize_with_pad(
            np.expand_dims(rgb_frame, axis=0), INPUT_SIZE, INPUT_SIZE
        )
        input_image = tf.cast(input_image, dtype=tf.int32)

        # Run inference; after squeezing, each row is (y, x, confidence).
        # Copy so the in-place eye update below never touches a shared buffer.
        keypoints = np.squeeze(movenet(input_image)['output_0'].numpy()).copy()

        # Nose and ear positions (normalised coordinates).
        nose_y, nose_x, nose_confidence = keypoints[0]
        left_ear_y, left_ear_x, left_ear_confidence = keypoints[3]
        right_ear_y, right_ear_x, right_ear_confidence = keypoints[4]

        # Eye positions (normalised coordinates).
        left_eye_y, left_eye_x, left_eye_confidence = keypoints[1]
        right_eye_y, right_eye_x, right_eye_confidence = keypoints[2]

        # Constrain the eye positions relative to the nose and ears.
        # NOTE(review): the ear coordinates are used without checking the ear
        # confidence — confirm this is intended.
        if left_eye_confidence > CONFIDENCE_THRESHOLD and nose_confidence > CONFIDENCE_THRESHOLD:
            # The left eye must be above the nose and must not pass the left ear.
            if left_eye_y >= nose_y or left_eye_x >= left_ear_x:
                left_eye_y = nose_y - 0.05
                left_eye_x = (left_ear_x + nose_x) / 2 - 0.05

        if right_eye_confidence > CONFIDENCE_THRESHOLD and nose_confidence > CONFIDENCE_THRESHOLD:
            # The right eye must be above the nose and must not pass the right ear.
            if right_eye_y >= nose_y or right_eye_x <= right_ear_x:
                right_eye_y = nose_y - 0.05
                right_eye_x = (right_ear_x + nose_x) / 2 + 0.05

        # Write the (possibly adjusted) eye positions back.
        keypoints[1] = [left_eye_y, left_eye_x, left_eye_confidence]
        keypoints[2] = [right_eye_y, right_eye_x, right_eye_confidence]

        # The model coordinates are normalised to the *padded* square image
        # produced by resize_with_pad, not to the original frame. Undo the
        # letterboxing: scale by the longer side and subtract the centred
        # padding that was added along the shorter side.
        side = max(width, height)
        pad_x = (side - width) / 2
        pad_y = (side - height) / 2
        points = [
            (int(x * side - pad_x), int(y * side - pad_y))
            for y, x, _ in keypoints
        ]
        visible = keypoints[:, 2] > CONFIDENCE_THRESHOLD

        # Draw the skeleton; an edge is drawn only if both ends are confident.
        # The colour codes stored in EDGES are not used: every edge is green.
        for p1, p2 in EDGES:
            if visible[p1] and visible[p2]:
                cv2.line(frame, points[p1], points[p2], (0, 255, 0), 2)

        # Draw and label every keypoint above the confidence threshold.
        for idx, (x_px, y_px) in enumerate(points):
            if visible[idx]:
                cv2.circle(frame, (x_px, y_px), 5, (0, 0, 255), -1)
                cv2.putText(frame, KEYPOINTS[idx], (x_px, y_px - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Append the annotated frame to the output video.
        out.write(frame)

        # Live preview (optional); press 'q' to stop early.
        cv2.imshow('MoveNet Pose Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the video handles and close the preview window, even if
    # processing fails part-way, so the output file is finalised.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"视频已处理并保存到 {output_path}")


视频已处理并保存到 C:\data\result\bone_recognition\movenet\output_video.avi
