In [1]:
!pip install opencv-python
!pip install mediapipe



In [2]:
import cv2
import mediapipe as mp
import numpy as np
import os
import json

# Short aliases for the MediaPipe Tasks classes involved in pose landmarking.
# NOTE(review): the functions defined in later cells reference mp.tasks.*
# directly, so these aliases look unused in the visible cells — confirm before
# removing them.
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

In [3]:
# Display the path where the pose model file is expected: the current working
# directory joined with 'pose_landmarker_full.task'.
os.path.join(os.getcwd(), 'pose_landmarker_full.task')

'c:\\Users\\USER\\workspace\\idol\\pose_landmarker_full.task'

In [4]:
# Define the create_pose_landmarker function
def create_pose_landmarker(model_path=None):
    """Create a MediaPipe PoseLandmarker configured for video input.

    Args:
        model_path: Path to the pose landmarker ``.task`` model file. When
            omitted, ``pose_landmarker_full.task`` in the current working
            directory is used.

    Returns:
        The object returned by ``PoseLandmarker.create_from_options``. It is
        configured with ``RunningMode.VIDEO``, a single pose per frame,
        0.5 for the detection / presence / tracking confidence thresholds,
        and segmentation masks disabled.

    Raises:
        FileNotFoundError: If the model file does not exist. Checking here
            gives a clear message before the path is handed to MediaPipe.
    """
    if model_path is None:
        model_path = os.path.join(os.getcwd(), 'pose_landmarker_full.task')
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"Pose landmarker model not found: {model_path}")

    options = mp.tasks.vision.PoseLandmarkerOptions(
        base_options=mp.tasks.BaseOptions(model_asset_path=model_path),
        running_mode=mp.tasks.vision.RunningMode.VIDEO,
        num_poses=1,
        min_pose_detection_confidence=0.5,
        min_pose_presence_confidence=0.5,
        min_tracking_confidence=0.5,
        output_segmentation_masks=False
    )
    return mp.tasks.vision.PoseLandmarker.create_from_options(options)

# Define the process_video function
def process_video(video_path, landmarker, duration_s=6):
    """Run pose detection on the first ``duration_s`` seconds of a video.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        landmarker: Object exposing ``detect_for_video(mp_image, timestamp_ms)``
            whose result has a ``pose_landmarks`` list.
        duration_s: Seconds to process from the start of the video; converted
            to a frame budget using the FPS reported by the capture.

    Returns:
        A list holding the first detected pose's landmarks for every frame in
        which at least one pose was found. Frames without a detection are
        skipped, so list indices do not necessarily match frame indices.

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If the capture does not report a positive FPS, which is
            needed to derive the frame budget and timestamps.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also rejects NaN, which `fps <= 0` would let through.
        if not fps > 0:
            raise ValueError(f"Video reports an invalid FPS ({fps}): {video_path}")

        total_frames = int(duration_s * fps)
        poses = []
        frame_count = 0

        while cap.isOpened() and frame_count < total_frames:
            success, image = cap.read()
            if not success:
                break

            # OpenCV decodes frames as BGR; the image is declared as SRGB.
            rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Use frame count for timestamp instead of CAP_PROP_POS_MSEC
            frame_timestamp_ms = int(frame_count * (1000 / fps))

            pose_landmarker_result = landmarker.detect_for_video(mp_image, frame_timestamp_ms)

            if pose_landmarker_result.pose_landmarks:
                poses.append(pose_landmarker_result.pose_landmarks[0])

            frame_count += 1

        return poses
    finally:
        # Release the capture even when opening, reading or detection fails.
        cap.release()

# Define the compare_poses function
def compare_poses(expert_poses, user_poses):
    """Score how far each user pose is from the matching expert pose.

    Poses are paired by position and the comparison stops at the end of the
    shorter list. Within a pair, landmarks are also paired by position, and
    the score is the sum of the 3D Euclidean distances between them.

    Args:
        expert_poses: Sequence of poses; each pose is a sequence of landmarks
            with ``x``, ``y`` and ``z`` attributes.
        user_poses: Sequence of poses in the same format.

    Returns:
        A list with one summed distance per compared pose pair.
    """
    def _pose_distance(expert_pose, user_pose):
        # Sum of per-landmark Euclidean distances for a single pose pair.
        total = 0
        for ref, cand in zip(expert_pose, user_pose):
            dx = ref.x - cand.x
            dy = ref.y - cand.y
            dz = ref.z - cand.z
            total += np.sqrt(dx**2 + dy**2 + dz**2)
        return total

    paired = zip(expert_poses, user_poses)
    return [_pose_distance(ref_pose, cand_pose) for ref_pose, cand_pose in paired]

# Define the generate_feedback function
def generate_feedback(differences, threshold=2.0):
    """Build feedback messages for frames whose pose difference is too large.

    Args:
        differences: Iterable of per-frame difference scores (for example the
            output of ``compare_poses``).
        threshold: A frame is reported only when its score is strictly greater
            than this value. Defaults to 2.0; this value may need adjustment.

    Returns:
        A list of messages, one per reported frame, in frame order. Each
        message carries the frame's index within ``differences`` and the
        score formatted with two decimals.
    """
    return [
        f"Frame {i}: Pose improvement needed. Difference: {diff:.2f}"
        for i, diff in enumerate(differences)
        if diff > threshold
    ]

# Main execution code
# Paths of the two clips to compare, relative to the working directory.
expert_video = "./expert_dance.mp4"
user_video = "./user_dance.mp4"

# Extract poses from the expert clip. process_video is called with its default
# duration_s, so only the first 6 seconds are analysed. The landmarker is used
# as a context manager and is scoped to this block.
with create_pose_landmarker() as landmarker:
    print("Processing expert video...")
    expert_poses = process_video(expert_video, landmarker)
    print(f"Extracted {len(expert_poses)} poses from expert video")

Processing expert video...




Extracted 180 poses from expert video


In [5]:
# Extract poses from the user's clip with a newly created landmarker.
# NOTE(review): process_video restarts its timestamps at 0 for every video;
# presumably a landmarker in VIDEO mode cannot be reused across clips for that
# reason — confirm against the MediaPipe documentation.
with create_pose_landmarker() as landmarker:
    print("Processing user video...")
    user_poses = process_video(user_video, landmarker)
    print(f"Extracted {len(user_poses)} poses from user video")

Processing user video...
Extracted 180 poses from user video


In [6]:
def format_pose_data(expert_poses, user_poses, start=0, stop=90):
    """Serialize a window of expert and user poses to a JSON string.

    Converts the pose data into a format that can be sent to an LLM.

    Args:
        expert_poses: Sequence of poses; each pose is an iterable of landmarks
            with ``x``, ``y`` and ``z`` attributes.
        user_poses: Sequence of poses in the same format.
        start: Index of the first pose to include (slice semantics).
        stop: Index one past the last pose to include (slice semantics). The
            defaults select the first 90 poses of each list.

    Returns:
        A JSON string of the form
        ``{"expert_poses": [[{"x":..., "y":..., "z":...}, ...], ...],
        "user_poses": [...]}``.
    """
    def _serialize(poses):
        # One list per pose, one {"x", "y", "z"} dict per landmark.
        return [
            [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in pose]
            for pose in poses[start:stop]
        ]

    formatted_data = {
        "expert_poses": _serialize(expert_poses),
        "user_poses": _serialize(user_poses),
    }
    return json.dumps(formatted_data)

In [7]:
# JSON payload built from the extracted poses; later cells append it to the
# prompt text.
formatted_data = format_pose_data(expert_poses, user_poses)

In [8]:
# NOTE(review): this displays the entire JSON string; consider showing only a
# slice (e.g. formatted_data[:500]) to keep the cell output small.
formatted_data

'{"expert_poses": [[{"x": 0.4878964424133301, "y": 0.46058711409568787, "z": -0.4769990146160126}, {"x": 0.4996036887168884, "y": 0.45077046751976013, "z": -0.44850894808769226}, {"x": 0.5071532726287842, "y": 0.4509001672267914, "z": -0.44864434003829956}, {"x": 0.5133576393127441, "y": 0.45127829909324646, "z": -0.44887012243270874}, {"x": 0.4771316945552826, "y": 0.4514254927635193, "z": -0.4436333477497101}, {"x": 0.4704896807670593, "y": 0.4521044194698334, "z": -0.4436315894126892}, {"x": 0.4645884335041046, "y": 0.4528793394565582, "z": -0.44362953305244446}, {"x": 0.5211023092269897, "y": 0.4565342962741852, "z": -0.24966996908187866}, {"x": 0.45864295959472656, "y": 0.45840510725975037, "z": -0.22780343890190125}, {"x": 0.5043230652809143, "y": 0.47126132249832153, "z": -0.39846569299697876}, {"x": 0.47536250948905945, "y": 0.47122636437416077, "z": -0.3925473392009735}, {"x": 0.5679296851158142, "y": 0.5148597955703735, "z": -0.07498189806938171}, {"x": 0.43020978569984436, "

In [9]:
# Preview of the prompt text: a fixed header followed by the JSON pose payload.
# NOTE(review): the `f"` below sits inside the triple-quoted literal, so it is
# printed verbatim and is not an f-string prefix; the opening quote after it is
# never closed before the final "}".
# NOTE(review): the text says "1 second of 30 frames", while format_pose_data
# keeps the first 90 poses of each list — confirm which is intended.
print("""
    {"role": "system", "content": "You are a dance instructor analyzing pose data."},
    {"role": "user", "content": f"Compare the following expert and user pose data for approximately 1 second of 30 frames and provide feedback on the user's performance. Focus on major differences and suggest improvements.

        MediaPipe Pose Landmark Data: 
    """+formatted_data+ "}")


    {"role": "system", "content": "You are a dance instructor analyzing pose data."},
    {"role": "user", "content": f"Compare the following expert and user pose data for approximately 1 second of 30 frames and provide feedback on the user's performance. Focus on major differences and suggest improvements.

        MediaPipe Pose Landmark Data: 
    {"expert_poses": [[{"x": 0.4878964424133301, "y": 0.46058711409568787, "z": -0.4769990146160126}, {"x": 0.4996036887168884, "y": 0.45077046751976013, "z": -0.44850894808769226}, {"x": 0.5071532726287842, "y": 0.4509001672267914, "z": -0.44864434003829956}, {"x": 0.5133576393127441, "y": 0.45127829909324646, "z": -0.44887012243270874}, {"x": 0.4771316945552826, "y": 0.4514254927635193, "z": -0.4436333477497101}, {"x": 0.4704896807670593, "y": 0.4521044194698334, "z": -0.4436315894126892}, {"x": 0.4645884335041046, "y": 0.4528793394565582, "z": -0.44362953305244446}, {"x": 0.5211023092269897, "y": 0.4565342962741852, "z": -0.249669969081878

In [10]:
!pip install -q -U google-generativeai

In [11]:
import google.generativeai as genai

genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# The persona and reply language are passed as a system instruction. Previously
# they were pasted into the prompt as pseudo-JSON "role" objects, which the API
# receives as plain text (including a literal `f"` and an unclosed quote).
model = genai.GenerativeModel(
    'gemini-1.5-flash',
    system_instruction=(
        "You are a dance instructor analyzing pose data. "
        "Respond in Korean."
    ),
)

# Plain-text request followed by the JSON pose payload for the first window.
prompt = (
    "Compare the following expert and user pose data for approximately "
    "3 seconds (90 frames) and provide feedback on the user's performance. "
    "Focus on major differences and suggest improvements.\n\n"
    "MediaPipe Pose Landmark Data:\n"
    + formatted_data
)

response = model.generate_content(prompt)
print(response.text)

  from .autonotebook import tqdm as notebook_tqdm


## 춤 강사의 포즈 데이터 분석 피드백:

**전반적인 평가:** 사용자의 움직임은 전문가의 움직임과 비교하여 약간의 차이가 있습니다. 특히 팔과 다리의 위치에 차이가 있으며, 이는 춤의 흐름과 미학에 영향을 미칠 수 있습니다. 

**주요 차이점:**

* **왼쪽 팔:** 사용자의 왼쪽 팔은 전문가에 비해 덜 뻗어 있으며, 팔꿈치가 더 구부러져 있습니다. 
* **오른쪽 다리:** 사용자의 오른쪽 다리는 전문가에 비해 덜 뻗어 있습니다. 
* **상체:** 사용자의 상체는 전문가에 비해 약간 더 앞으로 기울어져 있습니다. 

**개선 제안:**

1. **팔 뻗기 연습:** 왼쪽 팔을 전문가처럼 좀 더 완전히 뻗는 연습이 필요합니다.  벽이나 바를 잡고 팔을 쭉 뻗는 스트레칭을 통해 팔의 유연성을 높일 수 있습니다. 
2. **다리 뻗기 연습:** 오른쪽 다리를 더 뻗어서 전문가의 움직임을 따라할 수 있도록 노력해야 합니다. 스트레칭과 다리 근력 강화 운동이 도움이 될 수 있습니다.
3. **상체 자세 유지:** 상체를 전문가처럼 똑바로 세우고, 약간 뒤로 젖히는 연습을 통해 자세를 교정할 수 있습니다. 이는 균형을 유지하는 데에도 도움이 됩니다.

**추가 피드백:**

* **타이밍:** 사용자의 움직임은 전문가와 비슷한 타이밍을 유지하고 있습니다. 
* **속도:** 사용자의 움직임은 전문가와 비슷한 속도를 유지하고 있습니다.
* **전반적인 흐름:**  사용자의 움직임은 전문가와 비슷한 흐름을 보여주지만, 팔과 다리의 위치를 개선하면 좀 더 자연스럽고 균형 잡힌 움직임을 보여줄 수 있습니다. 

**전문가의 데이터를 참고하여 사용자의 움직임을 분석하고 개선하는 것은 춤 실력을 향상시키는 데 매우 효과적입니다.**  꾸준히 연습하고 피드백을 반영하여 춤 실력을 향상시키기 바랍니다. 



In [12]:
def format_pose_data2(expert_poses, user_poses, start=90, stop=180):
    """Serialize the second window of expert and user poses to a JSON string.

    Produces the same JSON layout as ``format_pose_data`` but, by default,
    covers poses 90 through 179 rather than the first 90.

    Args:
        expert_poses: Sequence of poses; each pose is an iterable of landmarks
            with ``x``, ``y`` and ``z`` attributes.
        user_poses: Sequence of poses in the same format.
        start: Index of the first pose to include (slice semantics).
        stop: Index one past the last pose to include (slice semantics).

    Returns:
        A JSON string of the form
        ``{"expert_poses": [[{"x":..., "y":..., "z":...}, ...], ...],
        "user_poses": [...]}``.
    """
    def _serialize(poses):
        # One list per pose, one {"x", "y", "z"} dict per landmark.
        return [
            [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in pose]
            for pose in poses[start:stop]
        ]

    formatted_data = {
        "expert_poses": _serialize(expert_poses),
        "user_poses": _serialize(user_poses),
    }
    return json.dumps(formatted_data)

In [13]:
# JSON payload for the second window of poses (indices 90-179 of each list).
formatted_data2 = format_pose_data2(expert_poses, user_poses)

In [14]:
import google.generativeai as genai

genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# The persona and reply language are passed as a system instruction. Previously
# they were pasted into the prompt as pseudo-JSON "role" objects, which the API
# receives as plain text (including a literal `f"` and an unclosed quote).
model = genai.GenerativeModel(
    'gemini-1.5-flash',
    system_instruction=(
        "You are a dance instructor analyzing pose data. "
        "Respond in Korean."
    ),
)

# Plain-text request followed by the JSON pose payload for the second window.
prompt = (
    "Compare the following expert and user pose data for approximately "
    "3 seconds (90 frames) and provide feedback on the user's performance. "
    "Focus on major differences and suggest improvements.\n\n"
    "MediaPipe Pose Landmark Data:\n"
    + formatted_data2
)

response = model.generate_content(prompt)
print(response.text)

InvalidArgument: 400 API key expired. Please renew the API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
]