In [39]:
!pip install opencv-python
!pip install mediapipe



In [40]:
import cv2
import mediapipe as mp
import numpy as np
import os
import json

# Short aliases for the MediaPipe Tasks classes involved in pose landmarking.
# NOTE(review): the cells visible in this notebook use the fully qualified
# mp.tasks... names instead of these aliases — confirm whether they are needed.
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

In [41]:
# Display the path, under the current working directory, of the pose model file.
os.path.join(os.getcwd(), 'pose_landmarker_full.task')

'c:\\Users\\USER\\workspace\\idol\\pose_landmarker_full.task'

In [42]:
# Define the create_pose_landmarker function
def create_pose_landmarker(model_path=None):
    """Create a MediaPipe pose landmarker configured for video input.

    Args:
        model_path: Path to the ``.task`` model bundle. When omitted, the file
            ``pose_landmarker_full.task`` in the current working directory is
            used.

    Returns:
        The object returned by ``PoseLandmarker.create_from_options``; the
        notebook uses it as a context manager.

    Raises:
        FileNotFoundError: If the model file does not exist.
    """
    if model_path is None:
        model_path = os.path.join(os.getcwd(), 'pose_landmarker_full.task')

    # Fail early with a message that names the path instead of deferring to
    # whatever error the native loader produces for a missing file.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"Pose landmarker model not found: {model_path}")

    options = mp.tasks.vision.PoseLandmarkerOptions(
        base_options=mp.tasks.BaseOptions(model_asset_path=os.fspath(model_path)),
        # VIDEO mode pairs with the detect_for_video() calls made per frame.
        running_mode=mp.tasks.vision.RunningMode.VIDEO,
        num_poses=1,
        min_pose_detection_confidence=0.5,
        min_pose_presence_confidence=0.5,
        min_tracking_confidence=0.5,
        output_segmentation_masks=False
    )
    return mp.tasks.vision.PoseLandmarker.create_from_options(options)

# Define the process_video function
def process_video(video_path, landmarker, duration_s=3):
    """Run pose detection over the first ``duration_s`` seconds of a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        landmarker: Object exposing ``detect_for_video(mp_image, timestamp_ms)``.
        duration_s: Number of seconds, counted from the start of the video,
            to process.

    Returns:
        A list holding the first detected pose (``pose_landmarks[0]``) of each
        processed frame that produced a detection. Frames without a detection
        are skipped, so list positions are not frame numbers.

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If the video does not report a positive frame rate.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also rejects NaN, which a plain `fps <= 0` would let through.
        if not fps > 0:
            raise ValueError(f"Video reports an invalid frame rate ({fps}): {video_path}")

        total_frames = int(duration_s * fps)
        frame_interval_ms = 1000 / fps
        poses = []
        frame_count = 0

        while cap.isOpened() and frame_count < total_frames:
            success, image = cap.read()
            if not success:
                break

            # OpenCV decodes frames as BGR; the image is declared as SRGB below.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

            # Use frame count for timestamp instead of CAP_PROP_POS_MSEC
            frame_timestamp_ms = int(frame_count * frame_interval_ms)

            pose_landmarker_result = landmarker.detect_for_video(mp_image, frame_timestamp_ms)

            if pose_landmarker_result.pose_landmarks:
                poses.append(pose_landmarker_result.pose_landmarks[0])

            frame_count += 1

        return poses
    finally:
        # Release the capture even when decoding or detection raises.
        cap.release()

# Function to convert pose landmarks to a list of dictionaries
def poses_to_json_data(poses):
    """Turn pose landmarks into plain lists and dicts that ``json`` can dump.

    Args:
        poses: Iterable of poses, each an iterable of landmarks exposing
            ``x``, ``y`` and ``z`` attributes.

    Returns:
        One list per pose, each containing one ``{"x", "y", "z"}`` dict per
        landmark, in the original order.
    """
    return [
        [{"x": point.x, "y": point.y, "z": point.z} for point in pose]
        for pose in poses
    ]

# Main execution code
# Input recordings; user_video is consumed by a later cell.
expert_video = "./expert_dance.mp4"
user_video = "./user_dance.mp4"

# Hold the landmarker only while frames are being processed.
with create_pose_landmarker() as landmarker:
    print("Processing expert video...")
    expert_poses = process_video(expert_video, landmarker, duration_s=19)
    print(f"Extracted {len(expert_poses)} poses from expert video")

# Convert expert_poses to JSON format
expert_poses_json = poses_to_json_data(expert_poses)

# Save expert_poses to a JSON file.
# NOTE: the top-level key used to be written as "export_poses", a misspelling
# of "expert_poses"; anything reading the file under the old key must be updated.
with open("expert_poses.json", "w", encoding="utf-8") as f:
    json.dump({"expert_poses": expert_poses_json}, f, indent=4)
print("Saved expert poses to expert_poses.json")

Processing expert video...
Extracted 569 poses from expert video
Saved expert poses to expert_poses.json


In [43]:
# Extract the user's poses from the first 21 seconds of the user recording,
# using a fresh landmarker for this video.
with create_pose_landmarker() as landmarker:
    print("Processing user video...")
    user_poses = process_video(user_video, landmarker, duration_s=21)

# Report how many poses were collected.
print(f"Extracted {len(user_poses)} poses from user video")

Processing user video...
Extracted 630 poses from user video


In [44]:
def format_pose_data(expert_poses, user_poses, start=0, end=150):
    """Serialize one window of expert and user poses as a JSON string.

    Args:
        expert_poses: Sequence of poses; each pose is an iterable of landmarks
            with ``x``, ``y`` and ``z`` attributes.
        user_poses: Same structure as ``expert_poses``.
        start: Index of the first pose to include (default 0).
        end: Index one past the last pose to include (default 150). ``None``
            means "through the end of the list".

    Returns:
        A JSON string with the keys ``"expert_poses"`` and ``"user_poses"``,
        each a list of poses, each pose a list of ``{"x", "y", "z"}`` dicts.
        With the defaults this is the first 150 poses of each input.
    """
    def _window(poses):
        # Slicing tolerates lists shorter than the window; they yield fewer poses.
        return [
            [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in pose]
            for pose in poses[start:end]
        ]

    formatted_data = {
        "expert_poses": _window(expert_poses),
        "user_poses": _window(user_poses),
    }
    return json.dumps(formatted_data)

In [45]:
# JSON string of the first 150 expert/user poses; it is embedded in the first Gemini prompt.
formatted_data = format_pose_data(expert_poses, user_poses)

In [46]:
!pip install -q -U google-generativeai

# Compare the following expert and user pose data

## Expert and User video

In [47]:
from IPython.display import display, HTML, Video

# Players for the original expert and user recordings.
video1 = Video("./expert_dance.mp4")
video2 = Video("./user_dance.mp4")

# Render each player's HTML first, then drop both into a two-column flex row
# so the recordings appear side by side.
video1_html = video1._repr_html_()
video2_html = video2._repr_html_()

html_code = f"""
<div style="display: flex;">
  <div style="flex: 1;">{video1_html}</div>
  <div style="flex: 1;">{video2_html}</div>
</div>
"""

display(HTML(html_code))

## Expert and User pose video

In [48]:
from IPython.display import display, HTML, Video

# Players for the expert and user pose videos.
p_video1 = Video("./expert_pose.mp4")
p_video2 = Video("./user_pose.mp4")

# Render each player's HTML first, then drop both into a two-column flex row
# so the pose videos appear side by side.
p_video1_html = p_video1._repr_html_()
p_video2_html = p_video2._repr_html_()

html_code = f"""
<div style="display: flex;">
  <div style="flex: 1;">{p_video1_html}</div>
  <div style="flex: 1;">{p_video2_html}</div>
</div>
"""

display(HTML(html_code))

## Pose Comparison: Frames 1~150

In [49]:
import google.generativeai as genai

genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# System prompt: persona plus the markdown skeleton the answer should follow.
# It is assembled from separate lines so headings and feedback/enhancement
# pairs reach the model on their own lines; backslash continuations inside a
# single string literal would flatten the template into one long line.
system = {
    "role": "system",
    "content": "\n".join([
        "You are a dance instructor analyzing pose data.",
        "Response Form: markdown",
        "# Feedback: Pose data analysis(0~5s)",
        "",
        "**1. feedback:**",
        "**enhancement:**",
        "",
        "**2. feedback:**",
        "**enhancement:**",
        "",
        "**3. feedback:**",
        "**enhancement:**",
        "",
        "**4. feedback:**",
        "**enhancement:**",
        "...",
        "",
        "## Weak Points:",
        "## How to train:",
    ]),
}

user = { "role": "user", "content": f"Compare the following expert and user pose data for approximately 5 second of 150 frames and provide feedback on the user's performance. Focus on major differences and suggest improvements. {formatted_data}"}

# The system prompt goes in through system_instruction and only the user text
# is sent as content. Interpolating the two role dicts into one string would
# send their Python reprs as a single user message, with no system role applied.
model = genai.GenerativeModel('gemini-1.5-flash', system_instruction=system["content"])

response = model.generate_content(user["content"])

print(response.text)

# Feedback: Pose data anlysis(0~5s)
**1. feedback: **  The user's left arm is not straight during the pose.
** enhancement: **  Focus on keeping the left arm straight in all frames.

**2. feedback: **  The user's left hand is not in the correct position and seems to be too far from the body.
** enhancement: **  Keep the left hand closer to the body and practice the correct hand placement.

**3. feedback: **  The user's right hand is not in the correct position and seems to be too far from the body.
** enhancement: **  Keep the right hand closer to the body and practice the correct hand placement.

**4. feedback: **  The user's right leg is not straight during the pose.
** enhancement: **  Focus on keeping the right leg straight in all frames.

**5. feedback: ** The user's left leg is not straight during the pose.
** enhancement: **  Focus on keeping the left leg straight in all frames.

**6. feedback: ** The user's hips are not in the correct position and seem to be too low.
** enhance

In [50]:
def format_pose_data2(expert_poses, user_poses):
    """Serialize the second 150-pose window (indices 150-299) as a JSON string.

    Args:
        expert_poses: Sequence of poses; each pose is an iterable of landmarks
            with ``x``, ``y`` and ``z`` attributes.
        user_poses: Same structure as ``expert_poses``.

    Returns:
        A JSON string with the keys ``"expert_poses"`` and ``"user_poses"``,
        each a list of poses, each pose a list of ``{"x", "y", "z"}`` dicts.
    """
    # Poses at list indices 150 through 299.
    window = slice(150, 300)

    def as_points(pose):
        return [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in pose]

    payload = {
        "expert_poses": [as_points(pose) for pose in expert_poses[window]],
        "user_poses": [as_points(pose) for pose in user_poses[window]],
    }
    return json.dumps(payload)

In [51]:
# JSON string of expert/user poses 150-299; it is embedded in the second Gemini prompt.
formatted_data2 = format_pose_data2(expert_poses, user_poses)

## Pose Comparison: Frames 151~300

In [52]:
import google.generativeai as genai

genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# System prompt: persona plus the markdown skeleton the answer should follow.
# It is assembled from separate lines so headings and feedback/enhancement
# pairs reach the model on their own lines; backslash continuations inside a
# single string literal would flatten the template into one long line.
system2 = {
    "role": "system",
    "content": "\n".join([
        "You are a dance instructor analyzing pose data.",
        "Response Form: markdown",
        "# Feedback: Pose data analysis(5~10s)",
        "",
        "**1. feedback:**",
        "**enhancement:**",
        "",
        "**2. feedback:**",
        "**enhancement:**",
        "",
        "**3. feedback:**",
        "**enhancement:**",
        "",
        "**4. feedback:**",
        "**enhancement:**",
        "...",
        "",
        "## Weak Points:",
        "## How to train:",
    ]),
}

user2 = { "role": "user", "content": f"Compare the following expert and user pose data for approximately between 5 and 10 second of 150 frames and provide feedback on the user's performance. Focus on major differences and suggest improvements. {formatted_data2}"}

# The system prompt goes in through system_instruction and only the user text
# is sent as content. Interpolating the two role dicts into one string would
# send their Python reprs as a single user message, with no system role applied.
model = genai.GenerativeModel('gemini-1.5-flash', system_instruction=system2["content"])

response2 = model.generate_content(user2["content"])

print(response2.text)

# Feedback: Pose data anlysis(5~10s)
**1. feedback: ** The user's left leg is not as extended as the expert's left leg. 
** enhancement: ** The user should focus on extending the left leg more to match the expert's pose.
**2. feedback: ** The user's right leg is not fully extended during the final pose. 
** enhancement: ** The user needs to straighten out the right leg at the end of the move.
**3. feedback: ** The user's right arm is not as high as the expert's right arm. 
** enhancement: ** The user should raise the right arm to match the expert's height. 
**4. feedback: ** User's right leg is not as straight and the knee is not pointed at the correct angle in the final pose. 
** enhancement: ** The user should straighten the knee to match the expert's leg.

## Weekness Point: 
- The user is not fully extending their right leg and left leg.
- The user is not fully extending their right arm. 
- The user's leg and knee angle is incorrect in the final pose. 

## How to training: 
- User 

## Pose Comparison: Frames 301~450

In [53]:
def format_pose_data3(expert_poses, user_poses):
    """Serialize the third 150-pose window (indices 300-449) as a JSON string.

    Args:
        expert_poses: Sequence of poses; each pose is an iterable of landmarks
            with ``x``, ``y`` and ``z`` attributes.
        user_poses: Same structure as ``expert_poses``.

    Returns:
        A JSON string with the keys ``"expert_poses"`` and ``"user_poses"``,
        each a list of poses, each pose a list of ``{"x", "y", "z"}`` dicts.
    """
    first, stop = 300, 450  # poses at list indices 300 through 449

    serialized = {}
    for key, poses in (("expert_poses", expert_poses), ("user_poses", user_poses)):
        serialized[key] = [
            [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in pose]
            for pose in poses[first:stop]
        ]
    return json.dumps(serialized)

# JSON string of expert/user poses 300-449, embedded in the prompt below.
formatted_data3 = format_pose_data3(expert_poses, user_poses)

import google.generativeai as genai

genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# System prompt: persona plus the markdown skeleton the answer should follow.
# It is assembled from separate lines so headings and feedback/enhancement
# pairs reach the model on their own lines; backslash continuations inside a
# single string literal would flatten the template into one long line.
system3 = {
    "role": "system",
    "content": "\n".join([
        "You are a dance instructor analyzing pose data.",
        "",
        "Response Form: markdown",
        "# Feedback: Pose data analysis(10~15s)",
        "",
        "**1. feedback:**",
        "**enhancement:**",
        "",
        "**2. feedback:**",
        "**enhancement:**",
        "",
        "**3. feedback:**",
        "**enhancement:**",
        "",
        "**4. feedback:**",
        "**enhancement:**",
        "...",
        "",
        "## Weak Points:",
        "## How to train:",
    ]),
}

user3 = { "role": "user", "content": f"Compare the following expert and user pose data for approximately between 10 and 15 second of 150 frames and provide feedback on the user's performance. Focus on major differences and suggest improvements. {formatted_data3}"}

# The system prompt goes in through system_instruction and only the user text
# is sent as content. Interpolating the two role dicts into one string would
# send their Python reprs as a single user message, with no system role applied.
model = genai.GenerativeModel('gemini-1.5-flash', system_instruction=system3["content"])

response3 = model.generate_content(user3["content"])

print(response3.text)

# Feedback: Pose data anlysis(10~15s)
**1. feedback: ** The user is leaning too far forward in the upper body. This can lead to imbalances and reduce overall efficiency.
** enhancement: ** Maintain a more upright posture, keeping the spine straight and the shoulders relaxed. 

**2. feedback: ** The user's left arm is not extending fully during the movement, which limits the range of motion.
** enhancement: ** Focus on extending the left arm fully, reaching outward while keeping the shoulder engaged.

**3. feedback: ** The user's right leg is not fully extending during the leg sweep, resulting in a less dynamic move.
** enhancement: ** Actively push through the right heel to extend the leg fully and create a stronger sweep.

**4. feedback: ** The user's right foot placement is inconsistent, sometimes too far back and sometimes too close to the body. 
** enhancement: ** Practice maintaining a consistent right foot position, keeping it close enough to the body for stability but not so clo

## Pose Comparison: Frames 451 Onward
- [Expert Images](expert/frame_451.jpg)
- [User Images](user/frame_451.jpg)

In [54]:
def format_pose_data4(expert_poses, user_poses):
    """Serialize every pose from index 450 onward as a JSON string.

    The window is open-ended, so the expert and user lists in the result may
    have different lengths.

    Args:
        expert_poses: Sequence of poses; each pose is an iterable of landmarks
            with ``x``, ``y`` and ``z`` attributes.
        user_poses: Same structure as ``expert_poses``.

    Returns:
        A JSON string with the keys ``"expert_poses"`` and ``"user_poses"``,
        each a list of poses, each pose a list of ``{"x", "y", "z"}`` dicts.
    """
    def tail_as_points(poses):
        # Everything after the first 450 poses.
        converted = []
        for pose in poses[450:]:
            converted.append([{"x": lm.x, "y": lm.y, "z": lm.z} for lm in pose])
        return converted

    expert_tail = tail_as_points(expert_poses)
    user_tail = tail_as_points(user_poses)
    return json.dumps({"expert_poses": expert_tail, "user_poses": user_tail})

# JSON string of all expert/user poses from index 450 onward, embedded in the
# prompt below.
# NOTE(review): this window is open-ended, so the two lists need not contain
# 150 poses each even though the user prompt says "150 frames" — confirm the wording.
formatted_data4 = format_pose_data4(expert_poses, user_poses)

import google.generativeai as genai

genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# System prompt: persona plus the markdown skeleton the answer should follow.
# It is assembled from separate lines so headings and feedback/enhancement
# pairs reach the model on their own lines; backslash continuations inside a
# single string literal would flatten the template into one long line.
system4 = {
    "role": "system",
    "content": "\n".join([
        "You are a dance instructor analyzing pose data.",
        "Response Form: markdown",
        "# Feedback: Pose data analysis(15s~)",
        "",
        "**1. feedback:**",
        "**enhancement:**",
        "",
        "**2. feedback:**",
        "**enhancement:**",
        "",
        "**3. feedback:**",
        "**enhancement:**",
        "",
        "**4. feedback:**",
        "**enhancement:**",
        "...",
        "",
        "## Weak Points:",
        "## How to train:",
    ]),
}

user4 = { "role": "user", "content": f"Compare the following expert and user pose data for approximately after 15 second of 150 frames and provide feedback on the user's performance. Focus on major differences and suggest improvements. {formatted_data4}"}

# The system prompt goes in through system_instruction and only the user text
# is sent as content. Interpolating the two role dicts into one string would
# send their Python reprs as a single user message, with no system role applied.
model = genai.GenerativeModel('gemini-1.5-flash', system_instruction=system4["content"])

response4 = model.generate_content(user4["content"])

print(response4.text)

# Feedback: Pose data anlysis(15s~)
**1. feedback: ** The user's right shoulder is lower than the expert's. This is particularly noticeable around frame 130.
** enhancement: ** The user should try to keep their right shoulder at a similar height to their left shoulder. This will help to create a more balanced and symmetrical pose.
**2. feedback: ** The user's left hip is slightly higher than the expert's.
** enhancement: ** The user should try to keep their hips at a similar height. This will help to create a more stable and grounded base.
**3. feedback: ** The user's left knee is less bent than the expert's, which makes the user's pose less dynamic.
** enhancement: ** The user should try to bend their left knee more deeply, as this will create a more powerful and dynamic pose.
**4. feedback: ** The user's left foot is not pointing as far out as the expert's, indicating a lack of engagement in the left leg.
** enhancement: ** The user should try to extend their left foot out further, a