In [1]:
import cv2
import mediapipe as mp
import numpy as np
import json
import os
import glob

# Configuration
IMAGE_FOLDER = 'im2'
STEP_NAMES = [
    "Ichi", "Ni", "San", "Shi", "Go", "Roku", "Shichi", "Hachi",
    "Ku", "Ju", "Ju Ichi", "Ju Ni", "Ju San", "Ju Shi",
    "Ju Go", "Ju Roku", "Ju Shichi", "Ju Hachi"
]

STEP_IMAGE_MAPPING = [
    ("Ichi", "screenshot_0.png"),       # Left Lower Block
    ("Ni", "screenshot_1.png"),         # Right Lunge Punch
    ("San", "screenshot_2.png"),        # Turn & Right Lower Block
    ("Shi", "screenshot_3.png"),        # Right Hammerfist Strike
    ("Go", "screenshot_4.png"),         # Left Lunge Punch
    ("Roku", "screenshot_5.png"),       # Turn & Left Lower Block
    ("Shichi", "screenshot_6.png"),     # Right Upper Block
    ("Hachi", "screenshot_7.png"),      # Left Upper Block
    ("Ku", "screenshot_8.png"),         # Right Upper Block (Kiai)
    ("Ju", "screenshot_9.png"),         # Turn & Left Down Block
    ("Ju Ichi", "screenshot_10.png"),   # Right Lunge Punch
    ("Ju Ni", "screenshot_11.png"),     # Turn & Right Down Block
    ("Ju San", "screenshot_12.png"),    # Left Lunge Punch
    ("Ju Shi", "screenshot_13.png"),    # Turn & Left Lower Block
    ("Ju Go", "screenshot_14.png"),     # Right Lunge Punch
    ("Ju Roku", "screenshot_15.png"),   # Left Lunge Punch
    ("Ju Shichi", "screenshot_16.png"), # Right Lunge Punch (2nd Kiai)
    ("Ju Hachi", "screenshot_17.png")   # Left Knife Hand Block
]


# MediaPipe setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5)

def normalize_landmarks(landmarks):
    """
    Normalize landmarks relative to hip center and scale by torso height
    This gives more consistent, readable values
    """
    # Get key reference points
    left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value]
    right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value]
    left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
    right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
    
    # Calculate center points
    hip_center_x = (left_hip.x + right_hip.x) / 2
    hip_center_y = (left_hip.y + right_hip.y) / 2
    shoulder_center_y = (left_shoulder.y + right_shoulder.y) / 2
    
    # Calculate torso height for scaling
    torso_height = abs(shoulder_center_y - hip_center_y)
    if torso_height < 0.01:  # Prevent division by zero
        torso_height = 0.1
    
    # Normalize all landmarks
    normalized = []
    for lm in landmarks:
        normalized.append({
            'x': round((lm.x - hip_center_x) / torso_height, 3),
            'y': round((lm.y - hip_center_y) / torso_height, 3),
            'z': round(lm.z / torso_height, 3),
            'visibility': round(lm.visibility, 3)
        })
    
    return normalized

def get_key_points_summary(normalized_landmarks):
    """
    Extract key points for easier reading and debugging
    """
    key_indices = {
        'nose': mp_pose.PoseLandmark.NOSE.value,
        'left_shoulder': mp_pose.PoseLandmark.LEFT_SHOULDER.value,
        'right_shoulder': mp_pose.PoseLandmark.RIGHT_SHOULDER.value,
        'left_elbow': mp_pose.PoseLandmark.LEFT_ELBOW.value,
        'right_elbow': mp_pose.PoseLandmark.RIGHT_ELBOW.value,
        'left_wrist': mp_pose.PoseLandmark.LEFT_WRIST.value,
        'right_wrist': mp_pose.PoseLandmark.RIGHT_WRIST.value,
        'left_hip': mp_pose.PoseLandmark.LEFT_HIP.value,
        'right_hip': mp_pose.PoseLandmark.RIGHT_HIP.value,
        'left_knee': mp_pose.PoseLandmark.LEFT_KNEE.value,
        'right_knee': mp_pose.PoseLandmark.RIGHT_KNEE.value,
        'left_ankle': mp_pose.PoseLandmark.LEFT_ANKLE.value,
        'right_ankle': mp_pose.PoseLandmark.RIGHT_ANKLE.value
    }
    
    summary = {}
    for name, idx in key_indices.items():
        point = normalized_landmarks[idx]
        summary[name] = {
            'x': point['x'],
            'y': point['y'],
            'z': point['z']
        }
    
    return summary

def process_images():
    """
    Main processing function
    """
    print(f" Processing Heian Shodan images from: {IMAGE_FOLDER}")
    reference_data = []
    
    for step_name, image_pattern in STEP_IMAGE_MAPPING:
        print(f"\n Processing step: {step_name} [{image_pattern}]")
        
        # Find matching images
        image_path = os.path.join(IMAGE_FOLDER, image_pattern)
        image_files = glob.glob(image_path)
        
        if not image_files:
            print(f"  No images found for: {image_path}")
            continue
        
        valid_poses = []
        
        for image_file in image_files:
            print(f"   Processing: {os.path.basename(image_file)}")
            
            # Load and process image
            frame = cv2.imread(image_file)
            if frame is None:
                print(f"    Could not load: {image_file}")
                continue
            
            # Convert to RGB for MediaPipe
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(image_rgb)
            
            if results.pose_landmarks:
                print(results.pose_landmarks.landmark)
                normalized = normalize_landmarks(results.pose_landmarks.landmark)
                valid_poses.append(normalized)
                print(f"    Pose detected and normalized")
            else:
                print(f"     No pose detected")
        
        # Process valid poses
        if valid_poses:
            final_pose = valid_poses[0]
            
            # Get key points summary for display
            key_points = get_key_points_summary(final_pose)
            
            # Store the reference data
            step_data = {
                "step_name": step_name,
                "step_number": len(reference_data) + 1,
                "normalized_landmarks": final_pose,
                "key_points_summary": key_points
            }
            
            reference_data.append(step_data)
            print(f"    Step '{step_name}' processed successfully")
            
            # Print key points for debugging
            print(f"     Key points preview:")
            print(f"      Left wrist:  ({key_points['left_wrist']['x']:.2f}, {key_points['left_wrist']['y']:.2f})")
            print(f"      Right wrist: ({key_points['right_wrist']['x']:.2f}, {key_points['right_wrist']['y']:.2f})")
            print(f"      Left knee:   ({key_points['left_knee']['x']:.2f}, {key_points['left_knee']['y']:.2f})")
            print(f"      Right knee:  ({key_points['right_knee']['x']:.2f}, {key_points['right_knee']['y']:.2f})")
            
        else:
            print(f"    No valid poses found for step: {step_name}")
    
    return reference_data

def save_reference_data(reference_data):
    """
    Save the processed data to JSON file
    """
    output_file = "heian_shodan_reference.json"
    
    # Create a more organized output structure
    output_data = {
        "kata_name": "Heian Shodan",
        "total_steps": len(reference_data),
        "created_from_images": True,
        "normalization_method": "hip_center_torso_scaled",
        "steps": reference_data
    }
    
    with open(output_file, "w") as f:
        json.dump(output_data, f, indent=2)
    
    print(f"\n SUCCESS!")
    print(f"   Processed {len(reference_data)} steps")
    print(f"   Saved to: {output_file}")
    
    return output_file

def print_summary(reference_data):
    """
    Print a nice summary of what was processed
    """
    print(f"\n PROCESSING SUMMARY")
    print(f"{'='*50}")
    
    for i, step in enumerate(reference_data, 1):
        step_name = step['step_name']
        key_points = step['key_points_summary']
        
        print(f"{i:2d}. {step_name:<12} | "
              f"L.Wrist: ({key_points['left_wrist']['x']:+.1f},{key_points['left_wrist']['y']:+.1f}) | "
              f"R.Wrist: ({key_points['right_wrist']['x']:+.1f},{key_points['right_wrist']['y']:+.1f})")

if __name__ == "__main__":
    # Process all images
    reference_data = process_images()
    
    if reference_data:
        # Save to file
        output_file = save_reference_data(reference_data)
        
        # Print summary
        print_summary(reference_data)
        
        print(f"\n TIP: The normalized coordinates are now:")
        print(f"   • Centered on hip center (0,0)")
        print(f"   • Scaled by torso height")
        print(f"   • Rounded to 3 decimal places")
        print(f"   • Much more readable!")
        
    else:
        print(f"\n No reference data was created. Check your image folder and file names.")

 Processing Heian Shodan images from: im2

 Processing step: Ichi [screenshot_0.png]
   Processing: screenshot_0.png
[x: 0.72122395
y: 0.577296853
z: -0.417797953
visibility: 0.99998486
, x: 0.736277
y: 0.57577461
z: -0.380950719
visibility: 0.999968529
, x: 0.736053109
y: 0.575852275
z: -0.380907834
visibility: 0.999965072
, x: 0.735822618
y: 0.575878441
z: -0.380925983
visibility: 0.999962687
, x: 0.738106906
y: 0.572933555
z: -0.422439039
visibility: 0.999969363
, x: 0.739150763
y: 0.571074188
z: -0.422550678
visibility: 0.99996984
, x: 0.740234256
y: 0.569098413
z: -0.422575504
visibility: 0.999960065
, x: 0.731737852
y: 0.568477929
z: -0.172177255
visibility: 0.999924898
, x: 0.736758
y: 0.559096098
z: -0.370159
visibility: 0.999970555
, x: 0.704827666
y: 0.574480712
z: -0.337200969
visibility: 0.999958515
, x: 0.707033634
y: 0.570927322
z: -0.394950777
visibility: 0.99997735
, x: 0.649693727
y: 0.581529081
z: -0.00208196486
visibility: 0.999974966
, x: 0.677743256
y: 0.518577516


I0000 00:00:1754516092.658800 1460636 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1754516092.760304 1460849 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1754516092.774975 1460849 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1754516092.792112 1460851 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


[x: 0.710302472
y: 0.737614512
z: 0.313037753
visibility: 0.999998331
, x: 0.726028621
y: 0.736249626
z: 0.31650582
visibility: 0.999998331
, x: 0.727543056
y: 0.735060334
z: 0.316522092
visibility: 0.999998
, x: 0.729079843
y: 0.733798921
z: 0.316461891
visibility: 0.999998808
, x: 0.725468338
y: 0.737894475
z: 0.271134973
visibility: 0.999998689
, x: 0.726375401
y: 0.737965941
z: 0.271216512
visibility: 0.999998331
, x: 0.727196574
y: 0.738002598
z: 0.271212459
visibility: 0.999998927
, x: 0.731539667
y: 0.725375235
z: 0.327388644
visibility: 0.999998689
, x: 0.727644861
y: 0.733571649
z: 0.11846038
visibility: 0.999996543
, x: 0.703439772
y: 0.733755171
z: 0.324454069
visibility: 0.999989152
, x: 0.702640831
y: 0.735031366
z: 0.263234764
visibility: 0.999989629
, x: 0.665564239
y: 0.680075705
z: 0.369512618
visibility: 0.99993062
, x: 0.675456882
y: 0.733275354
z: -0.0551394485
visibility: 0.999995112
, x: 0.580744088
y: 0.657398283
z: 0.598027587
visibility: 0.491076201
, x: 0.6397