In [1]:
# ==============================================
#  Heian Shodan - Enhanced Video Kata Detection with Fixed Time Perception and Key Points Preview
# ==============================================

import cv2
import mediapipe as mp
import numpy as np
import json
import time
import csv
import os

# ==============================================
#  Configuration
# Paths: the video that is analysed, the annotated copy that is written,
# and the per-step result table.
INPUT_VIDEO = 'vid.mp4'
OUTPUT_VIDEO = 'output_detected_simple.mp4'
OUTPUT_CSV = 'match_stats_simple.csv'
# A frame counts as "good" when the hybrid similarity is strictly above this.
SIMILARITY_THRESHOLD = 0.55  # Slightly lower due to stricter hybrid matching
# Video-time seconds (frames / fps) a step may run before it is logged as TIMEOUT.
STEP_TIMEOUT_SECONDS = 3
# Video-time seconds at the start during which frames are written but not analysed.
START_DELAY_SECONDS = 1
# Seconds a matching pose must be held. The frame requirement is computed as
# int(fps * MIN_HOLD_TIME), so a value below one frame period truncates to 0
# required frames (e.g. int(38.12 * 0.01) == 0).
MIN_HOLD_TIME = 0.01 # Minimum time to hold pose

# HYBRID SIMILARITY PARAMETERS
# These three are forwarded to pose_similarity() on every analysed frame.
# Distances are in normalised units (multiples of the torso height).
DISTANCE_THRESHOLD = 3.0  # Max allowed average distance between key joints
COSINE_WEIGHT = 0.6       # Weight for angle-based similarity (0-1)
DISTANCE_WEIGHT = 0.4     # Weight for position-based similarity (0-1)

# ==============================================
#  Load reference poses with proper JSON handling
# Load the reference kata description. Two layouts are accepted:
#   * {"steps": [...]}  - a wrapper object whose "steps" entry is the step list
#   * [...]             - the step list itself
# The result is bound to `reference_data`; the raw parsed document stays
# available as `reference_file_data`.
print(" Loading reference poses...")
try:
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open('heian_shodan_reference.json', encoding='utf-8') as f:
        reference_file_data = json.load(f)
except FileNotFoundError:
    print(" Error: heian_shodan_reference.json not found!")
    # SystemExit is always available (the exit() helper comes from `site`)
    # and a non-zero status tells the caller that loading failed.
    raise SystemExit(1)
except json.JSONDecodeError as e:
    print(f" Error: Invalid JSON format - {e}")
    raise SystemExit(1)

# Handle nested JSON structure
if isinstance(reference_file_data, dict) and "steps" in reference_file_data:
    reference_data = reference_file_data["steps"]
    print(f" Loaded {len(reference_data)} steps from nested JSON structure")
else:
    reference_data = reference_file_data
    print(f" Loaded {len(reference_data)} steps from direct array")

# Display labels for the kata steps, in performance order. The list is indexed
# with the 0-based step number for console messages, the on-frame overlay and
# the CSV rows; it is also the fallback name when a reference step carries no
# 'step_name' entry.
kata_steps = [
    "Ichi: Left Lower Block",
    "Ni: Right Lunge Punch", 
    "San: Turn & Right Lower Block",
    "Shi: Right Hammerfist Strike",
    "Go: Left Lunge Punch",
    "Roku: Turn & Left Lower Block",
    "Shichi: Right Upper Block",
    "Hachi: Left Upper Block",
    "Ku: Right Upper Block (Kiai)",
    "Ju: Turn & Left Down Block",
    "Ju Ichi: Right Lunge Punch",
    "Ju Ni: Turn & Right Down Block",
    "Ju San: Left Lunge Punch",
    "Ju Shi: Turn & Left Lower Block",
    "Ju Go: Right Lunge Punch",
    "Ju Roku: Left Lunge Punch",
    "Ju Shichi: Right Lunge Punch (2nd Kiai)",
    "Ju Hachi: Left Knife Hand Block"
]

# ==============================================
#  Initialize MediaPipe Pose
# `mp_pose` is the namespace the helpers below use for landmark indices
# (mp_pose.PoseLandmark) and skeleton edges (mp_pose.POSE_CONNECTIONS).
mp_pose = mp.solutions.pose
# One shared estimator: it receives every analysed RGB frame via pose.process()
# and is released with pose.close() once the video has been processed.
pose = mp_pose.Pose(static_image_mode=False, model_complexity=1, min_detection_confidence=0.5)
# Helper module used to draw the detected skeleton onto each output frame.
drawing = mp.solutions.drawing_utils

# ==============================================
#  Helper: Normalize landmarks (improved version)
def normalize_landmarks(landmarks):
    """
    Express a landmark sequence in a body-centred, torso-scaled frame.

    x and y are shifted so the midpoint between the hips is the origin and
    then divided by the torso height (vertical gap between the shoulder
    midpoint and the hip midpoint). z is only divided by the torso height,
    it is not shifted. Each value is rounded to 3 decimals.

    Args:
        landmarks: indexable sequence of objects exposing .x, .y and .z,
            addressable by the mp_pose.PoseLandmark indices.

    Returns:
        1-D numpy array laid out as [x0, y0, z0, x1, y1, z1, ...].
    """
    which = mp_pose.PoseLandmark
    hip_l = landmarks[which.LEFT_HIP.value]
    hip_r = landmarks[which.RIGHT_HIP.value]
    shoulder_l = landmarks[which.LEFT_SHOULDER.value]
    shoulder_r = landmarks[which.RIGHT_SHOULDER.value]

    # Origin of the normalised frame and the shoulder line used for scaling
    origin_x = (hip_l.x + hip_r.x) / 2
    origin_y = (hip_l.y + hip_r.y) / 2
    shoulders_y = (shoulder_l.y + shoulder_r.y) / 2

    # A (nearly) collapsed torso would blow the coordinates up, so fall back
    # to a fixed scale in that case.
    scale = abs(shoulders_y - origin_y)
    if scale < 0.01:
        scale = 0.1

    rows = [
        [
            round((point.x - origin_x) / scale, 3),
            round((point.y - origin_y) / scale, 3),
            round(point.z / scale, 3),
        ]
        for point in landmarks
    ]

    return np.array(rows).flatten()

# ==============================================
#  Helper: Extract key points preview from normalized pose
def get_key_points_preview(normalized_pose):
    """
    Build a short multi-line text listing the (x, y) of six key joints.

    The pose is interpreted as 33 joints x 3 coordinates. Wrists, elbows and
    knees are reported with two decimals. This is a debugging aid: any failure
    (for example a pose of the wrong length) is turned into an error string
    instead of being raised.

    Args:
        normalized_pose: flat pose with 99 values, or None.

    Returns:
        The preview text, a "no data" message for None, or an error message.
    """
    if normalized_pose is None:
        return " No pose data available"

    try:
        joints = np.reshape(normalized_pose, (33, 3))

        landmark = mp_pose.PoseLandmark
        # (label incl. alignment padding, landmark) in display order
        rows = (
            ("Left wrist:  ", landmark.LEFT_WRIST),
            ("Right wrist: ", landmark.RIGHT_WRIST),
            ("Left elbow:  ", landmark.LEFT_ELBOW),
            ("Right elbow: ", landmark.RIGHT_ELBOW),
            ("Left knee:   ", landmark.LEFT_KNEE),
            ("Right knee:  ", landmark.RIGHT_KNEE),
        )

        lines = ["    Key points preview:"]
        for label, member in rows:
            x, y = joints[member.value][:2]  # z is not shown
            lines.append(f"      {label}({x:.2f}, {y:.2f})")

        return "\n".join(lines)

    except Exception as err:
        return f" Error extracting key points: {err}"

# ==============================================
#  Helper: Compare user pose vs reference pose with detailed analysis
def compare_poses_detailed(user_pose, reference_pose, step_name="Unknown"):
    """
    Render a side-by-side text table of user vs. reference joint positions.

    Both poses are interpreted as 33 joints x 3 coordinates. For the wrists,
    elbows and knees the table shows the (x, y) of each pose, their 2-D
    Euclidean distance and a status mark, followed by the average distance
    and a verbal rating. Any failure is returned as an error string rather
    than raised.

    Args:
        user_pose: flat pose with 99 values, or None.
        reference_pose: flat pose with 99 values, or None.
        step_name: label printed in the table heading.

    Returns:
        The formatted report, or a message explaining why none was produced.
    """
    if user_pose is None or reference_pose is None:
        return " Cannot compare - missing pose data"

    try:
        user_joints = np.reshape(user_pose, (33, 3))
        ref_joints = np.reshape(reference_pose, (33, 3))

        landmark = mp_pose.PoseLandmark
        tracked = (
            ('Left wrist', landmark.LEFT_WRIST.value),
            ('Right wrist', landmark.RIGHT_WRIST.value),
            ('Left elbow', landmark.LEFT_ELBOW.value),
            ('Right elbow', landmark.RIGHT_ELBOW.value),
            ('Left knee', landmark.LEFT_KNEE.value),
            ('Right knee', landmark.RIGHT_KNEE.value),
        )

        rule = '-' * 60
        report = [
            f"    POSE COMPARISON for '{step_name}':",
            f"   {'=' * 60}",
            f"   {'Joint':<12} {'User Pose':<18} {'Reference':<18} {'Difference':<12}",
            f"   {rule}",
        ]

        gaps = []
        for label, idx in tracked:
            ux, uy = user_joints[idx][:2]  # x, y only
            rx, ry = ref_joints[idx][:2]

            # Planar distance between the two positions of this joint
            gap = np.sqrt((ux - rx)**2 + (uy - ry)**2)
            gaps.append(gap)

            user_txt = f"({ux:.2f}, {uy:.2f})"
            ref_txt = f"({rx:.2f}, {ry:.2f})"
            gap_txt = f"{gap:.3f}"

            # Status mark by distance band
            mark = "✅" if gap < 2.0 else ("⚠️" if gap < 5.0 else "❌")

            report.append(f"   {label:<12} {user_txt:<18} {ref_txt:<18} {gap_txt:<8} {mark}")

        mean_gap = sum(gaps) / len(gaps) if gaps else 0

        # First band whose upper limit exceeds the average decides the rating
        for limit, rating in (
            (2.0, " ✅ EXCELLENT MATCH!"),
            (3.5, " ⚠️  GOOD MATCH"),
            (6.0, " ⚠️  FAIR MATCH - Minor adjustments needed"),
        ):
            if mean_gap < limit:
                break
        else:
            rating = " ❌ POOR MATCH - Significant adjustments needed"

        report.append(f"   {rule}")
        report.append(f"   Average difference: {mean_gap:.3f}" + rating)

        return "\n".join(report)

    except Exception as err:
        return f"❌ Error comparing poses: {err}"

# ==============================================
#  Helper: Extract reference pose data
def extract_reference_pose(step_data):
    """
    Extract one reference pose from a step entry of the reference JSON.

    The landmarks are read from "normalized_landmarks" when present, otherwise
    from "landmarks_2d". Each landmark may be a mapping with 'x'/'y'/'z' keys
    or a sequence [x, y, z, ...]; only the first three components are used.
    Landmarks that carry no z component (2-D data) get z = 0.0 so the result
    always has three values per landmark.

    Args:
        step_data: mapping describing one kata step.

    Returns:
        1-D float numpy array [x0, y0, z0, x1, y1, z1, ...], or None when the
        step has no landmark entry or the entry is empty.
    """
    if "normalized_landmarks" in step_data:
        landmarks = step_data["normalized_landmarks"]
    elif "landmarks_2d" in step_data:
        landmarks = step_data["landmarks_2d"]
    else:
        print(" Warning: No landmarks found in step data")
        return None

    # An empty pose cannot be compared against anything; report it the same
    # way as a missing one so callers can skip the step.
    if landmarks is None or len(landmarks) == 0:
        print(" Warning: Empty landmark list in step data")
        return None

    # Convert to the same [x, y, z] row layout normalize_landmarks produces
    pose_array = []
    for lm in landmarks:
        if isinstance(lm, dict):
            pose_array.append([lm['x'], lm['y'], lm.get('z', 0.0)])
        else:
            # Sequence form; tolerate [x, y] as well as [x, y, z, ...]
            depth = lm[2] if len(lm) > 2 else 0.0
            pose_array.append([lm[0], lm[1], depth])

    return np.array(pose_array, dtype=float).flatten()

# ==============================================
#  Helper: HYBRID Pose Similarity - Combines Cosine Similarity + Distance Constraint
def pose_similarity(pose1, pose2, distance_threshold=3.0, cos_weight=0.6, dist_weight=0.4):
    """
    HYBRID pose similarity that combines:
    1. Cosine similarity (for angle/relative positioning)
    2. Distance constraint (for absolute positioning)

    This prevents false positives where poses have similar angles but different positions.

    The cosine part compares, joint by joint, the 3-D position vectors of the
    two poses and averages the results with arm joints weighted 3x and wrists
    4x. The distance part is the mean 2-D (x, y) distance over wrists, elbows
    and knees, mapped to [0, 1] by `distance_threshold`. When the mean
    distance exceeds the threshold the combined score is additionally
    multiplied by a penalty that is never smaller than 0.1.

    Args:
        pose1, pose2: Normalized pose arrays (99 values = 33 joints x 3), or None
        distance_threshold: Max allowed average distance between key joints
        cos_weight: Weight for cosine similarity component (0-1)
        dist_weight: Weight for distance component (0-1)

    Returns:
        The hybrid score; 0.0 when either pose is None.

    Raises:
        ValueError: if a pose cannot be reshaped to (33, 3).
    """
    if pose1 is None or pose2 is None:
        return 0.0

    p1 = np.asarray(pose1, dtype=float).reshape(33, 3)  # 33 joints, each with (x,y,z)
    p2 = np.asarray(pose2, dtype=float).reshape(33, 3)

    landmark = mp_pose.PoseLandmark

    # ==============================================
    # PART 1: COSINE SIMILARITY (Angle-based)
    # ==============================================
    ARM_WEIGHT_MULTIPLIER = 3.0
    WRIST_WEIGHT_MULTIPLIER = 4.0

    wrist_indices = [
        landmark.LEFT_WRIST.value,
        landmark.RIGHT_WRIST.value,
    ]
    other_arm_indices = [
        landmark.LEFT_SHOULDER.value,
        landmark.RIGHT_SHOULDER.value,
        landmark.LEFT_ELBOW.value,
        landmark.RIGHT_ELBOW.value,
        landmark.LEFT_PINKY.value,
        landmark.RIGHT_PINKY.value,
        landmark.LEFT_INDEX.value,
        landmark.RIGHT_INDEX.value,
        landmark.LEFT_THUMB.value,
        landmark.RIGHT_THUMB.value,
    ]

    joint_weights = np.ones(33)  # Default weight of 1.0 for all joints
    joint_weights[other_arm_indices] = ARM_WEIGHT_MULTIPLIER
    joint_weights[wrist_indices] = WRIST_WEIGHT_MULTIPLIER  # most critical

    # Per-joint cosine between corresponding position vectors, all joints at once
    dots = np.sum(p1 * p2, axis=1)
    norms1 = np.linalg.norm(p1, axis=1)
    norms2 = np.linalg.norm(p2, axis=1)
    usable = (norms1 > 0) & (norms2 > 0)

    # Joints with a zero-length vector keep similarity 0.0 but still count
    # with their full weight in the average.
    cos_similarities = np.zeros(33)
    np.divide(dots, norms1 * norms2, out=cos_similarities, where=usable)

    cosine_similarity_score = np.sum(cos_similarities * joint_weights) / np.sum(joint_weights)

    # ==============================================
    # PART 2: DISTANCE CONSTRAINT (Position-based)
    # ==============================================
    key_joints_for_distance = [
        landmark.LEFT_WRIST.value,
        landmark.RIGHT_WRIST.value,
        landmark.LEFT_ELBOW.value,
        landmark.RIGHT_ELBOW.value,
        landmark.LEFT_KNEE.value,
        landmark.RIGHT_KNEE.value,
    ]

    # Euclidean distance per key joint, using only the x,y coordinates
    offsets = p1[key_joints_for_distance, :2] - p2[key_joints_for_distance, :2]
    avg_distance = np.linalg.norm(offsets, axis=1).mean()

    # Small distances -> similarity near 1, distances at/over the threshold -> 0
    distance_similarity = max(0, 1.0 - (avg_distance / distance_threshold))

    # Beyond the threshold the whole score is scaled down, bottoming out at 0.1
    distance_penalty = 1.0
    if avg_distance > distance_threshold:
        distance_penalty = max(0.1, 1.0 - ((avg_distance - distance_threshold) / distance_threshold))

    # ==============================================
    # PART 3: HYBRID COMBINATION
    # ==============================================
    hybrid_score = (cos_weight * cosine_similarity_score +
                    dist_weight * distance_similarity) * distance_penalty

    # Debug info (optional - can be removed for production)
    if avg_distance > distance_threshold * 1.5:  # Only log problematic cases
        print(f"   🔍 HYBRID DEBUG: COS={cosine_similarity_score:.3f}, DIST_SIM={distance_similarity:.3f}, "
              f"AVG_DIST={avg_distance:.2f}, PENALTY={distance_penalty:.3f}, FINAL={hybrid_score:.3f}")

    return hybrid_score

# ==============================================
#  Setup video input and output
# Open the input video, read its properties and create the annotated-output
# writer. Defines: cap, fps, total_frames, delay_frames, frame_width,
# frame_height, fourcc and out for the processing loop.
print(" Setting up video processing...")

if not os.path.exists(INPUT_VIDEO):
    print(f" Error: Video file {INPUT_VIDEO} not found!")
    raise SystemExit(1)  # non-zero status: setup failed

cap = cv2.VideoCapture(INPUT_VIDEO)
if not cap.isOpened():
    print(f" Error: Could not open video {INPUT_VIDEO}")
    raise SystemExit(1)

fps = cap.get(cv2.CAP_PROP_FPS)
# Every timestamp is computed as frame_index / fps, so a missing frame rate
# (reported as 0) would raise ZeroDivisionError later. `not fps > 0` also
# catches negative and NaN values.
if not fps > 0:
    print(f" Warning: Video reports invalid FPS ({fps}); assuming 30.0")
    fps = 30.0
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
delay_frames = int(fps * START_DELAY_SECONDS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Setup video output
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(OUTPUT_VIDEO, fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    # Analysis and the CSV are still useful without the annotated video,
    # so report the problem instead of aborting.
    print(f" Warning: Could not open {OUTPUT_VIDEO} for writing; no output video will be produced")

print(f" Video: {INPUT_VIDEO} | FPS: {fps} | Total frames: {total_frames}")
print(f" Output video: {OUTPUT_VIDEO}")
print(f" Output CSV: {OUTPUT_CSV}")

# ==============================================
#  Pre-process reference poses
# Convert every reference step into a flat pose array up front.
# processed_reference_poses keeps one slot per step (None where extraction
# failed) so it can be indexed with the step number.
print(" Pre-processing reference poses...")
processed_reference_poses = []

for i, step_data in enumerate(reference_data):
    ref_pose = extract_reference_pose(step_data)
    processed_reference_poses.append(ref_pose)

    if ref_pose is None:
        print(f"    Step {i+1}: Failed to process")
        continue

    # Prefer the name stored with the step; otherwise use the kata label,
    # or a generic one when the step number is beyond the label list.
    fallback_name = kata_steps[i] if i < len(kata_steps) else f'Step {i+1}'
    step_name = step_data.get('step_name', fallback_name)
    print("    Pose detected and normalized")
    print(f"    Step '{step_name}' processed successfully")
    print(get_key_points_preview(ref_pose))
    print()  # blank line between steps

usable_reference_count = len([p for p in processed_reference_poses if p is not None])
print(f" Successfully processed {usable_reference_count} reference poses")

# ==============================================
# Detection loop with FIXED TIME PERCEPTION AND HOLD TIMER
# State of the step-by-step matcher. All durations are measured in video
# frames and converted to seconds with fps, so results do not depend on how
# fast the machine processes the video.
current_step = 0
frame_idx = 0
# One tuple per finished step: (step index, label, timestamp s, similarity, status)
step_results = []
consecutive_good_frames = 0
# int() truncates: a MIN_HOLD_TIME shorter than one frame period gives 0 here.
required_good_frames = int(fps * MIN_HOLD_TIME)
best_similarity_this_step = 0.0

step_start_frame = None        # first analysed frame of the current step
pose_match_start_frame = None  # first frame of the current run of good frames
step_completed = False         # True on the frame that finished a step

print(" Starting HYBRID pose matching with FIXED TIME PERCEPTION")
print(f" Required good frames: {required_good_frames}")
print(f" Similarity threshold: {SIMILARITY_THRESHOLD}")
print(f" Distance threshold: {DISTANCE_THRESHOLD}")
print(f"  Cosine weight: {COSINE_WEIGHT}, Distance weight: {DISTANCE_WEIGHT}")
print(f" Step timeout: {STEP_TIMEOUT_SECONDS}s ({int(fps * STEP_TIMEOUT_SECONDS)} frames)")
print(f" Video FPS: {fps}")
print("-" * 50)

# The loop ends when the video runs out, the user presses 'q', or every
# reference step has been finished. Because the condition is re-checked before
# each frame, current_step is always a valid reference index inside the body.
while cap.isOpened() and current_step < len(reference_data):
    ret, frame = cap.read()

    if not ret:
        print(" Video ended.")
        break

    # Handle start delay: frames are shown and written but not analysed
    if frame_idx < delay_frames:
        remaining_delay = (delay_frames - frame_idx) / fps
        cv2.putText(frame, f"Starting in {remaining_delay:.1f}s...",
                    (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
        out.write(frame)
        cv2.imshow("Pose Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        frame_idx += 1
        continue

    # Convert frame index to video timestamp
    video_timestamp = frame_idx / fps
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.process(image_rgb)

    # NOTE: the overlay strings below are ASCII only - the Hershey fonts used
    # by cv2.putText cannot draw emoji and would show '?' instead.
    if results.pose_landmarks:
        drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # Step judged on THIS frame. Captured now because current_step may
        # advance below, while the overlay must still describe this step.
        evaluated_step = current_step
        if evaluated_step < len(kata_steps):
            step_label = kata_steps[evaluated_step]
        else:
            # Reference file has more steps than there are labels
            step_label = f"Step {evaluated_step+1}"

        # First analysed frame of a step: start its clock and clear counters
        if step_start_frame is None:
            step_start_frame = frame_idx
            consecutive_good_frames = 0
            pose_match_start_frame = None
            best_similarity_this_step = 0.0
            step_completed = False

        user_pose = normalize_landmarks(results.pose_landmarks.landmark)
        ref_pose = processed_reference_poses[current_step]

        if ref_pose is not None:
            similarity = pose_similarity(user_pose, ref_pose,
                                         distance_threshold=DISTANCE_THRESHOLD,
                                         cos_weight=COSINE_WEIGHT,
                                         dist_weight=DISTANCE_WEIGHT)
            best_similarity_this_step = max(best_similarity_this_step, similarity)

            elapsed_frames = frame_idx - step_start_frame
            elapsed_time = elapsed_frames / fps

            pose_is_good = similarity > SIMILARITY_THRESHOLD

            if pose_is_good:
                consecutive_good_frames += 1
                if pose_match_start_frame is None:
                    pose_match_start_frame = frame_idx
                    print(f" MATCH STARTED at frame {frame_idx} ({video_timestamp:.2f}s)! Step: {evaluated_step+1}, Similarity: {similarity:.4f}")
                    # Show detailed pose comparison when match starts
                    print("    Pose detected and normalized")
                    print(f"   Step '{step_label}' match detected")
                    print(compare_poses_detailed(user_pose, ref_pose, step_label))
            else:
                consecutive_good_frames = 0
                pose_match_start_frame = None

            # Compare against None explicitly: frame index 0 is a valid start
            # frame and must not be mistaken for "no match in progress".
            if pose_match_start_frame is not None:
                hold_frames = frame_idx - pose_match_start_frame
            else:
                hold_frames = 0
            hold_time = hold_frames / fps

            # A step succeeds only when BOTH hold conditions are met
            pose_held_long_enough = hold_time >= MIN_HOLD_TIME
            enough_consecutive_frames = consecutive_good_frames >= required_good_frames
            hold_satisfied = pose_held_long_enough and enough_consecutive_frames

            # Outcome of this frame for the current step: None while it is
            # still running, otherwise "SUCCESS" or "TIMEOUT".
            step_outcome = None
            if hold_satisfied:
                step_outcome = "SUCCESS"
                # The recorded value is the similarity of the completing frame
                step_results.append((evaluated_step, step_label, video_timestamp, similarity, "SUCCESS"))
                print(f" Step {evaluated_step+1} COMPLETED ({step_label}) - Similarity: {similarity:.3f} at {video_timestamp:.2f}s (held for {hold_time:.2f}s)")
                print("    Pose detected and normalized")
                print(f"   Step '{step_label}' processed successfully")
                print(compare_poses_detailed(user_pose, ref_pose, step_label))
                print("-" * 80)
            elif elapsed_time >= STEP_TIMEOUT_SECONDS:
                step_outcome = "TIMEOUT"
                step_results.append((evaluated_step, step_label, video_timestamp, best_similarity_this_step, "TIMEOUT"))
                print(f"⏰ TIMEOUT! Moving on from step {evaluated_step+1} ({step_label}) - Best similarity: {best_similarity_this_step:.3f} at {video_timestamp:.2f}s")
                print(f"   ❌ Step '{step_label}' timed out")
                print(compare_poses_detailed(user_pose, ref_pose, step_label))
                print("-" * 80)

            # Overlay is drawn BEFORE the state is reset so that it shows the
            # values that led to this frame's decision.
            cv2.putText(frame, f"Step {evaluated_step+1}: {step_label}", (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            cv2.putText(frame, f"Similarity: {similarity:.3f} (Best: {best_similarity_this_step:.3f})", (20, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)

            hold_status = " READY!" if hold_satisfied else f"{hold_time:.1f}s / {MIN_HOLD_TIME:.1f}s"
            color = (0, 255, 0) if hold_satisfied else (255, 0, 255)
            cv2.putText(frame, f"Hold: {hold_status}", (20, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            cv2.putText(frame, f"Frames: {consecutive_good_frames}/{required_good_frames}", (20, 130),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
            cv2.putText(frame, f"Time: {elapsed_time:.1f}s / {STEP_TIMEOUT_SECONDS:.1f}s", (20, 160),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
            cv2.putText(frame, f"Video Time: {video_timestamp:.2f}s", (20, 190),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (128, 128, 128), 2)

            if step_outcome == "SUCCESS":
                cv2.putText(frame, "Step Completed!", (20, 220),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            elif step_outcome == "TIMEOUT":
                cv2.putText(frame, "Step Timed Out", (20, 220),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            elif pose_is_good:
                cv2.putText(frame, "Good Pose - Keep Holding!", (20, 220),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
            else:
                cv2.putText(frame, "Adjust Pose", (20, 220),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            # Step finished (either way): advance and reset ALL per-step state
            if step_outcome is not None:
                step_completed = True
                current_step += 1
                step_start_frame = None
                consecutive_good_frames = 0
                pose_match_start_frame = None
                best_similarity_this_step = 0.0
        else:
            # No usable reference for this step. Waiting cannot help, so the
            # step is recorded and skipped instead of blocking the rest of
            # the kata until the video ends.
            cv2.putText(frame, f"Reference pose {evaluated_step+1} invalid", (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            print(f" Step {evaluated_step+1} SKIPPED ({step_label}) - reference pose is invalid")
            step_results.append((evaluated_step, step_label, video_timestamp, 0.0, "INVALID_REFERENCE"))
            step_completed = True
            current_step += 1
            step_start_frame = None
            consecutive_good_frames = 0
            pose_match_start_frame = None
            best_similarity_this_step = 0.0

    else:
        cv2.putText(frame, " No pose detected.", (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, f"Video Time: {video_timestamp:.2f}s", (20, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (128, 128, 128), 2)

    # Write frame to output video and display
    out.write(frame)
    cv2.imshow("Pose Detection", frame)
    # Mask to the low byte: some platforms return extra high bits from waitKey
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    frame_idx += 1

# ==============================================
#  Cleanup and save results
# Release the video handles, the preview window and the pose estimator
cap.release()
out.release()
cv2.destroyAllWindows()
pose.close()

# Split the recorded results by status once; index 4 of each tuple is the
# status and index 3 the similarity value.
successful_steps = [r for r in step_results if r[4] == "SUCCESS"]
timeout_steps = [r for r in step_results if r[4] == "TIMEOUT"]
total_reference_steps = len(reference_data)

print(f"\n{'=' * 50}")
print(" Video processing completed with FIXED TIME PERCEPTION and KEY POINTS PREVIEW.")
print(f" {len(successful_steps)} steps succeeded / {total_reference_steps} total.")

# Save detailed CSV: a header row followed by one row per recorded step
with open(OUTPUT_CSV, "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Step Index", "Step Name", "Timestamp (s)", "Best Similarity", "Status"])
    writer.writerows(step_results)

print(f" CSV saved: {OUTPUT_CSV}")
print(f" Video saved: {OUTPUT_VIDEO}")

# ==============================================
# Summary statistics (printed only when at least one step was recorded)
if step_results:
    print("\n SUMMARY:")
    print(f"   Successful steps: {len(successful_steps)}")
    print(f"   Timeout steps: {len(timeout_steps)}")
    print(f"   Success rate: {len(successful_steps)/total_reference_steps*100:.1f}%")

    if successful_steps:
        success_scores = [r[3] for r in successful_steps]
        avg_similarity = np.mean(success_scores)
        print(f"   Average similarity (successful): {avg_similarity:.3f}")
        min_similarity = np.min(success_scores)
        max_similarity = np.max(success_scores)
        print(f"   Similarity range (successful): {min_similarity:.3f} - {max_similarity:.3f}")

    if timeout_steps:
        avg_similarity_timeout = np.mean([r[3] for r in timeout_steps])
        print(f"   Average similarity (timeout): {avg_similarity_timeout:.3f}")


# Closing banner describing the matching approach
for banner_line in (
    "   • Combines cosine similarity (angles) + distance constraints (positions)",
    "   • Prevents false positives from similar angles but different positions",
    "   • Configurable distance threshold and weight parameters",
    "   • Distance penalty system for poses that are too far apart",
    "   • Enhanced pose comparison showing user vs reference coordinates",
    "   • Side-by-side analysis with difference calculations",
    "   • Color-coded match quality indicators (✅⚠️❌)",
    "   • Euclidean distance measurements for each joint",
    "   • Average difference scoring with match quality assessment",
    " Robust pose matching system with hybrid similarity implemented!",
):
    print(banner_line)

 Loading reference poses...
 Loaded 18 steps from nested JSON structure
 Setting up video processing...
 Video: vid.mp4 | FPS: 38.12452687358062 | Total frames: 1343
 Output video: output_detected_simple.mp4
 Output CSV: match_stats_simple.csv
 Pre-processing reference poses...
    Pose detected and normalized
    Step 'Ichi' processed successfully
    Key points preview:
      Left wrist:  (0.38, 0.81)
      Right wrist: (1.08, -0.45)
      Left elbow:  (1.04, 0.54)
      Right elbow: (1.73, -0.66)
      Left knee:   (-0.83, 0.58)
      Right knee:  (-1.35, -0.50)

    Pose detected and normalized
    Step 'Ni' processed successfully
    Key points preview:
      Left wrist:  (7.00, -0.17)
      Right wrist: (11.88, 7.93)
      Left elbow:  (7.61, -2.10)
      Right elbow: (11.32, 5.69)
      Left knee:   (-7.62, -1.82)
      Right knee:  (-6.48, 4.64)

    Pose detected and normalized
    Step 'San' processed successfully
    Key points preview:
      Left wrist:  (5.66, 0.93)
      

I0000 00:00:1754516133.698871 1462182 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1754516133.857657 1462350 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1754516133.879698 1462350 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1754516135.496598 1462350 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


 MATCH STARTED at frame 38 (1.00s)! Step: 1, Similarity: 0.9693
    Pose detected and normalized
   Step 'Ichi: Left Lower Block' match detected
    POSE COMPARISON for 'Ichi: Left Lower Block':
   Joint        User Pose          Reference          Difference  
   ------------------------------------------------------------
   Left wrist   (0.38, 0.72)       (0.38, 0.81)       0.081    ✅
   Right wrist  (1.00, -0.44)      (1.08, -0.45)      0.079    ✅
   Left elbow   (1.06, 0.47)       (1.04, 0.54)       0.074    ✅
   Right elbow  (1.60, -0.68)      (1.73, -0.66)      0.134    ✅
   Left knee    (-0.96, 0.56)      (-0.83, 0.58)      0.135    ✅
   Right knee   (-1.37, -0.45)     (-1.35, -0.50)     0.057    ✅
   ------------------------------------------------------------
   Average difference: 0.093 ✅ EXCELLENT MATCH!
 Step 1 COMPLETED (Ichi: Left Lower Block) - Similarity: 0.775 at 1.02s (held for 0.03s)
    Pose detected and normalized
   Step 'Ichi: Left Lower Block' processed success