In [None]:
import numpy as np
import cv2
from pathlib import Path

# Sequence to visualize.
# Alternative input: "../experiments/generated_output.npz"
FILE_PATH = "../training_dataset/sequences/vowel/S1_NSL_Vowel_Unprepared_Bright/A.npz"

# Output clip: file name, canvas size in pixels, frame rate.
OUTPUT_VIDEO = "skeleton_check.mp4"
WIDTH = HEIGHT = 800
FPS = 60

# MediaPipe connection maps: each pair is (start_landmark, end_landmark).
HAND_CONNECTIONS = [
    # Thumb
    (0, 1), (1, 2), (2, 3), (3, 4),
    # Index
    (0, 5), (5, 6), (6, 7), (7, 8),
    # Middle
    (5, 9), (9, 10), (10, 11), (11, 12),
    # Ring
    (9, 13), (13, 14), (14, 15), (15, 16),
    # Pinky
    (13, 17), (0, 17), (17, 18), (18, 19), (19, 20),
]

POSE_CONNECTIONS = [
    # Left arm
    (11, 12), (11, 13), (13, 15),
    # Right arm
    (12, 14), (14, 16),
    # Torso
    (11, 23), (12, 24), (23, 24),
]

def draw_skeleton(data_path, output_path):
    """Render the pose and hand landmarks of an .npz sequence as an .mp4 clip.

    Reads the 'pose', 'lh' and 'rh' arrays from ``data_path`` and writes one
    WIDTH x HEIGHT frame per pose entry at FPS frames per second. Pose
    coordinates are multiplied by the canvas size; each hand is drawn around
    the matching pose wrist (landmark 15 for left, 16 for right).

    Note: this notebook defines ``draw_skeleton`` in more than one cell; the
    most recently executed definition is the one that gets called.

    Args:
        data_path: Path of the .npz archive to visualize.
        output_path: Destination video file (str or Path).

    Raises:
        RuntimeError: If the video writer cannot be opened for ``output_path``.
    """
    # Pull the arrays into memory and close the archive immediately instead
    # of leaving its file handle open.
    with np.load(data_path) as data:
        pose = data['pose']
        lh = data['lh']
        rh = data['rh']

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, FPS, (WIDTH, HEIGHT))
    if not out.isOpened():
        # Without this check a failed open silently produces no video.
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    hand_visual_scale = 200  # pixels per unit of hand coordinate
    wrist_index = {'left': 15, 'right': 16}
    hand_colors = {'left': (255, 0, 0), 'right': (0, 0, 255)}
    # Used when the wrist landmark is all zeros; equals (200, 400) and
    # (600, 400) on the 800x800 canvas.
    fallback_centers = {
        'left': (WIDTH // 4, HEIGHT // 2),
        'right': (3 * WIDTH // 4, HEIGHT // 2),
    }

    def hand_to_pixel(pt, center):
        """Place a hand landmark on the canvas relative to its wrist center."""
        return (int(pt[0] * hand_visual_scale + center[0]),
                int(pt[1] * hand_visual_scale + center[1]))

    def visibility(landmark):
        """Return the 4th column if present; 3-column data counts as visible."""
        return landmark[3] if len(landmark) == 4 else 1.0

    print(f"Generating video for {len(pose)} frames...")

    try:
        for i in range(len(pose)):
            frame = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)

            # 1. DRAW HANDS
            for side, hand_pts in (('left', lh[i]), ('right', rh[i])):
                if np.all(hand_pts == 0):
                    continue  # hand absent in this frame

                # Generated data has no visibility column, so a wrist is
                # treated as missing when its x/y are both zero.
                wrist = pose[i][wrist_index[side]]
                if np.all(wrist[:2] == 0):
                    center = fallback_centers[side]
                else:
                    center = (int(wrist[0] * WIDTH), int(wrist[1] * HEIGHT))

                for start, end in HAND_CONNECTIONS:
                    cv2.line(frame,
                             hand_to_pixel(hand_pts[start], center),
                             hand_to_pixel(hand_pts[end], center),
                             hand_colors[side], 2)

            # 2. DRAW POSE (handles 3 or 4 columns)
            for start, end in POSE_CONNECTIONS:
                p1_raw = pose[i][start]
                p2_raw = pose[i][end]
                if visibility(p1_raw) > 0.5 and visibility(p2_raw) > 0.5:
                    p1 = (int(p1_raw[0] * WIDTH), int(p1_raw[1] * HEIGHT))
                    p2 = (int(p2_raw[0] * WIDTH), int(p2_raw[1] * HEIGHT))
                    cv2.line(frame, p1, p2, (0, 255, 0), 2)

            cv2.putText(frame, f"Frame: {i}", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            out.write(frame)
    finally:
        # Always finalize the file, even if a frame raised.
        out.release()

    print(f"✅ Skeleton video saved to: {output_path}")

# Render FILE_PATH into OUTPUT_VIDEO.
draw_skeleton(data_path=FILE_PATH, output_path=OUTPUT_VIDEO)

In [None]:
import numpy as np
import json
from pathlib import Path

def _rebuild_hand_landmarks(hand, meta):
    """Map one frame of normalized hand landmarks back to original coordinates.

    ``meta`` is [wx, wy, wz, scale]; each axis is (normalized * scale) + wrist.
    """
    wx, wy, wz, scale = meta
    return [
        {
            'x': float((lm[0] * scale) + wx),
            'y': float((lm[1] * scale) + wy),
            'z': float((lm[2] * scale) + wz)
        }
        for lm in hand
    ]


def convert_enhanced_npz_to_json(npz_path, output_json_path, video_path_label=None):
    """
    Converts enhanced NPZ files back to JSON.
    Reconstructs original coordinates using: (normalized * scale) + wrist

    Args:
        npz_path: Path (str or Path) of the NPZ archive. It must contain
            'pose', 'lh', 'rh', 'lh_meta' and 'rh_meta'; 'video_info'
            ([fps, width, height]) is optional.
        output_json_path: Destination of the JSON file.
        video_path_label: Value stored under metadata['video_path'];
            "unknown" is written when it is falsy.

    Raises:
        KeyError: If a required array is missing from the archive.
    """
    from pathlib import Path

    # Accept plain strings as well as Path objects (``.name`` is used below).
    npz_path = Path(npz_path)

    # 1. Load the NPZ data, closing the archive as soon as it has been read
    with np.load(npz_path) as data:
        pose_array = data['pose']      # (Frames, 33, 4), or 3 columns without visibility
        lh_array = data['lh']          # (Frames, 21, 3)
        rh_array = data['rh']          # (Frames, 21, 3)
        lh_meta = data['lh_meta']      # (Frames, 4) -> [wx, wy, wz, scale]
        rh_meta = data['rh_meta']      # (Frames, 4) -> [wx, wy, wz, scale]

        # Video info [fps, width, height]; fall back to defaults if absent.
        # The stored fps is ignored because the JSON always reports 60.
        if 'video_info' in data:
            _, width, height = data['video_info']
        else:
            width, height = 1920, 1080

    # The user specifically requested 'fps': 60 in the JSON
    fps_to_use = 60

    # Pose without a 4th column is treated as fully visible.
    has_visibility = pose_array.ndim == 3 and pose_array.shape[-1] >= 4

    total_frames = int(pose_array.shape[0])

    # 2. Construct JSON Structure
    output_data = {
        'metadata': {
            'video_path': str(video_path_label) if video_path_label else "unknown",
            'fps': float(fps_to_use),
            'frame_width': int(width),
            'frame_height': int(height),
            'total_frames': total_frames,
            'frame_skip': 1,
            'hands_swapped': True
        },
        'frames': []
    }

    # 3. Process Frames
    for i in range(total_frames):
        frame_data = {
            'frame_number': i + 1,
            'timestamp': i / fps_to_use,
            'pose': None,
            'hands': {'left': None, 'right': None},
            'face': None
        }

        # --- Reconstruct Pose (an all-zero frame means "no pose") ---
        if not np.all(pose_array[i] == 0):
            frame_data['pose'] = [
                {
                    'x': float(lm[0]),
                    'y': float(lm[1]),
                    'z': float(lm[2]),
                    'visibility': float(lm[3]) if has_visibility else 1.0
                }
                for lm in pose_array[i]
            ]

        # --- Reconstruct Hands (an all-zero hand means "not detected") ---
        if not np.all(lh_array[i] == 0):
            frame_data['hands']['left'] = _rebuild_hand_landmarks(lh_array[i], lh_meta[i])

        if not np.all(rh_array[i] == 0):
            frame_data['hands']['right'] = _rebuild_hand_landmarks(rh_array[i], rh_meta[i])

        output_data['frames'].append(frame_data)

    # 4. Save JSON
    with open(output_json_path, 'w') as f:
        json.dump(output_data, f, indent=2)

    print(f"Successfully converted {npz_path.name} to {output_json_path}")

# Example: convert the generated sequence into keypoints.json.
npz_file = Path("../experiments/generated_output.npz")
convert_enhanced_npz_to_json(
    npz_path=npz_file,
    output_json_path="keypoints.json",
    video_path_label="S1/A.MOV",
)

In [None]:
import numpy as np
import cv2
from pathlib import Path

# Print the left-hand landmarks of the first and the last frame.
data = np.load("../experiments/generated_output.npz")
for frame_idx in (0, -1):
    print(data['lh'][frame_idx])

In [None]:
import numpy as np
import cv2
from pathlib import Path

# Sequence to visualize.
# Alternative input: "../training_dataset/sequences/consonant/S1_NSL_Consonant_Bright/D_SHA.npz"
FILE_PATH = "../training_dataset/sequences/NSL_Consonant_Multi/S14_NSL_Consonant_RealWorld/S14_NSL_Consonant/BA_883_913.npz"

# Output clip: file name, canvas size in pixels, frame rate.
OUTPUT_VIDEO = "skeleton_check.mp4"
WIDTH = HEIGHT = 800
FPS = 60

# MediaPipe connection maps: each pair is (start_landmark, end_landmark).
HAND_CONNECTIONS = [
    # Thumb
    (0, 1), (1, 2), (2, 3), (3, 4),
    # Index
    (0, 5), (5, 6), (6, 7), (7, 8),
    # Middle
    (5, 9), (9, 10), (10, 11), (11, 12),
    # Ring
    (9, 13), (13, 14), (14, 15), (15, 16),
    # Pinky
    (13, 17), (0, 17), (17, 18), (18, 19), (19, 20),
]

POSE_CONNECTIONS = [
    # Left arm
    (11, 12), (11, 13), (13, 15),
    # Right arm
    (12, 14), (14, 16),
    # Torso
    (11, 23), (12, 24), (23, 24),
]

def draw_skeleton(data_path, output_path):
    """Render a chest-centred skeleton video from an .npz landmark sequence.

    For every frame the midpoint of the shoulders (pose landmarks 11 and 12)
    is shifted to the middle of the WIDTH x HEIGHT canvas. Pose bones are
    drawn in green, then each hand is drawn around its pose wrist (landmark
    15 for left, 16 for right) after dividing the hand data by 5.0.

    Note: this notebook defines ``draw_skeleton`` in more than one cell; the
    most recently executed definition is the one that gets called.

    Args:
        data_path: Path of the .npz archive with 'pose', 'lh' and 'rh'.
        output_path: Destination video file (str or Path).

    Raises:
        RuntimeError: If the video writer cannot be opened for ``output_path``.
    """
    # Pull the arrays into memory and close the archive immediately instead
    # of leaving its file handle open.
    with np.load(data_path) as data:
        pose = data['pose']
        lh = data['lh']
        rh = data['rh']

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, FPS, (WIDTH, HEIGHT))
    if not out.isOpened():
        # Without this check a failed open silently produces no video.
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    hand_visual_scale = 400  # Visual size of hand
    hand_data_scale = 5.0    # Divisor that reverses the training-time scaling
    wrist_index = {'left': 15, 'right': 16}
    hand_colors = {'left': (255, 0, 0), 'right': (0, 0, 255)}
    # Used when the pose wrist is missing (x and y both zero).
    fallback_anchors = {
        'left': (WIDTH // 4, HEIGHT // 2),
        'right': (3 * WIDTH // 4, HEIGHT // 2),
    }

    def to_canvas(pt_raw, offset_x, offset_y):
        """Scale a 0-1 pose point to pixels and apply the centering offset."""
        return (int(pt_raw[0] * WIDTH + offset_x),
                int(pt_raw[1] * HEIGHT + offset_y))

    def hand_to_pixel(pt, anchor):
        """Place a hand landmark on the canvas relative to its wrist anchor."""
        return (int(pt[0] * hand_visual_scale + anchor[0]),
                int(pt[1] * hand_visual_scale + anchor[1]))

    def visibility(landmark):
        """Return the 4th column if present; 3-column data counts as visible."""
        return landmark[3] if len(landmark) == 4 else 1.0

    print(f"Generating centered video for {len(pose)} frames...")

    try:
        for i in range(len(pose)):
            frame = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)

            # --- STEP 1: CALCULATE CENTERING OFFSET ---
            # Chest center = midpoint of the shoulders, in 0-1 coordinates.
            ls_raw = pose[i][11]  # Left Shoulder
            rs_raw = pose[i][12]  # Right Shoulder
            curr_center_x = (ls_raw[0] + rs_raw[0]) / 2
            curr_center_y = (ls_raw[1] + rs_raw[1]) / 2

            # Pixel shift that moves the chest center to the canvas center.
            offset_x = (WIDTH / 2) - (curr_center_x * WIDTH)
            offset_y = (HEIGHT / 2) - (curr_center_y * HEIGHT)

            # --- STEP 2: DRAW POSE (Centered) ---
            for start, end in POSE_CONNECTIONS:
                p1_raw = pose[i][start]
                p2_raw = pose[i][end]
                if visibility(p1_raw) > 0.5 and visibility(p2_raw) > 0.5:
                    cv2.line(frame,
                             to_canvas(p1_raw, offset_x, offset_y),
                             to_canvas(p2_raw, offset_x, offset_y),
                             (0, 255, 0), 2)

            # --- STEP 3: DRAW HANDS (Centered via Wrist) ---
            for side, hand_pts in (('left', lh[i]), ('right', rh[i])):
                if np.all(hand_pts == 0):
                    continue  # hand absent in this frame

                wrist = pose[i][wrist_index[side]]
                if np.all(wrist[:2] == 0):
                    current_anchor = fallback_anchors[side]
                else:
                    current_anchor = to_canvas(wrist, offset_x, offset_y)

                current_hand = hand_pts / hand_data_scale

                for start, end in HAND_CONNECTIONS:
                    cv2.line(frame,
                             hand_to_pixel(current_hand[start], current_anchor),
                             hand_to_pixel(current_hand[end], current_anchor),
                             hand_colors[side], 2)

                # Draw points
                for pt in current_hand:
                    cv2.circle(frame, hand_to_pixel(pt, current_anchor), 3, (255, 255, 255), -1)

            cv2.putText(frame, f"Frame: {i}", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            out.write(frame)
    finally:
        # Always finalize the file, even if a frame raised.
        out.release()

    print(f"✅ Centered skeleton video saved to: {output_path}")
    
# Render FILE_PATH into OUTPUT_VIDEO.
draw_skeleton(data_path=FILE_PATH, output_path=OUTPUT_VIDEO)

In [None]:
import numpy as np
import cv2
from pathlib import Path

# --- CONFIGURATION ---
# Input sequence and output clip (file name, canvas size, frame rate).
FILE_PATH = "../experiments/generated_output.npz"
OUTPUT_VIDEO = "skeleton_check.mp4"
WIDTH = HEIGHT = 1000  # Larger canvas
FPS = 60

# MediaPipe connection maps: each pair is (start_landmark, end_landmark).
HAND_CONNECTIONS = [
    # Thumb
    (0, 1), (1, 2), (2, 3), (3, 4),
    # Index
    (0, 5), (5, 6), (6, 7), (7, 8),
    # Middle
    (5, 9), (9, 10), (10, 11), (11, 12),
    # Ring
    (9, 13), (13, 14), (14, 15), (15, 16),
    # Pinky
    (13, 17), (0, 17), (17, 18), (18, 19), (19, 20),
]

POSE_CONNECTIONS = [
    # Left arm
    (11, 12), (11, 13), (13, 15),
    # Right arm
    (12, 14), (14, 16),
    # Torso
    (11, 23), (12, 24), (23, 24),
]

def draw_generated_skeleton(data_path, output_path):
    """Render a generated (normalized) pose-and-hands sequence as an .mp4 clip.

    Pose coordinates are multiplied by a fixed visual scale and shifted to
    (WIDTH // 2, HEIGHT // 3). Hands are divided by 5.0, shrunk by a
    proportion factor and drawn around the pose wrists (landmark 15 for
    left, 16 for right).

    Args:
        data_path: Path of the .npz archive with 'pose', 'lh' and 'rh'.
        output_path: Destination video file (str or Path).

    Raises:
        RuntimeError: If the video writer cannot be opened for ``output_path``.
    """
    # Pull the arrays into memory and close the archive immediately instead
    # of leaving its file handle open.
    with np.load(data_path) as data:
        pose = data['pose']  # (Frames, 33, 3)
        lh = data['lh']      # (Frames, 21, 3)
        rh = data['rh']      # (Frames, 21, 3)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, FPS, (WIDTH, HEIGHT))
    if not out.isOpened():
        # Without this check a failed open silently produces no video.
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    # --- VISUAL SCALING PARAMETERS ---
    # Since data is normalized by shoulder width (approx 0.3),
    # we need a large scale to see it on screen.
    VISUAL_SCALE = 500
    OFFSET_X, OFFSET_Y = WIDTH // 2, HEIGHT // 3  # Move skeleton to center-top

    # The "Proportion Factor" makes the hand 25% the size of the shoulder
    # width so it looks like a real human hand relative to the body.
    PROPORTION_FACTOR = 0.25

    wrist_index = {'left': 15, 'right': 16}
    hand_colors = {'left': (255, 0, 0), 'right': (0, 0, 255)}
    # Used when the pose wrist landmark is all zeros.
    fallback_anchors = {
        'left': (WIDTH // 3, HEIGHT // 2),
        'right': (2 * WIDTH // 3, HEIGHT // 2),
    }

    def to_pixel(pt):
        """Convert a normalized pose coordinate to a screen pixel."""
        return (int(pt[0] * VISUAL_SCALE + OFFSET_X),
                int(pt[1] * VISUAL_SCALE + OFFSET_Y))

    def hand_to_pixel(pt, anchor):
        """Place a hand landmark on the canvas relative to its wrist anchor."""
        return (int(pt[0] * VISUAL_SCALE + anchor[0]),
                int(pt[1] * VISUAL_SCALE + anchor[1]))

    print(f"Generating video for {len(pose)} frames...")

    try:
        for i in range(len(pose)):
            # Create black canvas
            frame = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)

            # 1. DRAW POSE
            for start, end in POSE_CONNECTIONS:
                p1_raw = pose[i][start]
                p2_raw = pose[i][end]

                # Skip the bone if either end is masked (all zeros);
                # otherwise it would be drawn to the offset point.
                if np.all(p1_raw == 0) or np.all(p2_raw == 0):
                    continue
                cv2.line(frame, to_pixel(p1_raw), to_pixel(p2_raw), (0, 255, 0), 3)

            # 2. DRAW HANDS
            for side, hand_pts in (('left', lh[i]), ('right', rh[i])):
                if np.all(hand_pts == 0):
                    continue  # hand absent in this frame

                wrist = pose[i][wrist_index[side]]
                if np.all(wrist == 0):
                    current_anchor = fallback_anchors[side]
                else:
                    current_anchor = to_pixel(wrist)

                # --- COMBINED SCALING ---
                # 1. Divide by 5.0 to reverse training scaling
                # 2. Multiply by PROPORTION_FACTOR to fit the body
                current_hand = (hand_pts) / 5.0 * PROPORTION_FACTOR

                for start, end in HAND_CONNECTIONS:
                    cv2.line(frame,
                             hand_to_pixel(current_hand[start], current_anchor),
                             hand_to_pixel(current_hand[end], current_anchor),
                             hand_colors[side], 2)

                for pt in current_hand:
                    cv2.circle(frame, hand_to_pixel(pt, current_anchor), 2, (255, 255, 255), -1)

            # Add Frame Info and Text
            cv2.putText(frame, f"Frame: {i}", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 200), 1)
            out.write(frame)
    finally:
        # Always finalize the file, even if a frame raised.
        out.release()

    print(f"✅ Video saved: {output_path}")

# Render FILE_PATH into OUTPUT_VIDEO.
draw_generated_skeleton(data_path=FILE_PATH, output_path=OUTPUT_VIDEO)

In [8]:
import numpy as np
import cv2
from pathlib import Path

# --- SETTINGS ---
# Input: path to the generated file. Output: clip name, canvas size, frame rate.
FILE_PATH = "../experiments/generated_output.npz"
OUTPUT_VIDEO = "skeleton_check.mp4"
WIDTH = HEIGHT = 1000
FPS = 60

# Tuning knobs for fitting the character on screen.
SKELETON_SCALE = 500    # Size of the body on screen
HAND_VISUAL_SIZE = 3.5  # Relative size of the hands
HAND_DATA_SCALE = 5.0   # MUST MATCH the scale used in your dataset.py

# Colors (BGR)
C_BODY = (0, 255, 0)        # Neon Green
C_LH = (255, 255, 0)        # Cyan
C_RH = (0, 0, 255)          # Red
C_JOINTS = (255, 255, 255)  # White

# Connections mapping: each pair is (start_landmark, end_landmark).
POSE_CONN = [
    (11, 12), (11, 13), (13, 15),
    (12, 14), (14, 16),
    (11, 23), (12, 24), (23, 24),
]
HAND_CONN = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (0, 5), (5, 6), (6, 7), (7, 8),
    (9, 10), (10, 11), (11, 12),
    (13, 14), (14, 15), (15, 16),
    (17, 18), (18, 19), (19, 20),
    (0, 17), (5, 9), (9, 13), (13, 17),
]

def visualize_nsl(data_path, output_path):
    """Render a pose-and-hands sequence on a dark canvas with info overlays.

    Pose coordinates are multiplied by SKELETON_SCALE and shifted to the
    canvas center. Hands are divided by HAND_DATA_SCALE, enlarged by
    SKELETON_SCALE * HAND_VISUAL_SIZE and drawn around the pose wrists
    (landmark 15 for left, 16 for right).

    Args:
        data_path: Path of the .npz archive with 'pose', 'lh' and 'rh'.
        output_path: Destination video file (str or Path).

    Raises:
        RuntimeError: If the video writer cannot be opened for ``output_path``.
    """
    # Pull the arrays into memory and close the archive immediately instead
    # of leaving its file handle open.
    with np.load(data_path) as data:
        pose, lh, rh = data['pose'], data['lh'], data['rh']

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, FPS, (WIDTH, HEIGHT))
    if not out.isOpened():
        # Without this check a failed open silently produces no video.
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    wrist_index = {'left': 15, 'right': 16}
    colors = {'left': C_LH, 'right': C_RH}

    def to_screen(pt):
        """Project (0,0)-centered data to the screen center."""
        return (int(pt[0] * SKELETON_SCALE + WIDTH/2),
                int(pt[1] * SKELETON_SCALE + HEIGHT/2))

    def hand_to_screen(pt, anchor):
        """Project a hand landmark relative to its wrist anchor."""
        return (int(pt[0] * SKELETON_SCALE * HAND_VISUAL_SIZE + anchor[0]),
                int(pt[1] * SKELETON_SCALE * HAND_VISUAL_SIZE + anchor[1]))

    print(f"🎬 Processing {len(pose)} frames...")

    try:
        for i in range(len(pose)):
            # Dark (not pure black) background
            frame = np.full((HEIGHT, WIDTH, 3), 15, dtype=np.uint8)

            # 1. Draw Pose
            for start, end in POSE_CONN:
                cv2.line(frame, to_screen(pose[i][start]), to_screen(pose[i][end]),
                         C_BODY, 3, cv2.LINE_AA)

            # 2. Draw Hands (Anchored to Pose Wrists)
            for side, h_pts in (('left', lh[i]), ('right', rh[i])):
                if np.all(h_pts == 0):
                    continue  # hand absent in this frame

                # UN-SCALE: Reverse the scaling applied for training
                h_pts = h_pts / HAND_DATA_SCALE
                anchor = to_screen(pose[i][wrist_index[side]])

                # Draw bones
                for s, e in HAND_CONN:
                    cv2.line(frame, hand_to_screen(h_pts[s], anchor),
                             hand_to_screen(h_pts[e], anchor),
                             colors[side], 2, cv2.LINE_AA)

                # Draw joint dots
                for pt in h_pts:
                    cv2.circle(frame, hand_to_screen(pt, anchor), 3, C_JOINTS, -1, cv2.LINE_AA)

            # 3. Info Overlay
            cv2.putText(frame, f"FRAME: {i}", (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 100, 100), 2)
            cv2.putText(frame, "NSL FINGERSPELLING GENERATOR", (30, HEIGHT-30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (70, 70, 70), 1)

            out.write(frame)
    finally:
        # Always finalize the file, even if a frame raised.
        out.release()

    print(f"✅ Video created successfully: {output_path}")

# Render FILE_PATH into OUTPUT_VIDEO.
visualize_nsl(data_path=FILE_PATH, output_path=OUTPUT_VIDEO)

🎬 Processing 225 frames...
✅ Video created successfully: skeleton_check.mp4


In [None]:
import numpy as np
import cv2

# --- CONFIGURATION ---
# Sequence to visualize.
# Alternative input: "../training_dataset/sequences/consonant/S1_NSL_Consonant_Bright/D_SHA.npz"
FILE_PATH = "../experiments/generated_output.npz"

# Output clip: file name, canvas size in pixels, frame rate.
OUTPUT_VIDEO = "skeleton_check.mp4"
WIDTH = HEIGHT = 1000
FPS = 60

# MediaPipe hand connections: each pair is (start_landmark, end_landmark).
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (0, 5), (5, 6), (6, 7), (7, 8),
    (5, 9), (9, 10), (10, 11), (11, 12),
    (9, 13), (13, 14), (14, 15), (15, 16),
    (13, 17), (0, 17),
    (17, 18), (18, 19), (19, 20),
]

def draw_generated_hands_only(data_path, output_path, visual_scale=800, hand_data_scale=5.0):
    """Render only the two hands of an .npz sequence at fixed screen positions.

    The left hand is drawn around (WIDTH // 3, HEIGHT // 2) and the right
    hand around (2 * WIDTH // 3, HEIGHT // 2), so the result does not depend
    on pose data. A hand whose landmarks are all zero is skipped.

    Note: this notebook defines ``draw_generated_hands_only`` in more than
    one cell with different default scales; the most recently executed
    definition is the one that gets called.

    Args:
        data_path: Path of the .npz archive with 'lh' and 'rh'.
        output_path: Destination video file (str or Path).
        visual_scale: Pixels per unit of (un-scaled) hand coordinate.
        hand_data_scale: Divisor applied to the stored hand coordinates to
            reverse the training scale.

    Raises:
        RuntimeError: If the video writer cannot be opened for ``output_path``.
    """
    # Pull the arrays into memory and close the archive immediately instead
    # of leaving its file handle open.
    with np.load(data_path) as data:
        lh = data['lh']  # (Frames, 21, 3)
        rh = data['rh']  # (Frames, 21, 3)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, FPS, (WIDTH, HEIGHT))
    if not out.isOpened():
        # Without this check a failed open silently produces no video.
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    # Fixed anchors so hands don't depend on pose
    left_anchor = (WIDTH // 3, HEIGHT // 2)
    right_anchor = (2 * WIDTH // 3, HEIGHT // 2)

    def hand_to_pixel(pt, anchor):
        """Place a hand landmark on the canvas relative to its anchor."""
        return (int(pt[0] * visual_scale + anchor[0]),
                int(pt[1] * visual_scale + anchor[1]))

    print(f"Generating hand-only video for {len(lh)} frames...")

    try:
        for i in range(len(lh)):
            frame = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)

            for hand_pts, anchor, color in (
                (lh[i], left_anchor, (255, 0, 0)),
                (rh[i], right_anchor, (0, 0, 255)),
            ):
                if np.all(hand_pts == 0):
                    continue  # hand absent in this frame

                # Reverse training scale
                hand = hand_pts / hand_data_scale

                # Draw bones
                for s, e in HAND_CONNECTIONS:
                    cv2.line(frame, hand_to_pixel(hand[s], anchor),
                             hand_to_pixel(hand[e], anchor), color, 2)

                # Draw joints
                for pt in hand:
                    cv2.circle(frame, hand_to_pixel(pt, anchor), 3, (255, 255, 255), -1)

            cv2.putText(frame, f"Frame: {i}", (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 200), 1)

            out.write(frame)
    finally:
        # Always finalize the file, even if a frame raised.
        out.release()

    print(f"✅ Hand-only video saved: {output_path}")

# Render FILE_PATH into OUTPUT_VIDEO.
draw_generated_hands_only(data_path=FILE_PATH, output_path=OUTPUT_VIDEO)

In [29]:
import numpy as np
import cv2

# --- CONFIGURATION ---
# Sequence to visualize. Other inputs that have been used here:
#   experiments\eval_samples\epoch_50_sample_A.npz
#   "../training_dataset/sequences/NSL_Consonant_Multi/S3_NSL_Consonant_Prepared/S3_all_consonant_Phone_Camera/JHA_989_1064.npz"
FILE_PATH = "../experiments/generated_output.npz"

# Output clip: file name, canvas size in pixels, frame rate.
OUTPUT_VIDEO = "skeleton_check.mp4"
WIDTH = HEIGHT = 1000
FPS = 60

# MediaPipe hand connections: each pair is (start_landmark, end_landmark).
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (0, 5), (5, 6), (6, 7), (7, 8),
    (5, 9), (9, 10), (10, 11), (11, 12),
    (9, 13), (13, 14), (14, 15), (15, 16),
    (13, 17), (0, 17),
    (17, 18), (18, 19), (19, 20),
]

def draw_generated_hands_only(data_path, output_path, visual_scale=300, hand_data_scale=1.0):
    """Render only the two hands of an .npz sequence at fixed screen positions.

    The left hand is drawn around (WIDTH // 3, HEIGHT // 2) and the right
    hand around (2 * WIDTH // 3, HEIGHT // 2), so the result does not depend
    on pose data. A hand whose landmarks are all zero is skipped. With the
    default ``hand_data_scale`` of 1.0 the stored coordinates are used as-is.

    Note: this notebook defines ``draw_generated_hands_only`` in more than
    one cell with different default scales; the most recently executed
    definition is the one that gets called.

    Args:
        data_path: Path of the .npz archive with 'lh' and 'rh'.
        output_path: Destination video file (str or Path).
        visual_scale: Pixels per unit of hand coordinate.
        hand_data_scale: Divisor applied to the stored hand coordinates
            before drawing.

    Raises:
        RuntimeError: If the video writer cannot be opened for ``output_path``.
    """
    # Pull the arrays into memory and close the archive immediately instead
    # of leaving its file handle open.
    with np.load(data_path) as data:
        lh = data['lh']  # (Frames, 21, 3)
        rh = data['rh']  # (Frames, 21, 3)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, FPS, (WIDTH, HEIGHT))
    if not out.isOpened():
        # Without this check a failed open silently produces no video.
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    # Fixed anchors so hands don't depend on pose
    left_anchor = (WIDTH // 3, HEIGHT // 2)
    right_anchor = (2 * WIDTH // 3, HEIGHT // 2)

    def hand_to_pixel(pt, anchor):
        """Place a hand landmark on the canvas relative to its anchor."""
        return (int(pt[0] * visual_scale + anchor[0]),
                int(pt[1] * visual_scale + anchor[1]))

    print(f"Generating hand-only video for {len(lh)} frames...")

    try:
        for i in range(len(lh)):
            frame = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)

            for hand_pts, anchor, color in (
                (lh[i], left_anchor, (255, 0, 0)),
                (rh[i], right_anchor, (0, 0, 255)),
            ):
                if np.all(hand_pts == 0):
                    continue  # hand absent in this frame

                hand = hand_pts / hand_data_scale

                # Draw bones
                for s, e in HAND_CONNECTIONS:
                    cv2.line(frame, hand_to_pixel(hand[s], anchor),
                             hand_to_pixel(hand[e], anchor), color, 2)

                # Draw joints
                for pt in hand:
                    cv2.circle(frame, hand_to_pixel(pt, anchor), 3, (255, 255, 255), -1)

            cv2.putText(frame, f"Frame: {i}", (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 200), 1)

            out.write(frame)
    finally:
        # Always finalize the file, even if a frame raised.
        out.release()

    print(f"✅ Hand-only video saved: {output_path}")

# Render FILE_PATH into OUTPUT_VIDEO.
draw_generated_hands_only(data_path=FILE_PATH, output_path=OUTPUT_VIDEO)

Generating hand-only video for 200 frames...
✅ Hand-only video saved: skeleton_check.mp4
