In [6]:
import cv2
import pandas as pd
import numpy as np
from pathlib import Path
from IPython.display import display

# --- CONFIGURATION ---
# Path of the video file to annotate; handed to cv2.VideoCapture by the annotator.
VIDEO_PATH = "../data/NSL_Vowel/S3_NSL_Vowel_Prepared/S3_NSL_Vowel_Prepared_Camera_all.MOV" 
# "vowel" selects the VOWELS sequence; any other value selects CONSONANTS.
MODE = "vowel"
IS_CROPPED = True  # Set this based on the video you are opening
# NOTE: IS_CROPPED is not used to process frames; it is only copied into the
# 'is_cropped' column of every annotation row.

# Label sequences. The annotator walks the chosen list in order, one label per
# saved start/end pair, so the order here must match the order in the video.
# NOTE(review): presumably romanized sign-alphabet letter names ("NSL" in the
# data path) -- confirm against the dataset documentation.
VOWELS = ["A", "AA", "I", "II", "U", "UU", "RI", "E", "AI", "O", "AU", "AM", "AH"]
CONSONANTS = ["KA", "KHA", "GA", "GHA", "NGA", "CHA", "CHHA", "JA", "JHA", "YNA", 
              "TA", "THA", "DA", "DHA", "ANA", "TABAL", "THABAL", "DABAL", "DHABAL", 
              "NA", "PA", "PHA", "BA", "BHA", "MA", "YA", "RA", "LA", "WA", 
              "SHA", "SHHA", "SA", "HA", "KSHA", "TRA", "GYA"]

def _draw_annotator_overlay(frame, current_letter, temp_start, frame_idx, total_frames):
    """Return a copy of *frame* with the annotator status box drawn on it."""
    canvas = frame.copy()
    waiting_for_start = temp_start is None

    # Yellow (BGR) while waiting for a start mark, green once one is pending.
    color = (0, 255, 255) if waiting_for_start else (0, 255, 0)
    if waiting_for_start:
        status = "WAITING FOR START (S)"
    else:
        status = f"START MARKED AT {temp_start}: WAITING FOR END (E)"

    # Thickness -1 fills the rectangle, giving the text a solid background.
    cv2.rectangle(canvas, (0, 0), (500, 150), (0, 0, 0), -1)
    font = cv2.FONT_HERSHEY_PLAIN
    cv2.putText(canvas, f"Target: {current_letter}", (20, 40), font, 2, (255, 255, 255), 2)
    cv2.putText(canvas, status, (20, 80), font, 1.2, color, 2)
    cv2.putText(canvas, f"Frame: {frame_idx} / {total_frames}", (20, 120), font, 1, (200, 200, 200), 1)
    return canvas


def run_professional_annotator(video_path: str, mode: str, is_cropped: bool) -> "pd.DataFrame | None":
    """Interactively mark the start and end frame of each letter in a video.

    The video is shown one frame at a time in an OpenCV window. Letters are
    taken in order from ``VOWELS`` when ``mode == "vowel"`` and from
    ``CONSONANTS`` otherwise; each saved start/end pair advances to the next
    letter. Keys are accepted in either case.

    Controls:
        S / E  mark the start / end of the current letter
        D / F  step forward 1 / 10 frames
        A / R  step backward 1 / 10 frames
        W      discard the pending start mark
        Q      quit and return what has been annotated so far

    Only the navigation keys move the displayed frame; marking keys and
    unrecognised keys leave it where it is.

    Args:
        video_path: Path of the video file to open.
        mode: ``"vowel"`` for the vowel sequence, anything else for consonants.
        is_cropped: Stored unchanged in the ``is_cropped`` column of every row.

    Returns:
        A DataFrame with columns ``label``, ``start_frame``, ``end_frame``,
        ``is_cropped``, ``video_name`` and ``folder_name``, or ``None`` when
        the video cannot be opened or nothing was annotated.
    """
    sequence = VOWELS if mode == "vowel" else CONSONANTS
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Without this guard every read fails and the loop below never ends.
        cap.release()
        print(f"Error: could not open video: {video_path}")
        return None

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # Some containers report no frame count; then the upper bound is only
    # discovered when a read fails.
    last_frame = total_frames - 1 if total_frames > 0 else float("inf")
    steps = {"d": 1, "f": 10, "a": -1, "r": -10}

    current_idx = 0
    annotations = []
    temp_start = None  # frame index of the pending start mark, if any
    frame_idx = 0      # index of the frame currently on screen

    print(f"--- Annotating {Path(video_path).name} ---")
    print("CONTROLS:")
    print(" [S] -> Mark START of letter | [E] -> Mark END of letter")
    print(" [D/F] -> Forward 1/10       | [A/R] -> Backward 1/10")
    print(" [W] -> Clear current Start  | [Q] -> Quit/Save")
    print("-" * 50)

    try:
        while True:
            # read() advances the decoder by one frame, so only seek when the
            # decoder is not already positioned on the frame we want.
            if int(cap.get(cv2.CAP_PROP_POS_FRAMES)) != frame_idx:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret:
                if frame_idx <= 0:
                    print("Error: no readable frames in video.")
                    break
                # The reported frame count can overshoot what is decodable;
                # walk back until a frame can be read instead of spinning.
                last_frame = frame_idx - 1
                frame_idx = last_frame
                continue

            finished = current_idx >= len(sequence)
            current_letter = "FINISHED" if finished else sequence[current_idx]
            canvas = _draw_annotator_overlay(
                frame, current_letter, temp_start, frame_idx, total_frames
            )
            cv2.imshow("Multi-Letter Annotator", canvas)

            # Lower-casing lets Shift/Caps Lock variants of the keys work too.
            command = chr(cv2.waitKey(0) & 0xFF).lower()

            if command == "q":
                break

            if command in steps:
                frame_idx = max(0, min(last_frame, frame_idx + steps[command]))

            elif command == "w":  # RESET TEMP
                temp_start = None
                print("üóëÔ∏è Current start cleared.")

            elif command in ("s", "e") and finished:
                # Prevents rows labelled "FINISHED" from being recorded.
                print("All letters are annotated. Press Q to quit and save.")

            elif command == "s":  # MARK START
                temp_start = frame_idx
                print(f"üìç Start of {current_letter} set to {temp_start}")

            elif command == "e":  # MARK END
                if temp_start is None:
                    print("‚ùå Error: Mark Start (S) first!")
                elif frame_idx <= temp_start:
                    print("‚ùå Error: End frame must be after Start frame!")
                else:
                    annotations.append({
                        'label': current_letter,
                        'start_frame': temp_start,
                        'end_frame': frame_idx,
                        'is_cropped': is_cropped
                    })
                    print(f"‚úÖ Saved {current_letter}: {temp_start} -> {frame_idx}")
                    temp_start = None
                    current_idx += 1
    finally:
        # Always free the capture and close the window, even on an exception.
        cap.release()
        cv2.destroyAllWindows()

    if not annotations:
        return None

    df = pd.DataFrame(annotations)
    df['video_name'] = Path(video_path).name
    df['folder_name'] = Path(video_path).parent.name
    return df

# Launch the interactive annotator on the configured video.
multi_df = run_professional_annotator(
    video_path=VIDEO_PATH,
    mode=MODE,
    is_cropped=IS_CROPPED,
)
# None means nothing was annotated, so there is nothing to write or show.
if multi_df is not None:
    # Scratch CSV for a quick quality check; the DataFrame index is left out.
    multi_df.to_csv("test_annotations.csv", index=False)
    display(multi_df)

--- Annotating S3_NSL_Vowel_Prepared_Camera_all.MOV ---
CONTROLS:
 [S] -> Mark START of letter | [E] -> Mark END of letter
 [D/F] -> Forward 1/10       | [A/R] -> Backward 1/10
 [W] -> Clear current Start  | [Q] -> Quit/Save
--------------------------------------------------
‚ùå Error: Mark Start (S) first!
üìç Start of A set to 6
‚úÖ Saved A: 6 -> 60
üìç Start of AA set to 82
‚úÖ Saved AA: 82 -> 134
üìç Start of I set to 182
‚úÖ Saved I: 182 -> 210
üìç Start of II set to 267
‚úÖ Saved II: 267 -> 300
üìç Start of U set to 360
‚úÖ Saved U: 360 -> 392
üìç Start of UU set to 427
‚úÖ Saved UU: 427 -> 466
üìç Start of RI set to 491
‚úÖ Saved RI: 491 -> 540
üìç Start of E set to 571
‚úÖ Saved E: 571 -> 606
üìç Start of AI set to 645
‚úÖ Saved AI: 645 -> 678
üìç Start of O set to 715
‚úÖ Saved O: 715 -> 748
üìç Start of AU set to 800
‚úÖ Saved AU: 800 -> 813
üìç Start of AM set to 856
‚úÖ Saved AM: 856 -> 897
üìç Start of AH set to 962
‚úÖ Saved AH: 962 -> 990


Unnamed: 0,label,start_frame,end_frame,is_cropped,video_name,folder_name
0,A,6,60,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
1,AA,82,134,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
2,I,182,210,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
3,II,267,300,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
4,U,360,392,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
5,UU,427,466,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
6,RI,491,540,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
7,E,571,606,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
8,AI,645,678,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
9,O,715,748,True,S3_NSL_Vowel_Prepared_Camera_all.MOV,S3_NSL_Vowel_Prepared
