# In [None]:
import os
import cv2
import numpy as np
import mediapipe as mp
import tkinter as tk
from tkinter import ttk
from PIL import Image, ImageTk

# ─── CONFIGURATION ────────────────────────────────────────────────────────
# Root folder for the recorded samples; one sub-folder per letter is created
# underneath it and the .npy files are written there.
DATA_DIR = "D:/Projects/asl_data"

# The labels that can be recorded, in the order the UI steps through them.
LETTERS = [*"ABCDEFGHIJKLMNOPQRSTUVWXYZ"]

# Default number of samples to collect for each letter (shown as "n/target").
TARGET_PER_LETTER = 200

# Caption of the main window.
WINDOW_TITLE = "ASL Sample Collector"

# Frame size requested from the camera, in pixels.
CAM_WIDTH = 640
CAM_HEIGHT = 480
# ──────────────────────────────────────────────────────────────────────────

class ASLCollectorApp:
    """Tkinter GUI that records MediaPipe hand-landmark samples per ASL letter.

    The window shows a mirrored webcam preview with the detected hand
    skeleton drawn on top.  While capture is running, pressing "Capture"
    stores the landmarks of the first detected hand as a flat NumPy vector in
    ``DATA_DIR/<letter>/<index>.npy``.

    Attributes:
        root: The Tk root window hosting the application.
        letter_index: Index into ``LETTERS`` of the letter being recorded.
        current_letter: The letter being recorded (set by ``update_letter``).
        target_count: Number of samples aimed for per letter (display only).
        current_count: Number of ``.npy`` samples stored for the letter.
        is_running: True while frames are being read and processed.
        last_landmarks: ``multi_hand_landmarks`` of the most recent processed
            frame, or None when no hand / no valid frame is available.
    """

    # Delay between two frame updates in milliseconds.  This is only an upper
    # bound on the refresh rate (~66 Hz); camera reads and hand detection
    # usually make the effective rate lower.
    FRAME_INTERVAL_MS = 15

    def __init__(self, root):
        """Create the data folders, the detector, the widgets and the camera.

        Args:
            root: The ``tk.Tk`` window the widgets are packed into.
        """
        self.root = root
        self.root.title(WINDOW_TITLE)
        self.root.protocol("WM_DELETE_WINDOW", self.on_exit)

        # Ensure root data directory exists and create subfolders for A–Z
        os.makedirs(DATA_DIR, exist_ok=True)
        for letter in LETTERS:
            os.makedirs(os.path.join(DATA_DIR, letter), exist_ok=True)

        # State variables
        self.letter_index = 0
        self.target_count = TARGET_PER_LETTER
        self.is_running = False
        self.current_count = 0
        # Must exist before the first frame is processed: capture_sample()
        # reads it and would otherwise fail with AttributeError.
        self.last_landmarks = None
        # Id of the pending `after` callback so it can be cancelled on exit.
        self._after_id = None

        # Initialize MediaPipe Hands and Drawing utils
        self.mp_hands_module = mp.solutions.hands
        self.mp_drawing = mp.solutions.drawing_utils
        self.hands_detector = self.mp_hands_module.Hands(
            static_image_mode=False,
            max_num_hands=2,
            model_complexity=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )

        # Build GUI
        self.setup_gui()

        # Initialize camera
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAM_WIDTH)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAM_HEIGHT)
        if not self.cap.isOpened():
            # Without this hint the preview area just stays empty and the
            # buttons appear to do nothing.
            self.video_frame.config(text="Camera not available")

        # Start with first letter
        self.update_letter(self.letter_index)

        # Begin frame update loop
        self.update_frame()

    def setup_gui(self):
        """Create and pack the labels, the video area and the buttons."""
        # Top frame: Letter & Count display
        top_frame = ttk.Frame(self.root)
        top_frame.pack(padx=10, pady=5, anchor="w")

        self.letter_label = ttk.Label(
            top_frame,
            text=f"Letter: {LETTERS[self.letter_index]}",
            font=("Helvetica", 16),
        )
        self.letter_label.pack(side="left", padx=(0, 20))

        self.count_label = ttk.Label(
            top_frame,
            text=f"Count: 0/{self.target_count}",
            font=("Helvetica", 16),
        )
        self.count_label.pack(side="left")

        # Middle frame: Video display label
        self.video_frame = ttk.Label(self.root)
        self.video_frame.pack(padx=10, pady=5)

        # Bottom frame: Control buttons
        btn_frame = ttk.Frame(self.root)
        btn_frame.pack(padx=10, pady=10, fill="x")

        self.start_btn = ttk.Button(btn_frame, text="Start", command=self.start_capture)
        self.start_btn.pack(side="left", padx=5)

        # Stop and Capture only make sense while capture is running.
        self.stop_btn = ttk.Button(
            btn_frame, text="Stop", command=self.stop_capture, state="disabled"
        )
        self.stop_btn.pack(side="left", padx=5)

        self.capture_btn = ttk.Button(
            btn_frame, text="Capture", command=self.capture_sample, state="disabled"
        )
        self.capture_btn.pack(side="left", padx=5)

        self.next_btn = ttk.Button(btn_frame, text="Next Letter", command=self.on_next_letter)
        self.next_btn.pack(side="left", padx=5)

        self.exit_btn = ttk.Button(btn_frame, text="Exit", command=self.on_exit)
        self.exit_btn.pack(side="right", padx=5)

    def update_letter(self, idx):
        """Switch to the letter at index idx and update counters.

        Args:
            idx: Index into ``LETTERS``; values past the end wrap around.
        """
        self.letter_index = idx % len(LETTERS)
        self.current_letter = LETTERS[self.letter_index]

        # Ensure the letter folder exists
        folder = os.path.join(DATA_DIR, self.current_letter)
        os.makedirs(folder, exist_ok=True)

        # Resume from what is already on disk: count existing .npy files
        self.current_count = sum(
            1 for name in os.listdir(folder) if name.endswith(".npy")
        )

        # Update labels
        self.letter_label.config(text=f"Letter: {self.current_letter}")
        self.count_label.config(text=f"Count: {self.current_count}/{self.target_count}")

    def start_capture(self):
        """Enable real-time hand tracking and UI buttons."""
        self.is_running = True
        self.start_btn.config(state="disabled")
        self.stop_btn.config(state="normal")
        self.capture_btn.config(state="normal")

    def stop_capture(self):
        """Pause hand tracking and disable capture button."""
        self.is_running = False
        # Drop the landmarks of the last frame so a later capture can never
        # store a hand pose that belongs to an earlier session or letter.
        self.last_landmarks = None
        self.start_btn.config(state="normal")
        self.stop_btn.config(state="disabled")
        self.capture_btn.config(state="disabled")

    @staticmethod
    def _landmarks_to_vector(hand_landmarks):
        """Return the landmarks of one hand as a flat [x0, y0, z0, x1, ...] array."""
        return np.array(
            [[point.x, point.y, point.z] for point in hand_landmarks.landmark]
        ).flatten()

    @staticmethod
    def _free_sample_path(folder, start_index):
        """Return the first unused ``<index>.npy`` path in folder.

        Probing starts at start_index and moves upwards, so an existing
        sample is never overwritten when the numbering in the folder has gaps.
        """
        index = start_index
        candidate = os.path.join(folder, f"{index}.npy")
        while os.path.exists(candidate):
            index += 1
            candidate = os.path.join(folder, f"{index}.npy")
        return candidate

    def capture_sample(self):
        """Save one landmark vector for the current letter if a hand is detected.

        Does nothing when the most recent frame contained no hand (or no
        frame has been processed yet).
        """
        if not self.last_landmarks:
            return  # Nothing to save

        # Take the first detected hand's landmarks
        coords = self._landmarks_to_vector(self.last_landmarks[0])
        letter_folder = os.path.join(DATA_DIR, self.current_letter)
        # current_count is the number of files, not necessarily the highest
        # index, so look for a name that is actually free.
        filename = self._free_sample_path(letter_folder, self.current_count)
        np.save(filename, coords)

        self.current_count += 1
        self.count_label.config(text=f"Count: {self.current_count}/{self.target_count}")

    def on_next_letter(self):
        """Stop capture and move to the next letter."""
        self.stop_capture()
        self.update_letter(self.letter_index + 1)

    def on_exit(self):
        """Clean up camera and close the app."""
        self.is_running = False
        # Cancel the scheduled frame update so it cannot fire against a
        # released camera or a destroyed window.
        if self._after_id is not None:
            self.root.after_cancel(self._after_id)
            self._after_id = None
        self.cap.release()
        self.hands_detector.close()
        self.root.quit()
        self.root.destroy()

    def _show_frame(self, frame):
        """Display a BGR frame in the video label."""
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        imgtk = ImageTk.PhotoImage(image=Image.fromarray(rgb))
        # Keep a reference on the widget; Tkinter does not hold one itself and
        # the image would disappear once the PhotoImage is garbage collected.
        self.video_frame.imgtk = imgtk
        self.video_frame.config(image=imgtk)

    def update_frame(self):
        """Grab a frame, detect/draw hand landmarks, and display in GUI.

        Re-schedules itself with ``root.after`` so it keeps running for the
        lifetime of the window; frames are only read while ``is_running``.
        """
        if self.is_running:
            ret, frame = self.cap.read()
            if ret:
                # Mirror for natural view
                frame = cv2.flip(frame, 1)
                # MediaPipe expects RGB input, OpenCV delivers BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = self.hands_detector.process(rgb_frame)

                # Store landmarks for capture
                self.last_landmarks = result.multi_hand_landmarks

                # Draw detected landmarks on the frame
                if self.last_landmarks:
                    for hand_landmarks in self.last_landmarks:
                        self.mp_drawing.draw_landmarks(
                            frame,
                            hand_landmarks,
                            self.mp_hands_module.HAND_CONNECTIONS,
                            self.mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
                            self.mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1, circle_radius=1),
                        )

                self._show_frame(frame)
            else:
                # No valid frame: forget the previous detection so Capture
                # cannot save stale landmarks.
                self.last_landmarks = None

        # Schedule next frame update
        self._after_id = self.root.after(self.FRAME_INTERVAL_MS, self.update_frame)

if __name__ == "__main__":
    # Script entry point: a single Tk root window hosts the collector UI.
    root = tk.Tk()
    # The constructor builds the widgets, opens the camera and schedules the
    # first frame update, so no further wiring is needed here.
    app = ASLCollectorApp(root)
    # Hand control to the Tk event loop until the window is closed.
    app.root.mainloop()
