In [None]:
''' python 3.13 no mediapipe'''
'''
import cv2
import numpy as np
import time
from collections import deque

# -----------------------------
# Dummy Drone Control Functions
# -----------------------------
def move_left():
    """Dummy drone command: report a LEFT move on stdout."""
    message = "üîµ DRONE COMMAND: LEFT"
    print(message)

def move_right():
    """Dummy drone command: report a RIGHT move on stdout."""
    message = "üî¥ DRONE COMMAND: RIGHT"
    print(message)

def move_up():
    """Dummy drone command: report an UP move on stdout."""
    message = "üü¢ DRONE COMMAND: UP"
    print(message)

def move_down():
    """Dummy drone command: report a DOWN move on stdout."""
    message = "üü° DRONE COMMAND: DOWN"
    print(message)

def hover():
    """Dummy drone command: report a HOVER request on stdout."""
    message = "‚ö™ DRONE COMMAND: HOVER"
    print(message)

def stop():
    """Dummy drone command: report a STOP request on stdout."""
    message = "üî¥ DRONE COMMAND: STOP"
    print(message)

# -----------------------------
# Improved Gesture Recognition
# -----------------------------
class GestureRecognizer:
    """Detect a hand by background subtraction and map it to a drone gesture.

    Typical use: feed ROI frames to ``calibrate_background`` until it returns
    True, then for every frame call ``preprocess`` -> ``find_hand_contour`` ->
    ``recognize_gesture`` -> ``get_stable_gesture``. The cooldown helpers
    rate-limit how often a confirmed gesture may be turned into a command.
    """

    # Number of most recent buffered frames that count double in the vote and
    # that must mostly agree with the winning gesture.
    RECENT_WINDOW = 10
    # Minimum matches inside the recent window needed to confirm a gesture.
    RECENT_MIN_MATCHES = 7

    def __init__(self):
        # KNN background model, fed during calibration and frozen afterwards.
        self.bg_subtractor = cv2.createBackgroundSubtractorKNN(
            history=200,
            dist2Threshold=400.0,
            detectShadows=False
        )

        # Calibration state: blurred float32 frames are collected here until
        # ``bg_calibration_frames`` have been seen, then averaged and released.
        self.bg_frames = []
        self.bg_frames_count = 0
        self.bg_calibration_frames = 60
        self.is_calibrated = False

        # Per-pixel mean of the calibration frames (uint8) once calibrated.
        self.avg_background = None

        # Temporal smoothing: the last 25 raw gestures; voting starts once at
        # least ``confirmation_threshold`` of them are buffered.
        self.gesture_buffer = deque(maxlen=25)
        self.confirmation_threshold = 18
        self.last_command_time = 0
        self.command_cooldown = 2.0  # seconds between accepted commands

        # Contour area bounds (pixels) for something to count as a hand.
        self.min_hand_area = 3000
        self.max_hand_area = 100000

    def calibrate_background(self, frame):
        """Consume one calibration frame.

        Args:
            frame: BGR image of the region of interest.

        Returns:
            False while frames are still being collected; True on the first
            call after ``bg_calibration_frames`` frames have been collected
            (that call builds the average background and marks the
            recognizer as calibrated).
        """
        frame_processed = cv2.GaussianBlur(frame, (5, 5), 0)

        if self.bg_frames_count < self.bg_calibration_frames:
            self.bg_frames.append(frame_processed.astype(np.float32))
            self.bg_frames_count += 1

            # High learning rate so the KNN model converges on the empty scene.
            self.bg_subtractor.apply(frame_processed, learningRate=0.8)
            return False

        self._finalize_background()
        self.is_calibrated = True
        return True

    def _finalize_background(self):
        """Average the collected frames once, then drop them to free memory."""
        if self.avg_background is None and self.bg_frames:
            self.avg_background = np.mean(self.bg_frames, axis=0).astype(np.uint8)
            print("‚úÖ Background model created!")

        # The raw float32 frames are only needed for the average; keeping them
        # would pin one full-size float image per calibration frame.
        self.bg_frames = []

    def preprocess(self, frame):
        """Return a cleaned binary foreground mask for ``frame``.

        The mask is the KNN subtractor output (model frozen via
        ``learningRate=0``), intersected with a thresholded difference against
        the average background when one exists, then cleaned with
        morphological open/close, a median blur and a final binary threshold.
        """
        frame_blur = cv2.GaussianBlur(frame, (5, 5), 0)

        # Method 1: learned background model, not updated at recognition time.
        fg_mask = self.bg_subtractor.apply(frame_blur, learningRate=0)

        # Method 2: absolute difference against the averaged background.
        if self.avg_background is not None:
            frame_diff = cv2.absdiff(frame_blur, self.avg_background)
            gray_diff = cv2.cvtColor(frame_diff, cv2.COLOR_BGR2GRAY)
            _, thresh_diff = cv2.threshold(gray_diff, 30, 255, cv2.THRESH_BINARY)

            # Keep only pixels both methods agree on.
            fg_mask = cv2.bitwise_and(fg_mask, thresh_diff)

        kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
        kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))

        # Open removes small specks; close fills holes inside the blob.
        fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_OPEN, kernel_open, iterations=2)
        fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_CLOSE, kernel_close, iterations=2)

        fg_mask = cv2.medianBlur(fg_mask, 5)

        # Median blur can introduce intermediate values; force back to 0/255.
        _, fg_mask = cv2.threshold(fg_mask, 127, 255, cv2.THRESH_BINARY)

        return fg_mask

    def find_hand_contour(self, mask):
        """Return the largest plausible hand contour in ``mask``, or None.

        A contour qualifies when its area lies strictly between
        ``min_hand_area`` and ``max_hand_area``; the largest such contour is
        returned only if its bounding-box width/height ratio lies strictly
        between 0.4 and 2.5.
        """
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        candidates = [
            c for c in contours
            if self.min_hand_area < cv2.contourArea(c) < self.max_hand_area
        ]
        if not candidates:
            return None

        largest = max(candidates, key=cv2.contourArea)

        _, _, w, h = cv2.boundingRect(largest)
        aspect_ratio = float(w) / h if h != 0 else 0

        return largest if 0.4 < aspect_ratio < 2.5 else None

    def recognize_gesture(self, contour, frame_width, frame_height):
        """Classify a hand contour by position and size.

        Rules, evaluated in order on the bounding-box centre (cx, cy):
        - cx in the left 33% of the width   -> "left"
        - cx in the right 33% of the width  -> "right"
        - cy in the top 33% of the height   -> "up"
        - otherwise, contour area < 2.5% of the frame -> "stop"
        - otherwise, contour area > 5.5% of the frame -> "hover"
        - anything else (or ``contour is None``)      -> "none"

        Args:
            contour: Contour as returned by ``find_hand_contour`` or None.
            frame_width: Width in pixels of the image the contour came from.
            frame_height: Height in pixels of that image.
        """
        if contour is None:
            return "none"

        x, y, w, h = cv2.boundingRect(contour)
        cx = x + w // 2
        cy = y + h // 2

        # Position-based gestures take priority over size-based ones.
        if cx < frame_width * 0.33:
            return "left"
        if cx > frame_width * 0.67:
            return "right"
        if cy < frame_height * 0.33:
            return "up"

        # Here the centre is in the middle column and below the top band,
        # so only the size of the hand distinguishes the gesture.
        area = cv2.contourArea(contour)
        frame_area = frame_width * frame_height
        if area < frame_area * 0.025:
            return "stop"  # small blob, e.g. a fist
        if area > frame_area * 0.055:
            return "hover"  # large blob, e.g. an open palm

        return "none"

    def get_stable_gesture(self, current_gesture):
        """Buffer ``current_gesture`` and return the confirmed gesture.

        Returns "none" until ``confirmation_threshold`` gestures are buffered.
        After that, buffered gestures are tallied with the last
        ``RECENT_WINDOW`` entries counted twice. "none" is discarded from the
        tally when other gestures exist and it holds less than 60% of the
        total weight. The top-weighted gesture is returned only if it appears
        at least ``RECENT_MIN_MATCHES`` times in the last ``RECENT_WINDOW``
        entries; otherwise "none".
        """
        self.gesture_buffer.append(current_gesture)

        if len(self.gesture_buffer) < self.confirmation_threshold:
            return "none"

        history = list(self.gesture_buffer)
        boosted_from = len(history) - self.RECENT_WINDOW

        weights = {}
        for i, g in enumerate(history):
            weights[g] = weights.get(g, 0) + (2 if i >= boosted_from else 1)

        # Occasional detection drop-outs should not outvote a real gesture.
        if "none" in weights and len(weights) > 1:
            if weights["none"] < sum(weights.values()) * 0.6:
                del weights["none"]

        # ``weights`` is never empty here: the buffer holds at least one entry
        # and "none" is only removed when another gesture remains.
        winner = max(weights, key=weights.get)

        recent = history[-self.RECENT_WINDOW:]
        if recent.count(winner) >= self.RECENT_MIN_MATCHES:
            return winner

        return "none"

    def can_send_command(self):
        """Return True when ``command_cooldown`` seconds have passed since the last command."""
        return time.time() - self.last_command_time >= self.command_cooldown

    def update_command_time(self):
        """Record the current time as the moment the last command was sent."""
        self.last_command_time = time.time()

    def get_cooldown_remaining(self):
        """Return the seconds left before another command is allowed (never negative)."""
        elapsed = time.time() - self.last_command_time
        return max(0, self.command_cooldown - elapsed)

# -----------------------------
# Main Application
# -----------------------------
def main():
    """Open the default camera and run the gesture controller until quit.

    The camera and all OpenCV windows are released even if the processing
    loop raises. Returns early (after printing an error) when camera 0
    cannot be opened.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("ERROR: could not open camera 0")
        cap.release()
        return

    try:
        _run_gesture_loop(cap)
    finally:
        cap.release()
        cv2.destroyAllWindows()


def _run_gesture_loop(cap):
    """Print the usage banner, then calibrate and recognise gestures per frame.

    Args:
        cap: An opened ``cv2.VideoCapture``; the caller owns and releases it.

    The loop ends when a frame cannot be read, the ROI is empty, or the user
    presses 'q'.
    """
    recognizer = GestureRecognizer()
    current_command = "none"

    # Confirmed gesture name -> drone command callback.
    commands = {
        "left": move_left,
        "right": move_right,
        "up": move_up,
        "hover": hover,
        "stop": stop,
    }

    # Display options.
    # NOTE: cv2.imshow interprets images as BGR, so "RGB mode" shows the
    # frame with red and blue swapped.
    show_rgb = True
    show_debug = True

    print("=" * 60)
    print("üöÅ GESTURE DRONE CONTROLLER - ENHANCED VERSION üöÅ")
    print("=" * 60)
    print("\nüìã CALIBRATION (IMPORTANT!):")
    print("   1. Position camera to see plain background")
    print("   2. Keep BOTH HANDS away from camera view")
    print("   3. Stay COMPLETELY STILL for 3-4 seconds")
    print("   4. Wait for 'CALIBRATION COMPLETE' message")
    print("\n‚úã SIMPLE GESTURES:")
    print("   üëà Move hand to LEFT third ‚Üí LEFT")
    print("   üëâ Move hand to RIGHT third ‚Üí RIGHT")
    print("   üëÜ Move hand to TOP third ‚Üí UP")
    print("   ‚úä Center + SMALL hand (fist) ‚Üí STOP")
    print("   üñêÔ∏è  Center + LARGE hand (open) ‚Üí HOVER")
    print("\nüí° TIPS:")
    print("   - Use plain/solid background for best results")
    print("   - Keep hand movements smooth and deliberate")
    print("   - Wait for stability bar to fill before moving")
    print("   - Press 'r' to recalibrate if detection is poor")
    print("\n‚öôÔ∏è  CONTROLS:")
    print("   'q' - Quit")
    print("   'r' - Recalibrate background")
    print("   't' - Toggle RGB/BGR display")
    print("   'd' - Toggle debug mask view")
    print("=" * 60)

    print("\n‚è≥ STARTING CALIBRATION...")
    print("üì∑ Position yourself and stay still!")
    time.sleep(1)  # Give the user a moment to read

    while True:
        success, frame = cap.read()
        if not success:
            break

        frame = cv2.flip(frame, 1)
        height, width, _ = frame.shape

        # ROI definition, clamped so overlays match the pixels actually used
        # when the camera delivers frames smaller than 640x480.
        roi_top, roi_left = 50, 100
        roi_bottom = min(430, height)
        roi_right = min(540, width)

        # Work on a copy so detection never sees overlay graphics.
        roi = frame[roi_top:roi_bottom, roi_left:roi_right].copy()
        if roi.size == 0:
            print("ERROR: camera frame is too small for the gesture ROI")
            break
        roi_height, roi_width = roi.shape[:2]

        # CALIBRATION PHASE
        if not recognizer.is_calibrated:
            is_complete = recognizer.calibrate_background(roi)
            progress = (recognizer.bg_frames_count / recognizer.bg_calibration_frames) * 100

            cv2.rectangle(frame, (roi_left, roi_top), (roi_right, roi_bottom), (0, 165, 255), 3)

            # Instructions
            cv2.putText(frame, "CALIBRATING BACKGROUND...", (width // 2 - 200, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 165, 255), 2)
            cv2.putText(frame, "Keep hand OUT of the box!", (roi_left + 40, roi_top - 50),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 165, 255), 2)
            cv2.putText(frame, "Stay still for best results", (roi_left + 40, roi_top - 20),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

            # Progress bar (large and centered)
            bar_x = width // 2 - 200
            bar_y = height // 2
            bar_width = 400
            bar_height = 40

            cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_width, bar_y + bar_height), (100, 100, 100), 2)

            fill_width = int((progress / 100) * (bar_width - 4))
            cv2.rectangle(frame, (bar_x + 2, bar_y + 2), (bar_x + 2 + fill_width, bar_y + bar_height - 2), (0, 255, 0), -1)

            cv2.putText(frame, f"{int(progress)}%", (bar_x + bar_width // 2 - 30, bar_y + 28),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

            cv2.putText(frame, f"Frame {recognizer.bg_frames_count}/{recognizer.bg_calibration_frames}",
                       (bar_x, bar_y + bar_height + 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)

            if is_complete:
                cv2.putText(frame, "CALIBRATION COMPLETE!", (width // 2 - 180, height // 2 - 60),
                           cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
                cv2.putText(frame, "You can now use gestures!", (width // 2 - 150, height // 2 - 20),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        # GESTURE RECOGNITION PHASE
        else:
            mask = recognizer.preprocess(roi)
            contour = recognizer.find_hand_contour(mask)

            raw_gesture = "none"
            hand_area = 0
            hand_center = (0, 0)
            area_percentage = 0

            if contour is not None:
                cv2.drawContours(roi, [contour], -1, (0, 255, 0), 3)

                x, y, w, h = cv2.boundingRect(contour)
                hand_center = (x + w // 2, y + h // 2)
                hand_area = cv2.contourArea(contour)
                frame_area = roi_width * roi_height
                area_percentage = (hand_area / frame_area) * 100

                cv2.rectangle(roi, (x, y), (x + w, y + h), (255, 0, 255), 2)

                cv2.circle(roi, hand_center, 8, (0, 0, 255), -1)
                cv2.circle(roi, hand_center, 10, (255, 255, 255), 2)

                size_text = f"{area_percentage:.1f}%"
                cv2.putText(roi, size_text, (x, y - 10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                # ``roi`` is a detached copy; paste the annotated pixels back
                # so the contour, box and centre show up in the displayed frame.
                frame[roi_top:roi_bottom, roi_left:roi_right] = roi

                raw_gesture = recognizer.recognize_gesture(contour, roi_width, roi_height)

            stable_gesture = recognizer.get_stable_gesture(raw_gesture)

            # Send a command only for a new, confirmed gesture outside cooldown.
            if stable_gesture != "none" and stable_gesture != current_command:
                if recognizer.can_send_command():
                    current_command = stable_gesture
                    recognizer.update_command_time()

                    action = commands.get(stable_gesture)
                    if action is not None:
                        action()

            # Status text stays ASCII: the Hershey fonts used by cv2.putText
            # cannot render emoji or other non-ASCII symbols.
            if recognizer.can_send_command():
                roi_color = (0, 255, 0)  # Green = ready
                status_text = "READY"
            else:
                roi_color = (0, 165, 255)  # Orange = cooldown
                cooldown = recognizer.get_cooldown_remaining()
                status_text = f"COOLDOWN: {cooldown:.1f}s"

            cv2.rectangle(frame, (roi_left, roi_top), (roi_right, roi_bottom), roi_color, 4)

            # Zone guides at the same 33% / 67% splits the recognizer uses.
            left_line = roi_left + int(roi_width * 0.33)
            right_line = roi_left + int(roi_width * 0.67)
            top_line = roi_top + int(roi_height * 0.33)

            cv2.line(frame, (left_line, roi_top), (left_line, roi_bottom), (255, 255, 255), 2)
            cv2.line(frame, (right_line, roi_top), (right_line, roi_bottom), (255, 255, 255), 2)
            cv2.line(frame, (roi_left, top_line), (roi_right, top_line), (255, 255, 255), 2)

            cv2.putText(frame, "LEFT", (left_line - 70, roi_top + 40),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
            cv2.putText(frame, "RIGHT", (right_line + 20, roi_top + 40),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
            cv2.putText(frame, "UP", (roi_left + roi_width // 2 - 20, top_line - 15),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

            center_x = roi_left + roi_width // 2
            center_y = roi_top + int(roi_height * 0.60)
            cv2.putText(frame, "STOP (small)", (center_x - 70, center_y),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
            cv2.putText(frame, "HOVER (large)", (center_x - 75, center_y + 25),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)

            cmd_color = (0, 255, 0) if current_command != "none" else (100, 100, 100)
            cv2.putText(frame, f"CMD: {current_command.upper()}", (20, 60),
                       cv2.FONT_HERSHEY_SIMPLEX, 1.5, cmd_color, 4)

            cv2.putText(frame, f"Detect: {raw_gesture.upper()}", (20, 110),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

            info_y = 150
            cv2.putText(frame, f"Area: {int(hand_area)} ({area_percentage:.1f}%)", (20, info_y),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)
            cv2.putText(frame, f"Center: ({hand_center[0]}, {hand_center[1]})", (20, info_y + 25),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)

            cv2.putText(frame, status_text, (20, height - 20),
                       cv2.FONT_HERSHEY_SIMPLEX, 1.0, roi_color, 2)

            # Stability bar
            bar_x = width - 230
            bar_y = 20
            bar_w = 220
            bar_h = 50

            cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + bar_h), (50, 50, 50), -1)
            cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + bar_h), (255, 255, 255), 2)

            buffer_list = list(recognizer.gesture_buffer)
            if raw_gesture != "none" and buffer_list:
                # Scale against the buffer's real capacity rather than a literal.
                buffer_capacity = recognizer.gesture_buffer.maxlen or len(buffer_list)
                stable_count = buffer_list.count(raw_gesture)
                stability_percent = (stable_count / len(buffer_list)) * 100

                if stable_count >= recognizer.confirmation_threshold:
                    bar_color = (0, 255, 0)  # Green = confirmed
                elif stable_count >= 10:
                    bar_color = (0, 255, 255)  # Yellow = getting there
                else:
                    bar_color = (0, 165, 255)  # Orange = unstable

                fill_w = int((stable_count / buffer_capacity) * (bar_w - 8))
                cv2.rectangle(frame, (bar_x + 4, bar_y + 4), (bar_x + 4 + fill_w, bar_y + bar_h - 4), bar_color, -1)

                cv2.putText(frame, f"Stability: {stable_count}/{buffer_capacity}", (bar_x + 10, bar_y + 23),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                cv2.putText(frame, f"{int(stability_percent)}%", (bar_x + 10, bar_y + 42),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            else:
                cv2.putText(frame, "No hand detected", (bar_x + 20, bar_y + 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (150, 150, 150), 1)

            # Debug view: 200x200 thumbnail of the mask in the bottom-left
            # corner; skipped when the frame is too small to hold it.
            if show_debug and height >= 210 and width >= 210:
                mask_small = cv2.resize(mask, (200, 200))
                mask_bgr = cv2.cvtColor(mask_small, cv2.COLOR_GRAY2BGR)

                cv2.rectangle(mask_bgr, (0, 0), (199, 199), (255, 255, 255), 2)

                frame[height-210:height-10, 10:210] = mask_bgr
                cv2.putText(frame, "MASK VIEW", (15, height - 215),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        # cvtColor already returns a new image, so no extra copy is needed.
        if show_rgb:
            display_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            cv2.putText(display_frame, "RGB Mode", (width - 120, height - 20),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        else:
            display_frame = frame

        cv2.imshow("Gesture Drone Control", display_frame)

        # Keyboard controls
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('r'):
            # A fresh recognizer restarts calibration from scratch.
            print("\nüîÑ RECALIBRATING... Keep hand away!")
            recognizer = GestureRecognizer()
            current_command = "none"
        elif key == ord('t'):
            show_rgb = not show_rgb
            print(f"Display mode: {'RGB' if show_rgb else 'BGR'}")
        elif key == ord('d'):
            show_debug = not show_debug
            print(f"Debug view: {'ON' if show_debug else 'OFF'}")

# Script entry point: start the controller only when this file is run directly.
if __name__ == "__main__":
    main()
    '''

In [None]:
''' python 3.11.9 + mediapipe'''
'''
import cv2
import time
import numpy as np
from collections import deque

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# =========================================================
# Dummy Drone Control Functions
# =========================================================
def move_left():
    """Dummy drone command: report a LEFT move on stdout."""
    message = "üü¶ DRONE COMMAND: LEFT"
    print(message)

def move_right():
    """Dummy drone command: report a RIGHT move on stdout."""
    message = "üü• DRONE COMMAND: RIGHT"
    print(message)

def move_up():
    """Dummy drone command: report an UP move on stdout."""
    message = "üü© DRONE COMMAND: UP"
    print(message)

def hover():
    """Dummy drone command: report a HOVER request on stdout."""
    message = "‚¨ú DRONE COMMAND: HOVER"
    print(message)

def stop():
    """Dummy drone command: report a STOP request on stdout."""
    message = "üü• DRONE COMMAND: STOP"
    print(message)

def summersault():
    """Dummy drone command: report a SUMMERSAULT request on stdout."""
    message = "ü§ò DRONE COMMAND: SUMMERSAULT"
    print(message)

# =========================================================
# Gesture Recognizer (MediaPipe Tasks API)
# =========================================================
class GestureRecognizer:
    """Hand-landmark gesture recognizer with smoothing and command chaining.

    Wraps a MediaPipe ``HandLandmarker`` (stored as ``self.detector``) and
    adds finger-state based gesture classification, a sliding-window
    stability filter, a send cooldown, and a short sequence buffer that
    batches gestures performed in quick succession.
    """

    # Pause in seconds between consecutive commands of an executed sequence.
    # Kept as an attribute so it can be shortened or disabled per instance.
    sequence_step_delay = 0.5

    def __init__(self):
        base_options = python.BaseOptions(
            model_asset_path="hand_landmarker.task"
        )

        options = vision.HandLandmarkerOptions(
            base_options=base_options,
            num_hands=1,
            min_hand_detection_confidence=0.7,
            min_hand_presence_confidence=0.7,
            min_tracking_confidence=0.7
        )

        self.detector = vision.HandLandmarker.create_from_options(options)

        # Temporal stability: a gesture must fill at least
        # ``confirmation_threshold`` of the last 15 raw detections.
        self.buffer = deque(maxlen=15)
        self.confirmation_threshold = 10

        # Cooldown between sent commands (seconds).
        self.last_command_time = 0
        self.cooldown = 0.3

        # Sequence chaining: gestures arriving within ``sequence_timeout``
        # seconds of each other are batched, up to ``max_sequence_length``.
        self.sequence = []
        self.max_sequence_length = 4
        self.sequence_timeout = 0.4
        self.last_gesture_time = 0
        self.sequence_active = False

    # -----------------------------
    # Finger State Logic
    # -----------------------------
    def fingers_extended(self, lm):
        """Return ``[thumb, index, middle, ring, pinky]`` extension flags.

        Args:
            lm: Indexable sequence of landmarks with ``.x`` and ``.y``
                attributes; indices 0, 3, 4 and the tip/PIP pairs
                (8, 6), (12, 10), (16, 14), (20, 18) are read.

        The four fingers count as extended when the tip's ``y`` is smaller
        than the PIP joint's ``y`` (smaller y is higher in image
        coordinates). The thumb is judged on the x-axis: it is extended when
        the tip lies further from the wrist side than the IP joint.
        """
        wrist, thumb_ip, thumb_tip = lm[0], lm[3], lm[4]

        # Which way "outwards" is depends on which side of the wrist the
        # thumb tip sits.
        if thumb_tip.x < wrist.x:
            thumb_extended = thumb_tip.x < thumb_ip.x
        else:
            thumb_extended = thumb_tip.x > thumb_ip.x

        fingers = [thumb_extended]
        fingers.extend(
            lm[tip].y < lm[pip].y
            for tip, pip in zip((8, 12, 16, 20), (6, 10, 14, 18))
        )
        return fingers

    # -----------------------------
    # Gesture Recognition
    # -----------------------------
    def recognize(self, landmarks):
        """Classify one set of hand landmarks into a gesture name.

        Returns one of "summersault", "up", "left", "right", "hover", "stop"
        or "none". Checks run in this order:
        - index and pinky extended, middle and ring folded -> "summersault"
        - index only: direction from wrist to index tip decides
          "up" / "left" / "right" (see inline comments)
        - index, middle, ring and pinky extended -> "hover" (thumb ignored)
        - no finger extended, thumb included -> "stop"
        """
        fingers = self.fingers_extended(landmarks)
        _, index, middle, ring, pinky = fingers

        wrist = landmarks[0]
        index_tip = landmarks[8]

        dx = index_tip.x - wrist.x
        dy = wrist.y - index_tip.y  # Positive when the finger points up

        if index and pinky and not middle and not ring:
            return "summersault"

        if index and not middle and not ring and not pinky:
            # Angle above the horizontal, in degrees.
            angle = np.arctan2(dy, abs(dx)) * 180 / np.pi

            if angle > 55:
                return "up"  # mostly vertical
            if dx < -0.10 and angle < 45:
                return "left"
            if dx > 0.10 and angle < 45:
                return "right"
            # Ambiguous direction: fall back to "up" only when the tip is
            # clearly above the wrist.
            return "up" if dy > 0.05 else "none"

        if index and middle and ring and pinky:
            return "hover"

        if not any(fingers):
            return "stop"

        return "none"

    # -----------------------------
    # Temporal Stability
    # -----------------------------
    def stable_gesture(self, gesture):
        """Buffer ``gesture`` and return it only once it is consistent.

        Returns the most frequent non-"none" gesture in the buffer when it
        occurs at least ``confirmation_threshold`` times, otherwise "none".
        """
        self.buffer.append(gesture)

        if len(self.buffer) < self.confirmation_threshold:
            return "none"

        counts = {}
        for g in self.buffer:
            if g != "none":
                counts[g] = counts.get(g, 0) + 1

        if not counts:
            return "none"

        best = max(counts, key=counts.get)
        return best if counts[best] >= self.confirmation_threshold else "none"

    # -----------------------------
    # Cooldown Control
    # -----------------------------
    def can_send(self):
        """Return True when ``cooldown`` seconds have passed since the last command."""
        return time.time() - self.last_command_time >= self.cooldown

    def mark_sent(self):
        """Record the current time as the moment a command was sent."""
        self.last_command_time = time.time()

    # -----------------------------
    # Sequence Chaining
    # -----------------------------
    def add_to_sequence(self, gesture):
        """Append ``gesture`` to the pending sequence.

        If more than ``sequence_timeout`` seconds passed since the previous
        gesture, the pending sequence is executed first and a new one is
        started. When the append fills the sequence to
        ``max_sequence_length`` it is executed immediately.

        Returns:
            True if this call filled and executed the sequence, else False.
        """
        if time.time() - self.last_gesture_time > self.sequence_timeout:
            # Stale sequence: flush it (no-op when empty) and start over.
            self.execute_sequence()
            self.sequence = []

        if len(self.sequence) >= self.max_sequence_length:
            # Only reachable if the sequence was filled from outside this
            # method; flush so the new gesture is never dropped.
            self.execute_sequence()

        self.sequence.append(gesture)
        # Timestamp taken after any blocking execute_sequence() above, so the
        # fresh gesture is not already considered timed out.
        self.last_gesture_time = time.time()
        self.sequence_active = True

        if len(self.sequence) >= self.max_sequence_length:
            self.execute_sequence()
            return True
        return False

    def execute_sequence(self):
        """Run every queued command in order, then clear the sequence.

        Does nothing when the sequence is empty. Sleeps
        ``sequence_step_delay`` seconds between consecutive commands, which
        blocks the caller for that long.
        """
        if not self.sequence:
            return

        chain = ' ‚Üí '.join(s.upper() for s in self.sequence)
        total = len(self.sequence)

        print("\n" + "="*50)
        print(f"üéØ EXECUTING SEQUENCE: {chain}")
        print("="*50)

        for i, gesture in enumerate(self.sequence, 1):
            print(f"  [{i}/{total}] ", end="")
            if gesture == "left":
                move_left()
            elif gesture == "right":
                move_right()
            elif gesture == "up":
                move_up()
            elif gesture == "hover":
                hover()
            elif gesture == "stop":
                stop()
            elif gesture == "summersault":
                summersault()

            # Pause between commands, but not after the last one.
            if i < total and self.sequence_step_delay > 0:
                time.sleep(self.sequence_step_delay)

        print("="*50 + "\n")

        self.sequence = []
        self.sequence_active = False

    def check_sequence_timeout(self):
        """Execute the pending sequence if no gesture arrived within the timeout."""
        if self.sequence and time.time() - self.last_gesture_time > self.sequence_timeout:
            self.execute_sequence()

    def get_sequence_status(self):
        """Return a snapshot dict with keys 'sequence', 'count', 'max' and 'active'."""
        return {
            'sequence': self.sequence.copy(),
            'count': len(self.sequence),
            'max': self.max_sequence_length,
            'active': self.sequence_active
        }

# =========================================================
# Main Application
# =========================================================
def main():
    """Run the webcam loop: detect a hand, classify its gesture, chain commands.

    Each frame is mirrored, cropped to a fixed region of interest and passed
    to the recognizer's MediaPipe detector. A gesture that is stable and
    differs from the last one in the chain is appended to the command
    sequence; the recognizer executes the sequence on timeout or when it is
    full. Press 'q' in the window to quit. The camera and the window are
    released even when the loop raises.
    """
    # Fixed region of interest, in pixel coordinates of the mirrored frame
    roi_x1, roi_y1 = 120, 60
    roi_x2, roi_y2 = 520, 420

    cap = cv2.VideoCapture(0)
    current_command = "none"

    try:
        recognizer = GestureRecognizer()

        print("\nüöÅ MEDIAPIPE GESTURE DRONE CONTROLLER üöÅ")
        print("=" * 50)
        print("Gestures:")
        print("  üëÜ Index finger UP ‚Üí MOVE UP")
        print("  üëà Index finger LEFT ‚Üí MOVE LEFT")
        print("  üëâ Index finger RIGHT ‚Üí MOVE RIGHT")
        print("  üñê  Open palm ‚Üí HOVER")
        print("  ‚úä  Fist ‚Üí STOP")
        print("  ü§ò Rock horns ‚Üí SUMMERSAULT")
        print()
        print("‚õìÔ∏è  SEQUENCE CHAINING:")
        print("  Chain up to 4 maneuvers by performing gestures")
        print("  within 0.4s of each other. The sequence executes")
        print("  automatically after 0.4s or when 4 moves are chained.")
        print("=" * 50)
        print("Press 'q' to quit\n")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Flip frame for mirror effect
            frame = cv2.flip(frame, 1)
            frame_width = frame.shape[1]

            roi = frame[roi_y1:roi_y2, roi_x1:roi_x2]
            # Landmarks are normalized to the image handed to the detector,
            # so scale by the actual crop size (it is clipped on small frames).
            roi_h, roi_w = roi.shape[:2]

            # MediaPipe expects RGB input
            rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(
                image_format=mp.ImageFormat.SRGB,
                data=rgb
            )

            # Detect hand landmarks
            result = recognizer.detector.detect(mp_image)

            raw_gesture = "none"

            if result.hand_landmarks:
                landmarks = result.hand_landmarks[0]
                raw_gesture = recognizer.recognize(landmarks)

                # Draw hand landmarks on the full frame
                for landmark in landmarks:
                    x = int(landmark.x * roi_w) + roi_x1
                    y = int(landmark.y * roi_h) + roi_y1
                    cv2.circle(frame, (x, y), 5, (0, 255, 255), -1)

            # Apply temporal stability
            stable = recognizer.stable_gesture(raw_gesture)

            # Execute a pending sequence whose timeout has elapsed
            recognizer.check_sequence_timeout()

            if stable != "none" and recognizer.can_send():
                # Only chain a gesture that differs from the last one queued
                # (or any gesture when the chain is empty)
                queued = recognizer.get_sequence_status()['sequence']
                last_in_seq = queued[-1] if queued else None

                if stable != last_in_seq:
                    current_command = stable
                    recognizer.mark_sent()
                    # Queue the gesture instead of executing it immediately
                    recognizer.add_to_sequence(stable)

            # -----------------------------
            # UI Rendering
            # -----------------------------
            seq_status = recognizer.get_sequence_status()

            # Draw ROI rectangle
            cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x2, roi_y2), (0, 255, 0), 3)

            # Text background spans the real frame width, not an assumed 640 px
            cv2.rectangle(frame, (0, 0), (frame_width, 150), (0, 0, 0), -1)

            # Display raw gesture
            cv2.putText(frame, f"RAW: {raw_gesture.upper()}",
                        (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            # Display current command (larger and colored)
            color = (0, 255, 0) if current_command != "none" else (100, 100, 100)
            cv2.putText(frame, f"CMD: {current_command.upper()}",
                        (20, 90),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.3, color, 3)

            # Display sequence chain
            if seq_status['count'] > 0:
                seq_text = " ‚Üí ".join([s.upper() for s in seq_status['sequence']])
                cv2.putText(frame, f"CHAIN [{seq_status['count']}/{seq_status['max']}]: {seq_text}",
                            (20, 130),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

            # Display buffer status
            buffer_fill = len(recognizer.buffer)
            cv2.putText(frame, f"Buffer: {buffer_fill}/{recognizer.confirmation_threshold}",
                        (450, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)

            # Show frame
            cv2.imshow("Gesture Drone Control (MediaPipe)", frame)

            # Exit on 'q' key
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always free the camera and close the window, even on errors
        cap.release()
        cv2.destroyAllWindows()

    print("\n‚úÖ Application closed successfully")

# Start the controller only when this file is run as a script
if __name__ == "__main__":
    main()
    '''

In [3]:
"""
ALL-IN-ONE CNN GESTURE RECOGNITION SYSTEM
"""

import cv2
import numpy as np
import time
import sqlite3
import pickle
import os
from collections import deque
from datetime import datetime

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

# =========================================================
# DATABASE MANAGER
# =========================================================
class GestureDatabase:
    """SQLite-backed store for gesture samples, labels and training sessions.

    The connection is opened, and the tables are created if missing, when
    the object is constructed. Instances also work as context managers, so
    the connection is closed even when the body raises::

        with GestureDatabase() as db:
            db.save_sample("left", landmarks)

    Landmarks are stored as pickled blobs.
    NOTE(security): ``pickle.loads`` can execute arbitrary code from a
    crafted blob, so only open database files this application wrote itself.
    """

    def __init__(self, db_path="gesture_database.db"):
        self.db_path = db_path
        self.conn = None
        self.init_database()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never swallow the caller's exception

    def init_database(self):
        """Open the connection and create the three tables if they do not exist."""
        self.conn = sqlite3.connect(self.db_path)
        cursor = self.conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS gesture_samples (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                gesture_name TEXT NOT NULL,
                landmarks BLOB NOT NULL,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                hand_edness TEXT,
                confidence REAL
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS gesture_labels (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                gesture_name TEXT UNIQUE NOT NULL,
                description TEXT,
                drone_command TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS training_sessions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_path TEXT,
                accuracy REAL,
                loss REAL,
                epochs INTEGER,
                samples_count INTEGER,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        """)

        self.conn.commit()

    def add_gesture_label(self, name, description, drone_command):
        """Insert a gesture label.

        Returns:
            True when the label was inserted, False when a label with the
            same ``name`` already exists (UNIQUE constraint).
        """
        try:
            # `with conn` commits on success and rolls back on error
            with self.conn:
                self.conn.execute("""
                    INSERT INTO gesture_labels (gesture_name, description, drone_command)
                    VALUES (?, ?, ?)
                """, (name, description, drone_command))
        except sqlite3.IntegrityError:
            return False
        return True

    def save_sample(self, gesture_name, landmarks, hand_edness="Right", confidence=1.0):
        """Pickle ``landmarks`` and store them as one sample of ``gesture_name``."""
        landmarks_blob = pickle.dumps(landmarks)

        with self.conn:
            self.conn.execute("""
                INSERT INTO gesture_samples (gesture_name, landmarks, hand_edness, confidence)
                VALUES (?, ?, ?, ?)
            """, (gesture_name, landmarks_blob, hand_edness, confidence))

    def get_samples(self, gesture_name=None):
        """Return ``(gesture_name, landmarks)`` tuples.

        Args:
            gesture_name: restrict the result to this gesture; ``None``
                (the default) returns the samples of every gesture.
        """
        # Compare against None explicitly: an empty name is a (non-matching)
        # filter, not a request for everything.
        if gesture_name is not None:
            rows = self.conn.execute("""
                SELECT gesture_name, landmarks FROM gesture_samples
                WHERE gesture_name = ?
            """, (gesture_name,))
        else:
            rows = self.conn.execute("""
                SELECT gesture_name, landmarks FROM gesture_samples
            """)

        # NOTE(security): unpickling trusts the database file (see class docstring)
        return [(name, pickle.loads(blob)) for name, blob in rows.fetchall()]

    def get_all_gesture_names(self):
        """Return the distinct gesture names that have samples, sorted by name."""
        rows = self.conn.execute(
            "SELECT DISTINCT gesture_name FROM gesture_samples ORDER BY gesture_name"
        )
        return [row[0] for row in rows.fetchall()]

    def get_sample_count(self):
        """Return a ``{gesture_name: sample_count}`` dict ordered by gesture name."""
        rows = self.conn.execute("""
            SELECT gesture_name, COUNT(*) as count
            FROM gesture_samples
            GROUP BY gesture_name
            ORDER BY gesture_name
        """)
        return dict(rows.fetchall())

    def save_training_session(self, model_path, accuracy, loss, epochs, samples_count):
        """Record the metadata of one training run."""
        with self.conn:
            self.conn.execute("""
                INSERT INTO training_sessions (model_path, accuracy, loss, epochs, samples_count)
                VALUES (?, ?, ?, ?, ?)
            """, (model_path, accuracy, loss, epochs, samples_count))

    def clear_samples(self, gesture_name=None):
        """Delete the samples of one gesture, or all samples when ``gesture_name`` is None."""
        with self.conn:
            # Explicit None check so that an empty name can never wipe the table
            if gesture_name is not None:
                self.conn.execute(
                    "DELETE FROM gesture_samples WHERE gesture_name = ?", (gesture_name,)
                )
            else:
                self.conn.execute("DELETE FROM gesture_samples")

    def close(self):
        """Close the database connection; calling it again is harmless."""
        if self.conn:
            self.conn.close()

# =========================================================
# CNN MODEL
# =========================================================
class GestureCNN:
    """Neural-network classifier for hand-landmark gestures.

    Despite the name, the network built by :meth:`build_model` is fully
    connected (Flatten + Dense layers); it contains no convolution layers.
    Each sample is an array of shape ``input_shape`` holding the
    ``(x, y, z)`` coordinates of the hand landmarks.

    Attributes are filled in as the object is used: ``model`` by
    :meth:`build_model` / :meth:`load_model`, and ``label_encoder``
    (name -> index) / ``label_decoder`` (index -> name) by
    :meth:`prepare_data` / :meth:`load_model`.
    """

    def __init__(self, num_classes, input_shape=(21, 3)):
        self.num_classes = num_classes
        self.input_shape = input_shape
        self.model = None
        self.label_encoder = {}
        self.label_decoder = {}

    @staticmethod
    def _labels_path(model_path):
        """Return the path of the label-mapping pickle stored next to the model.

        Built from the path without its extension, so it can never equal
        ``model_path`` itself, whatever extension the model file uses.
        """
        stem, _extension = os.path.splitext(model_path)
        return stem + '_labels.pkl'

    def build_model(self):
        """Build and compile the dense softmax classifier; store and return it."""
        model = models.Sequential([
            layers.Flatten(input_shape=self.input_shape),

            layers.Dense(128, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.3),

            layers.Dense(64, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.3),

            layers.Dense(32, activation='relu'),
            layers.Dropout(0.2),

            layers.Dense(self.num_classes, activation='softmax')
        ])

        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        self.model = model
        return model

    def prepare_data(self, samples):
        """Turn ``(gesture_name, landmarks)`` samples into training arrays.

        Also rebuilds ``label_encoder`` / ``label_decoder`` from the sorted
        set of gesture names found in ``samples``.

        Returns:
            ``(X, y)``: ``X`` stacks one ``(x, y, z)`` row per landmark for
            every sample; ``y`` is the one-hot encoding of the labels with
            ``num_classes`` columns.
        """
        unique_labels = sorted({name for name, _ in samples})
        self.label_encoder = {label: idx for idx, label in enumerate(unique_labels)}
        self.label_decoder = {idx: label for label, idx in self.label_encoder.items()}

        X = np.array([
            [[lm.x, lm.y, lm.z] for lm in landmarks]
            for _, landmarks in samples
        ])
        y = to_categorical(
            [self.label_encoder[name] for name, _ in samples],
            num_classes=self.num_classes,
        )

        return X, y

    def train(self, X, y, epochs=50, validation_split=0.2, batch_size=32,
              shuffle=True, seed=None):
        """Fit the model (building it first if needed) and return the Keras history.

        Args:
            X, y: arrays as produced by :meth:`prepare_data`.
            epochs, validation_split, batch_size: forwarded to ``Model.fit``.
            shuffle: permute the samples before fitting (default True).
            seed: optional seed for that permutation, for reproducible splits.

        Training stops early after 10 epochs without ``val_loss`` improvement
        and the learning rate is halved after 5 such epochs.
        """
        if self.model is None:
            self.build_model()

        if shuffle:
            # Keras carves the validation set from the LAST rows, before any
            # shuffling. Samples usually arrive grouped by gesture, so without
            # this the validation set would hold only the last classes.
            X = np.asarray(X)
            y = np.asarray(y)
            order = np.random.default_rng(seed).permutation(len(X))
            X, y = X[order], y[order]

        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )

        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=0.00001
        )

        history = self.model.fit(
            X, y,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )

        return history

    def save_model(self, model_path="gesture_cnn_model.h5"):
        """Save the model to ``model_path`` and the label maps to ``<stem>_labels.pkl``."""
        self.model.save(model_path)

        with open(self._labels_path(model_path), 'wb') as f:
            pickle.dump({
                'encoder': self.label_encoder,
                'decoder': self.label_decoder
            }, f)

    def load_model(self, model_path="gesture_cnn_model.h5"):
        """Load the model and its label maps; ``num_classes`` is updated to match."""
        self.model = keras.models.load_model(model_path)

        # NOTE(security): unpickling executes code from the file; only load
        # label files written by save_model().
        with open(self._labels_path(model_path), 'rb') as f:
            labels = pickle.load(f)
        self.label_encoder = labels['encoder']
        self.label_decoder = labels['decoder']

        self.num_classes = len(self.label_encoder)

    def predict(self, landmarks):
        """Classify one hand.

        Args:
            landmarks: sequence of objects with ``x``, ``y`` and ``z`` attributes.

        Returns:
            ``(gesture_name, confidence)`` where ``confidence`` is the model
            output for the winning class, as a plain float.
        """
        coords = np.array([[lm.x, lm.y, lm.z] for lm in landmarks])
        batch = coords.reshape((1, *self.input_shape))

        probabilities = self.model.predict(batch, verbose=0)[0]

        class_idx = int(np.argmax(probabilities))
        return self.label_decoder[class_idx], float(probabilities[class_idx])

# =========================================================
# GESTURE RECOGNIZER
# =========================================================
class HybridGestureRecognizer:
    """Pairs a MediaPipe hand-landmark detector with the CNN gesture classifier.

    Besides detection and classification it keeps a short history of
    recognised gestures for temporal smoothing and a cooldown timer that
    rate-limits outgoing commands.
    """

    def __init__(self, use_cnn=True, model_path=None):
        # Hand-landmark detector (MediaPipe Tasks API), limited to one hand
        landmarker_options = vision.HandLandmarkerOptions(
            base_options=python.BaseOptions(
                model_asset_path="hand_landmarker.task"
            ),
            num_hands=1,
            min_hand_detection_confidence=0.7,
            min_hand_presence_confidence=0.7,
            min_tracking_confidence=0.7
        )
        self.detector = vision.HandLandmarker.create_from_options(landmarker_options)

        # Classifier: stays None unless requested and the model file exists
        self.use_cnn = use_cnn
        self.cnn = None
        if use_cnn and model_path:
            if os.path.exists(model_path):
                self.cnn = GestureCNN(num_classes=6)
                self.cnn.load_model(model_path)

        # Temporal smoothing: last 15 results, 10 of them must agree
        self.buffer = deque(maxlen=15)
        self.confirmation_threshold = 10

        # Minimum number of seconds between two commands
        self.last_command_time = 0
        self.cooldown = 2.0

    def detect_hand(self, frame):
        """Return the landmarks of the first hand found in a BGR frame, or None."""
        mp_image = mp.Image(
            image_format=mp.ImageFormat.SRGB,
            data=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
        )
        detection = self.detector.detect(mp_image)

        if not detection.hand_landmarks:
            return None
        return detection.hand_landmarks[0]

    def recognize_gesture(self, landmarks):
        """Classify landmarks with the CNN; ``("none", 0.0)`` when no CNN is active."""
        if not (self.use_cnn and self.cnn):
            return "none", 0.0

        label, score = self.cnn.predict(landmarks)
        return label, score

    def stable_gesture(self, gesture):
        """Record ``gesture`` and return the smoothed result.

        Returns the most frequent non-"none" gesture in the history when it
        occurs at least ``confirmation_threshold`` times, otherwise "none".
        """
        self.buffer.append(gesture)
        if len(self.buffer) < self.confirmation_threshold:
            return "none"

        votes = [entry for entry in self.buffer if entry != "none"]
        if not votes:
            return "none"

        # Candidates in first-seen order; max() keeps the earliest on a tie
        winner = max(dict.fromkeys(votes), key=votes.count)
        if votes.count(winner) >= self.confirmation_threshold:
            return winner
        return "none"

    def can_send(self):
        """Return True once the cooldown since the last command has passed."""
        elapsed = time.time() - self.last_command_time
        return elapsed >= self.cooldown

    def mark_sent(self):
        """Restart the cooldown timer from now."""
        self.last_command_time = time.time()

# =========================================================
# DRONE COMMANDS
# =========================================================
def _announce(icon, action):
    """Print one simulated drone command line."""
    print(f"{icon} DRONE COMMAND: {action}")


def move_left():
    """Simulated drone command: move left."""
    _announce("üîµ", "LEFT")


def move_right():
    """Simulated drone command: move right."""
    _announce("üî¥", "RIGHT")


def move_up():
    """Simulated drone command: move up."""
    _announce("üü¢", "UP")


def hover():
    """Simulated drone command: hover."""
    _announce("‚ö™", "HOVER")


def stop():
    """Simulated drone command: stop."""
    _announce("üõë", "STOP")


def summersault():
    """Simulated drone command: summersault."""
    _announce("ü§∏", "SUMMERSAULT")


# Gesture name -> function that issues the matching drone command
COMMAND_MAP = dict(
    left=move_left,
    right=move_right,
    up=move_up,
    hover=hover,
    stop=stop,
    summersault=summersault,
)

# =========================================================
# DATA COLLECTION MODULE
# =========================================================
def _print_sample_stats(stats, title, show_total=False):
    """Print a per-gesture sample-count table under ``title``."""
    rule = "=" * 40
    print("\n" + rule)
    print(title)
    print(rule)
    for gesture, count in stats.items():
        print(f"  {gesture.upper():15s}: {count:4d} samples")
    if show_total:
        print(rule)
        print(f"  TOTAL:          {sum(stats.values()):4d} samples")
    print(rule + "\n")


def collect_data():
    """Interactively record labelled hand-landmark samples from the webcam.

    Keys 1-6 select a gesture and start recording: every frame in which a
    hand is detected inside the region of interest is saved, until
    ``target_samples`` frames are stored or 'c' is pressed. 's' prints the
    per-gesture counts and 'q' quits. The camera, the window and the
    database connection are released even when the loop raises.
    """
    gestures = {
        '1': 'left',
        '2': 'right',
        '3': 'up',
        '4': 'hover',
        '5': 'stop',
        '6': 'summersault'
    }
    target_samples = 100

    # Fixed region of interest, in pixel coordinates of the mirrored frame
    roi_x1, roi_y1 = 120, 60
    roi_x2, roi_y2 = 520, 420

    db = GestureDatabase()
    try:
        recognizer = HybridGestureRecognizer(use_cnn=False)

        for name in gestures.values():
            # Store the name of the function that really handles the gesture
            # (e.g. 'hover', not a non-existent 'move_hover').
            handler = COMMAND_MAP.get(name)
            drone_command = handler.__name__ if handler is not None else f'move_{name}'
            db.add_gesture_label(name, f'Gesture {name}', drone_command)

        current_gesture = None
        collecting = False
        sample_count = 0

        cap = cv2.VideoCapture(0)
        try:
            print("\n" + "="*60)
            print("üìä DATA COLLECTION MODE")
            print("="*60)
            print("\nGestures:")
            for key, name in gestures.items():
                print(f"  {key} - {name.upper()}")
            print("\nControls:")
            print("  1-6: Select gesture")
            print("  c: Stop collecting")
            print("  s: Show statistics")
            print("  q: Quit")
            print("="*60 + "\n")

            while True:
                ret, frame = cap.read()
                if not ret:
                    print("Failed to grab frame")
                    break

                frame = cv2.flip(frame, 1)
                w = frame.shape[1]

                roi = frame[roi_y1:roi_y2, roi_x1:roi_x2]
                # Landmarks are normalized to the image handed to the
                # detector, so scale by the actual (possibly clipped) crop.
                roi_h, roi_w = roi.shape[:2]

                landmarks = recognizer.detect_hand(roi)

                if landmarks:
                    for landmark in landmarks:
                        x = int(landmark.x * roi_w) + roi_x1
                        y = int(landmark.y * roi_h) + roi_y1
                        cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)

                    if collecting and current_gesture:
                        db.save_sample(current_gesture, landmarks)
                        sample_count += 1

                        if sample_count >= target_samples:
                            print(f"‚úÖ Collected {sample_count} samples for {current_gesture}")
                            collecting = False
                            current_gesture = None
                            sample_count = 0

                # UI
                cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x2, roi_y2), (0, 255, 0), 3)
                cv2.rectangle(frame, (0, 0), (w, 100), (0, 0, 0), -1)

                if collecting and current_gesture:
                    status_text = f"COLLECTING: {current_gesture.upper()}"
                    color = (0, 255, 0)
                    progress = f"{sample_count}/{target_samples}"
                    cv2.putText(frame, progress, (20, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
                else:
                    status_text = "SELECT GESTURE (1-6)"
                    color = (100, 100, 100)

                cv2.putText(frame, status_text, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)

                hand_status = "HAND DETECTED" if landmarks else "NO HAND"
                hand_color = (0, 255, 0) if landmarks else (0, 0, 255)
                cv2.putText(frame, hand_status, (400, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, hand_color, 2)

                cv2.imshow("Data Collection", frame)

                # Masked key code; 255 means no key was pressed
                key = cv2.waitKey(1) & 0xFF

                if key == ord('q'):
                    break
                elif key == ord('c'):
                    if collecting:
                        print(f"‚è∏Ô∏è  Stopped collecting {current_gesture} (saved {sample_count} samples)")
                    collecting = False
                    current_gesture = None
                    sample_count = 0
                elif key == ord('s'):
                    _print_sample_stats(db.get_sample_count(), "üìä DATABASE STATISTICS")
                elif chr(key) in gestures:
                    current_gesture = gestures[chr(key)]
                    collecting = True
                    sample_count = 0
                    print(f"\n‚ñ∂Ô∏è  Collecting '{current_gesture}' (target: {target_samples})")
        finally:
            # Always free the camera and close the window, even on errors
            cap.release()
            cv2.destroyAllWindows()

        _print_sample_stats(db.get_sample_count(), "üìä FINAL STATISTICS", show_total=True)
    finally:
        db.close()

# =========================================================
# TRAINING MODULE
# =========================================================
def _prompt_number(prompt, default, cast, is_valid=lambda value: True):
    """Read a number from the user; use ``default`` on empty or invalid input."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        value = cast(raw)
    except ValueError:
        value = None
    if value is None or not is_valid(value):
        print(f"  Invalid value '{raw}', using default {default}")
        return default
    return value


def train_model():
    """Train the gesture classifier on the stored samples and save it.

    Shows the per-gesture sample counts, asks for confirmation when fewer
    than 50 samples exist, prompts for the training parameters, trains the
    model, saves it to ``gesture_cnn_model.h5`` and records the run in the
    database. The database connection is closed on every exit path.
    """
    print("\n" + "="*60)
    print("üéì MODEL TRAINING")
    print("="*60)

    db = GestureDatabase()
    try:
        stats = db.get_sample_count()

        if not stats:
            print("\n‚ùå No samples found!")
            print("Run data collection first (option 1).")
            return

        total = sum(stats.values())
        print("\nüìä Database Statistics:")
        for gesture, count in stats.items():
            print(f"  {gesture.upper():15s}: {count:4d} samples")
        print(f"\n  TOTAL: {total}")

        if total < 50:
            print("\n‚ö†Ô∏è  WARNING: Low sample count!")
            response = input("Continue anyway? (y/n): ")
            if response.lower() != 'y':
                return

        print("\nüì• Loading samples...")
        samples = db.get_samples()

        num_classes = len(stats)
        cnn = GestureCNN(num_classes=num_classes, input_shape=(21, 3))

        print("üîÑ Preparing data...")
        X, y = cnn.prepare_data(samples)

        print("üèóÔ∏è  Building model...")
        cnn.build_model()
        # summary() prints the table itself and returns None
        cnn.model.summary()

        print("\n" + "="*60)
        print("TRAINING PARAMETERS")
        print("="*60)

        epochs = _prompt_number("Epochs (default 50): ", 50, int, lambda v: v > 0)
        batch_size = _prompt_number("Batch size (default 32): ", 32, int, lambda v: v > 0)
        # A split outside (0, 1) yields no validation metrics, which the
        # result report below depends on.
        validation_split = _prompt_number(
            "Validation split (default 0.2): ", 0.2, float, lambda v: 0.0 < v < 1.0
        )

        print(f"\n  Epochs: {epochs}")
        print(f"  Batch size: {batch_size}")
        print(f"  Validation split: {validation_split}")
        print("="*60)

        print("\nüéì Training...\n")
        history = cnn.train(X, y, epochs=epochs, batch_size=batch_size, validation_split=validation_split)

        # Metrics of the last epoch that ran
        final_accuracy = history.history['val_accuracy'][-1]
        final_loss = history.history['val_loss'][-1]

        print("\nüìä Results:")
        print(f"  Validation Accuracy: {final_accuracy*100:.2f}%")
        print(f"  Validation Loss: {final_loss:.4f}")

        model_path = "gesture_cnn_model.h5"
        print(f"\nüíæ Saving to '{model_path}'...")
        cnn.save_model(model_path)

        db.save_training_session(model_path, final_accuracy, final_loss, epochs, total)
    finally:
        db.close()

    print("\n‚úÖ Training complete!\n")

# =========================================================
# REAL-TIME CONTROL MODULE
# =========================================================
def run_control():
    """Run real-time gesture control from the webcam.

    Requires a trained model at ``gesture_cnn_model.h5``. Each frame is
    mirrored, cropped to a fixed region of interest and classified; a
    gesture that is stable, differs from the current command and arrives
    after the cooldown is dispatched through ``COMMAND_MAP``. 'r' resets
    the smoothing buffer and the current command, 'q' quits. The camera
    and the window are released even when the loop raises.
    """
    print("\n" + "="*60)
    print("üöÅ GESTURE CONTROL MODE")
    print("="*60)

    model_path = "gesture_cnn_model.h5"

    if not os.path.exists(model_path):
        print("\n‚ùå Model not found!")
        print("Train the model first (option 2).")
        return

    try:
        recognizer = HybridGestureRecognizer(use_cnn=True, model_path=model_path)
        print("‚úÖ CNN model loaded")
    except Exception as e:
        print(f"\n‚ùå Error loading model: {e}")
        return

    # The database is only needed to list the known gestures
    db = GestureDatabase()
    try:
        gesture_names = db.get_all_gesture_names()
    finally:
        db.close()

    print("\nüìã Available Gestures:")
    for name in gesture_names:
        print(f"  ‚Ä¢ {name.upper()}")

    print("\n‚öôÔ∏è  Controls:")
    print("  r: Reset buffer")
    print("  q: Quit")
    print("="*60 + "\n")

    # Fixed region of interest, in pixel coordinates of the mirrored frame
    roi_x1, roi_y1 = 120, 60
    roi_x2, roi_y2 = 520, 420

    # Landmark index pairs joined by a line when drawing the hand
    connections = (
        (0, 1), (1, 2), (2, 3), (3, 4),
        (0, 5), (5, 6), (6, 7), (7, 8),
        (0, 9), (9, 10), (10, 11), (11, 12),
        (0, 13), (13, 14), (14, 15), (15, 16),
        (0, 17), (17, 18), (18, 19), (19, 20),
        (5, 9), (9, 13), (13, 17),
    )

    current_command = "none"
    frame_count = 0
    start_time = time.time()
    fps = 0

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)
            h, w, _ = frame.shape

            roi = frame[roi_y1:roi_y2, roi_x1:roi_x2]
            # Landmarks are normalized to the image handed to the detector,
            # so scale by the actual (possibly clipped) crop size.
            roi_h, roi_w = roi.shape[:2]

            landmarks = recognizer.detect_hand(roi)

            raw_gesture = "none"
            confidence = 0.0

            if landmarks:
                raw_gesture, confidence = recognizer.recognize_gesture(landmarks)

                # Landmark positions in full-frame pixel coordinates
                points = [
                    (int(lm.x * roi_w) + roi_x1, int(lm.y * roi_h) + roi_y1)
                    for lm in landmarks
                ]

                for point in points:
                    cv2.circle(frame, point, 5, (0, 255, 255), -1)

                for start, end in connections:
                    cv2.line(frame, points[start], points[end], (0, 255, 0), 2)

            stable = recognizer.stable_gesture(raw_gesture)

            if stable != "none" and stable != current_command:
                if recognizer.can_send():
                    current_command = stable
                    recognizer.mark_sent()

                    if current_command in COMMAND_MAP:
                        COMMAND_MAP[current_command]()
                        print(f"‚ö° {current_command.upper()} (confidence: {confidence*100:.1f}%)")

            # FPS, averaged over the last 30 frames
            frame_count += 1
            if frame_count % 30 == 0:
                now = time.time()
                elapsed = now - start_time
                if elapsed > 0:
                    fps = 30 / elapsed
                start_time = now

            # UI
            roi_color = (0, 255, 0) if recognizer.can_send() else (0, 165, 255)
            cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x2, roi_y2), roi_color, 3)

            cv2.rectangle(frame, (0, 0), (w, 150), (0, 0, 0), -1)

            hand_status = "HAND DETECTED" if landmarks else "NO HAND"
            hand_color = (0, 255, 0) if landmarks else (100, 100, 100)
            cv2.putText(frame, hand_status, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, hand_color, 2)

            if raw_gesture != "none":
                raw_text = f"RAW: {raw_gesture.upper()} ({confidence*100:.1f}%)"
                cv2.putText(frame, raw_text, (20, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            cmd_color = (0, 255, 0) if current_command != "none" else (100, 100, 100)
            cv2.putText(frame, f"CMD: {current_command.upper()}", (20, 120),
                       cv2.FONT_HERSHEY_SIMPLEX, 1.2, cmd_color, 3)

            cv2.putText(frame, f"FPS: {fps:.1f}", (w - 150, 40),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)

            status = "READY" if recognizer.can_send() else "COOLDOWN"
            status_color = (0, 255, 0) if recognizer.can_send() else (0, 165, 255)
            cv2.putText(frame, status, (w - 150, 75),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, status_color, 2)

            # Stability bar
            if raw_gesture != "none":
                threshold = recognizer.confirmation_threshold
                stable_count = sum(1 for g in recognizer.buffer if g == raw_gesture)

                bar_x, bar_y = 20, h - 40
                bar_w, bar_h = 300, 20

                cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + bar_h), (50, 50, 50), -1)

                # The buffer holds more entries than the threshold, so clamp
                # the fill to keep it inside the bar outline.
                fill_w = int((min(stable_count, threshold) / threshold) * bar_w)
                bar_color = (0, 255, 0) if stable_count >= threshold else (0, 165, 255)
                cv2.rectangle(frame, (bar_x, bar_y), (bar_x + fill_w, bar_y + bar_h), bar_color, -1)

                cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + bar_h), (255, 255, 255), 2)

                cv2.putText(frame, f"Stability: {stable_count}/{threshold}",
                           (bar_x, bar_y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            cv2.imshow("Gesture Control", frame)

            key = cv2.waitKey(1) & 0xFF

            if key == ord('q'):
                break
            elif key == ord('r'):
                recognizer.buffer.clear()
                current_command = "none"
                print("üîÑ Buffer reset")
    finally:
        # Always free the camera and close the window, even on errors
        cap.release()
        cv2.destroyAllWindows()

    print("\n‚úÖ Control mode closed\n")

# =========================================================
# MAIN MENU
# =========================================================
def _prompt(message):
    """Read one line from stdin and return it stripped.

    Returns None instead of raising when stdin is exhausted (EOFError) or the
    user presses Ctrl+C (KeyboardInterrupt), so callers can treat "no answer"
    as an ordinary case rather than crashing with a traceback.
    """
    try:
        return input(message).strip()
    except (EOFError, KeyboardInterrupt):
        return None


def _show_database_stats():
    """Print the per-gesture and total sample counts stored in the database."""
    db = GestureDatabase()
    try:
        # Mapping of gesture name -> sample count (iterated with .items()).
        stats = db.get_sample_count()

        if stats:
            total = sum(stats.values())
            print("\n" + "="*40)
            print("üìä DATABASE STATISTICS")
            print("="*40)
            for gesture, count in stats.items():
                print(f"  {gesture.upper():15s}: {count:4d} samples")
            print("="*40)
            print(f"  TOTAL:          {total:4d} samples")
            print("="*40)
        else:
            print("\nüìä No data in database")
    finally:
        # Always release the handle, even if the query or printing fails.
        db.close()


def _clear_database():
    """Delete all stored samples after the user types the literal 'DELETE'."""
    print("\n‚ö†Ô∏è  WARNING: This will delete all training data!")
    confirm = _prompt("Type 'DELETE' to confirm: ")

    # Any other answer (including no answer at all) cancels the operation.
    if confirm != 'DELETE':
        print("‚ùå Cancelled")
        return

    db = GestureDatabase()
    try:
        db.clear_samples()
    finally:
        # Close the handle even when clearing raises.
        db.close()
    print("üóëÔ∏è  All samples deleted")


def main():
    """Run the interactive text menu until the user chooses to exit.

    Prints a banner once, then repeatedly shows the menu and dispatches on
    the typed option: 1 collects data, 2 trains the model, 3 runs gesture
    control, 4 prints database statistics, 5 clears the database after
    confirmation, 6 exits. Unrecognised input prints an error and re-prompts.
    End-of-input or Ctrl+C at the menu prompt exits the same way as option 6.
    """
    print("\n" + "="*60)
    print("üöÅ CNN GESTURE RECOGNITION SYSTEM üöÅ")
    print("="*60)
    print("\nAll-in-one system for gesture-based drone control")
    print("Combines MediaPipe + CNN + SQLite Database")
    print("="*60)

    while True:
        print("\n" + "="*60)
        print("MAIN MENU")
        print("="*60)
        print("1. Collect Training Data")
        print("2. Train CNN Model")
        print("3. Run Gesture Control")
        print("4. View Database Statistics")
        print("5. Clear Database")
        print("6. Exit")
        print("="*60)

        choice = _prompt("\nSelect option (1-6): ")

        # None means stdin closed or Ctrl+C: leave cleanly instead of looping
        # forever on a dead input stream or dumping a traceback.
        if choice is None or choice == '6':
            print("\nüëã Goodbye!\n")
            break

        # An if/elif chain (rather than a dict of callables) keeps each
        # handler name resolved only when its option is actually selected.
        if choice == '1':
            collect_data()
        elif choice == '2':
            train_model()
        elif choice == '3':
            run_control()
        elif choice == '4':
            _show_database_stats()
        elif choice == '5':
            _clear_database()
        else:
            print("\n‚ùå Invalid option. Please select 1-6.")

if __name__ == "__main__":
    # Script entry point. If the MediaPipe hand-landmark model file cannot be
    # found at the relative path below, show download instructions and wait
    # for the user to press Enter before starting the menu.
    if not os.path.exists("hand_landmarker.task"):
        print(
            "\n" + "=" * 60,
            "‚ö†Ô∏è  WARNING: hand_landmarker.task not found!",
            "=" * 60,
            "\nPlease download the MediaPipe hand landmark model:",
            "https://developers.google.com/mediapipe/solutions/vision/hand_landmarker",
            "\nPlace 'hand_landmarker.task' in the same folder as this script.",
            "=" * 60 + "\n",
            sep="\n",
        )
        # The file is not re-checked afterwards; the menu starts regardless.
        input("Press Enter after downloading the file...")

    main()


🚁 CNN GESTURE RECOGNITION SYSTEM 🚁

All-in-one system for gesture-based drone control
Combines MediaPipe + CNN + SQLite Database

MAIN MENU
1. Collect Training Data
2. Train CNN Model
3. Run Gesture Control
4. View Database Statistics
5. Clear Database
6. Exit

📊 DATABASE STATISTICS
  HOVER          :  500 samples
  LEFT           :  852 samples
  RIGHT          :  800 samples
  STOP           :  500 samples
  SUMMERSAULT    :  600 samples
  UP             :  700 samples
  TOTAL:          3952 samples

MAIN MENU
1. Collect Training Data
2. Train CNN Model
3. Run Gesture Control
4. View Database Statistics
5. Clear Database
6. Exit

👋 Goodbye!

