# In [None]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import os
import time
import random
from datetime import datetime


def augment_landmarks(landmarks, augmentation_factor=2):
    """
    Create randomly perturbed copies of one set of hand landmarks.

    Every copy is built in three steps: an axis-wise stretch about the 2-D
    centroid of the input, an in-plane rotation (about the Z axis) around
    that same centroid, and a small independent jitter on each landmark.
    The stretch factors and the rotation angle are drawn once per copy, so
    the hand is transformed as a whole and keeps its shape; only the jitter
    differs from landmark to landmark.

    Args:
        landmarks: Iterable of objects with numeric ``x``, ``y`` and ``z``
            attributes. The input objects are left untouched.
        augmentation_factor: How many augmented copies to produce.

    Returns:
        A list of ``augmentation_factor`` samples. Each sample is a list of
        new objects carrying ``x``, ``y`` and ``z`` attributes, in the same
        order as ``landmarks``.
    """
    # Function-scope import keeps this block self-contained.
    from types import SimpleNamespace

    points = list(landmarks)
    if not points:
        # Nothing to transform; also avoids taking the mean of an empty list.
        return [[] for _ in range(augmentation_factor)]

    # The centroid depends only on the input, so compute it once.
    center_x = np.mean([p.x for p in points])
    center_y = np.mean([p.y for p in points])

    augmented_samples = []
    for _ in range(augmentation_factor):
        # One stretch and one rotation per sample: drawing them per landmark
        # would move every point differently and distort the hand shape.
        stretch_x = random.uniform(0.95, 1.05)
        stretch_y = random.uniform(0.95, 1.05)
        stretch_z = random.uniform(0.98, 1.02)  # depth is perturbed less
        angle = random.uniform(-15, 15) * np.pi / 180  # degrees -> radians
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)

        sample = []
        for p in points:
            # Stretch relative to the centroid ...
            dx = (p.x - center_x) * stretch_x
            dy = (p.y - center_y) * stretch_y

            # ... rotate about the Z axis around the centroid ...
            rotated_x = center_x + dx * cos_a - dy * sin_a
            rotated_y = center_y + dx * sin_a + dy * cos_a

            # ... and add a small per-landmark jitter.
            sample.append(
                SimpleNamespace(
                    x=rotated_x + random.uniform(-0.01, 0.01),
                    y=rotated_y + random.uniform(-0.01, 0.01),
                    z=p.z * stretch_z + random.uniform(-0.005, 0.005),
                )
            )

        augmented_samples.append(sample)

    return augmented_samples


def extract_features_variations(landmarks):
    """
    Convert landmarks into a flat list of centred, scaled coordinates.

    The centroid of all landmarks is subtracted from every point and the
    result is divided by a scale equal to the standard deviation of the
    point-to-centroid distances (floored at 1e-6 to avoid division by zero).

    Args:
        landmarks: Re-iterable collection of objects with ``x``, ``y`` and
            ``z`` attributes.

    Returns:
        ``[x0, y0, z0, x1, y1, z1, ...]`` as a list, or ``None`` if any
        exception is raised while computing (the error is printed).
    """
    try:
        # Centroid, one axis at a time.
        mean_x = np.mean([point.x for point in landmarks])
        mean_y = np.mean([point.y for point in landmarks])
        mean_z = np.mean([point.z for point in landmarks])

        # Euclidean distance of every landmark from the centroid.
        radii = []
        for point in landmarks:
            offset = [point.x - mean_x, point.y - mean_y, point.z - mean_z]
            radii.append(np.linalg.norm(offset))

        # Spread of those distances is the normalisation scale.
        spread = np.std(radii)
        scale = spread if spread > 1e-6 else 1e-6

        return [
            component / scale
            for point in landmarks
            for component in (
                point.x - mean_x,
                point.y - mean_y,
                point.z - mean_z,
            )
        ]

    except Exception as e:
        print(f"‚ö†Ô∏è  Error in feature extraction: {e}")
        return None


def save_dataset(rows, output_csv, gesture_label):
    """
    Append captured samples to a CSV dataset and log a session summary.

    The rows are written to ``output_csv`` (merged with the file's current
    contents if it already exists). A human-readable summary of this session
    is appended to a sibling text file named ``<output stem>_metadata.txt``.

    Args:
        rows: Sequence of rows; each row holds 63 coordinate values
            (x, y, z for 21 landmarks) followed by the gesture label and the
            ``is_right_hand`` flag.
        output_csv: Path of the dataset file. Missing parent directories are
            created.
        gesture_label: Label recorded in the session metadata.
    """
    columns = [f"{axis}{i}" for i in range(21) for axis in ("x", "y", "z")] + [
        "gesture",
        "is_right_hand",
    ]
    df = pd.DataFrame(rows, columns=columns)

    # Session metadata describes only the rows passed in, so it is gathered
    # before the existing file is merged in.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    metadata = {
        "collection_date": timestamp,
        "gesture_label": gesture_label,
        "total_samples": len(df),
        "right_hand_samples": int((df["is_right_hand"] == 1).sum()),
        "left_hand_samples": int((df["is_right_hand"] == 0).sum()),
    }

    # dirname is empty for a bare file name; makedirs("") would raise.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    if os.path.exists(output_csv):
        df_existing = pd.read_csv(output_csv)
        df = pd.concat([df_existing, df], ignore_index=True)
        print(f"üìÅ Added to existing dataset. Total samples: {len(df)}")
    else:
        print(f"üìÑ Created new dataset with {len(df)} samples")

    df.to_csv(output_csv, index=False)

    # Derive the metadata path from the file stem. A plain
    # str.replace(".csv", ...) leaves the path unchanged when the extension
    # is not exactly ".csv", which would append the metadata to the dataset
    # file itself.
    meta_path = os.path.splitext(output_csv)[0] + "_metadata.txt"
    with open(meta_path, "a", encoding="utf-8") as f:
        f.write(f"\n=== Session {timestamp} ===\n")
        for key, value in metadata.items():
            f.write(f"{key}: {value}\n")


def capture_hands_with_augmentation(
    output_csv,
    gesture_label,
    max_samples=100,
    augmentation_factor=2,  # augmented samples added for every real sample
    capture_interval=0.3,
):
    """
    Record hand-landmark samples from the webcam, adding augmented copies.

    Frames are read from camera index 0, mirrored horizontally and passed to
    MediaPipe Hands (one hand at most). Whenever a hand is detected and at
    least ``capture_interval`` seconds have passed since the previous
    capture, the normalised landmarks are stored as one original sample and
    ``augment_landmarks`` supplies extra perturbed samples. The loop ends
    when ``max_samples`` original samples were stored, a frame cannot be
    read, or the user presses ``q``. Collected rows are then written with
    ``save_dataset`` and summarised with ``analyze_collected_data``.

    Args:
        output_csv: Path of the CSV file passed on to ``save_dataset``.
        gesture_label: Label stored with every row.
        max_samples: Number of original (non-augmented) samples to collect.
        augmentation_factor: Augmented samples requested per original one.
        capture_interval: Minimum time in seconds between two captures.

    Returns:
        None. Returns early if the camera cannot be opened.
    """
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    rows = []
    count = 0
    last_capture_time = 0
    augmentation_stats = {"original": 0, "augmented": 0}

    # Open the camera before building the detector so that nothing needs to
    # be cleaned up when the camera is unavailable.
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("!!! Unable to access camera")
        return

    print(f"üé• Capturing: {gesture_label} with augmentation")
    print(f"üí° Each sample will generate {augmentation_factor} augmented versions")
    print("üí° Move your hand around for better data variety!")

    try:
        # The context manager closes the detector even if the loop raises.
        with mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5,
        ) as hands:
            while cap.isOpened() and count < max_samples:
                success, frame = cap.read()
                if not success:
                    break

                # Mirror the frame so the preview behaves like a mirror.
                frame_flipped = cv2.flip(frame, 1)
                image_rgb = cv2.cvtColor(frame_flipped, cv2.COLOR_BGR2RGB)
                result = hands.process(image_rgb)

                current_time = time.time()

                if result.multi_hand_landmarks and result.multi_handedness:
                    hand_landmarks = result.multi_hand_landmarks[0]

                    # Capturing is rate-limited; drawing (below) is not.
                    if current_time - last_capture_time >= capture_interval:
                        label = result.multi_handedness[0].classification[0].label
                        is_right = 1 if label == "Right" else 0

                        # Original sample
                        original_coords = extract_features_variations(
                            hand_landmarks.landmark
                        )

                        if original_coords is not None:
                            rows.append(original_coords + [gesture_label, is_right])
                            augmentation_stats["original"] += 1
                            count += 1

                            print(f"‚úÖ Sample {count}/{max_samples} | Hand: {label}")

                            # Augmented samples derived from the same frame
                            augmented_landmarks_list = augment_landmarks(
                                hand_landmarks.landmark, augmentation_factor
                            )

                            for i, aug_landmarks in enumerate(augmented_landmarks_list):
                                aug_coords = extract_features_variations(aug_landmarks)
                                if aug_coords is not None:
                                    rows.append(aug_coords + [gesture_label, is_right])
                                    augmentation_stats["augmented"] += 1
                                    print(f"   ‚Ü≥ Augmented {i+1}/{augmentation_factor}")

                            last_capture_time = current_time

                    # Draw the skeleton on every frame with a detected hand
                    # so the overlay does not flicker between captures.
                    mp_drawing.draw_landmarks(
                        frame_flipped, hand_landmarks, mp_hands.HAND_CONNECTIONS
                    )

                # On-screen status text
                info_text = [
                    f"Gesture: {gesture_label}",
                    f"Samples: {count}/{max_samples}",
                    f"Augmented: +{augmentation_stats['augmented']}",
                    "Move hand for variety!",
                    "Press Q to quit",
                ]

                for i, text in enumerate(info_text):
                    cv2.putText(
                        frame_flipped,
                        text,
                        (10, 30 + i * 25),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.6,
                        (0, 255, 0),
                        2,
                    )

                cv2.imshow("Hand Capture - WITH AUGMENTATION", frame_flipped)

                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
    finally:
        # Always give the camera and the preview window back.
        cap.release()
        cv2.destroyAllWindows()

    if rows:
        save_dataset(rows, output_csv, gesture_label)
        print("üéâ Collection complete!")
        print(
            f"üìä Stats: {augmentation_stats['original']} original + {augmentation_stats['augmented']} augmented = {len(rows)} total samples"
        )

        # Quick summary of what was collected
        analyze_collected_data(rows, gesture_label)
    else:
        print("‚ùå No samples captured.")


def visualize_augmentation_example(landmarks, augmented_landmarks):
    """
    Show original and augmented landmarks side by side (debugging aid).

    The original landmarks are drawn in green and the first augmented sample
    in red on a black 600x400 canvas, each as dots joined to the next
    landmark in list order. The window stays open for about one second.

    Args:
        landmarks: Indexable sequence of objects with ``x`` and ``y``.
        augmented_landmarks: Sequence of augmented samples; only the first
            one (``augmented_landmarks[0]``) is drawn, so it must not be
            empty.

    NOTE(review): coordinates are mapped with ``(value + 0.5) * 400``, which
    looks like it expects values centred on zero - confirm with the caller.
    """
    canvas = np.zeros((400, 600, 3), dtype=np.uint8)

    def to_pixels(x, y):
        # Coordinate -> integer pixel position on the canvas.
        return int((x + 0.5) * 400), int((y + 0.5) * 400)

    def draw_chain(points, color):
        # A dot per landmark plus a segment to the following landmark.
        last_index = len(points) - 1
        for idx, point in enumerate(points):
            here = to_pixels(point.x, point.y)
            cv2.circle(canvas, here, 3, color, -1)
            if idx < last_index:
                following = points[idx + 1]
                there = to_pixels(following.x, following.y)
                cv2.line(canvas, here, there, color, 1)

    green = (0, 255, 0)
    red = (0, 0, 255)

    draw_chain(landmarks, green)  # original
    draw_chain(augmented_landmarks[0], red)  # first augmented sample

    cv2.putText(
        canvas,
        "Green: Original, Red: Augmented",
        (10, 380),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.5,
        (255, 255, 255),
        1,
    )

    window_name = "Augmentation Example"
    cv2.imshow(window_name, canvas)
    cv2.waitKey(1000)  # keep the window up for one second
    cv2.destroyWindow(window_name)


def analyze_collected_data(rows, gesture_label):
    """
    Print a quick summary of the collected samples.

    Reports the number of rows, how many belong to the right and left hand
    (taken from the last element of each row) and the overall min/max of the
    coordinate values (everything except the last two elements of each row).
    An empty ``rows`` list is reported without raising.

    Args:
        rows: List of rows shaped ``[coord, ..., gesture, is_right_hand]``.
        gesture_label: Gesture name used in the heading.
    """
    print(f"\nüìä Analysis for {gesture_label}:")
    print(f"Total samples: {len(rows)}")

    # Hand distribution
    right_hand_count = sum(1 for row in rows if row[-1] == 1)
    left_hand_count = len(rows) - right_hand_count
    print(f"Right hand: {right_hand_count}")
    print(f"Left hand: {left_hand_count}")

    # Coordinate range over all values except the two trailing labels
    all_coords = [coord for row in rows for coord in row[:-2]]
    if all_coords:
        print(f"Coordinate range: [{min(all_coords):.3f}, {max(all_coords):.3f}]")
    else:
        # min()/max() raise ValueError on an empty sequence.
        print("Coordinate range: n/a (no coordinates collected)")

    # The one-hand warning only makes sense when something was collected.
    if rows and (right_hand_count == 0 or left_hand_count == 0):
        print("‚ö†Ô∏è  Warning: Only one hand type detected. Try using both hands!")


# Example usage
if __name__ == "__main__":
    print("üöÄ Starting hand gesture data collection WITH AUGMENTATION...")

    # Settings for this capture session, passed on as keyword arguments.
    session_settings = {
        "output_csv": "don.csv",
        "gesture_label": "don",
        "max_samples": 300,  # number of real (non-augmented) captures
        "augmentation_factor": 2,  # 1 original + 2 augmented = 3 rows per capture
        "capture_interval": 0.1,
    }

    # Run the capture with augmentation.
    capture_hands_with_augmentation(**session_settings)