In [1]:
import cv2
import mediapipe as mp
import os
import tkinter as tk
from tkinter import simpledialog

# Create the Tk root window and keep it hidden
root = tk.Tk()
root.withdraw()

# MediaPipe hand solution, its landmark-drawing helper, and a detector
# limited to one hand per frame
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=1)

def get_hand_type(hand_landmarks, w):
    """Label a hand "Right" or "Left" from the side of the frame its wrist is on.

    The wrist landmark's x value is scaled by ``w``; the result is "Right"
    when that value is strictly greater than ``w / 2`` and "Left" otherwise.
    This is purely positional: no handedness output from the detector is used.

    Args:
        hand_landmarks: Object whose ``landmark`` sequence is indexed by
            ``mp_hands.HandLandmark.WRIST`` and yields an item with ``.x``.
        w: Frame width used to scale the wrist x value.

    Returns:
        The string "Right" or "Left".
    """
    wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
    if wrist.x * w > w / 2:
        return "Right"
    return "Left"

def capture_images(sign, num_images=2800, delay=2):
    """Capture cropped, grayscale hand images for one sign from the webcam.

    Frames are read from camera index 0 and passed to the module-level
    MediaPipe ``hands`` detector. For each frame with a detected hand, the
    padded bounding box of the landmarks is cropped from an unannotated copy
    of the frame, resized to 128x128, converted to grayscale and written to
    ``<sign>/<n>.jpg`` (n starts at 1). A preview window shows the annotated
    frame; pressing 'q' in it stops the session early. The session also ends
    if a frame cannot be read.

    Args:
        sign: Label of the sign; also used as the output directory path.
        num_images: Number of images to save before stopping.
        delay: Seconds to pause, with an on-screen prompt, after the first
            detection and before saving starts.

    Raises:
        OSError: If an image cannot be written to disk.
    """
    # exist_ok makes directory creation safe to repeat across sessions.
    os.makedirs(sign, exist_ok=True)

    cap = cv2.VideoCapture(0)
    count = 0
    capture_started = False
    padding = 10  # pixels added on every side of the landmark bounding box

    try:
        while count < num_images:
            ret, frame = cap.read()
            if not ret:
                break

            # The detector is fed RGB; OpenCV delivers BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            if results.multi_hand_landmarks:
                h, w = frame.shape[:2]

                for hand_landmarks in results.multi_hand_landmarks:
                    # Copy before drawing so saved crops carry no overlay.
                    frame_to_save = frame.copy()

                    xs = [int(lm.x * w) for lm in hand_landmarks.landmark]
                    ys = [int(lm.y * h) for lm in hand_landmarks.landmark]

                    # Seeding min/max with the frame edges keeps the box
                    # bounds valid slice indices even if a coordinate lies
                    # outside the frame.
                    x_min = max(min(xs + [w]) - padding, 0)
                    y_min = max(min(ys + [h]) - padding, 0)
                    x_max = min(max(xs + [0]) + padding, w)
                    y_max = min(max(ys + [0]) + padding, h)

                    # Draw a green rectangle around the detected hand
                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                    mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    if not capture_started:
                        cv2.putText(frame, "Place your hand within the green rectangle", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                        cv2.putText(frame, f"Capturing will start in {delay} seconds...", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                        cv2.imshow('Capturing Sign', frame)
                        # waitKey treats a delay <= 0 as "wait forever", so
                        # never pass less than 1 ms.
                        cv2.waitKey(max(int(delay * 1000), 1))
                        capture_started = True
                        # This frame was grabbed before the countdown; skip
                        # it so the first saved image comes from a frame
                        # read after the user had time to get ready.
                        break

                    hand_roi = frame_to_save[y_min:y_max, x_min:x_max]

                    if hand_roi.size > 0:
                        hand_roi_resized = cv2.resize(hand_roi, (128, 128))
                        hand_roi_gray = cv2.cvtColor(hand_roi_resized, cv2.COLOR_BGR2GRAY)

                        img_name = os.path.join(sign, f"{count + 1}.jpg")
                        # imwrite signals failure through its return value;
                        # counting an unwritten image would silently
                        # shrink the dataset.
                        if not cv2.imwrite(img_name, hand_roi_gray):
                            raise OSError(f"Could not write image {img_name!r}")
                        count += 1

                        cv2.putText(frame, f"Capturing {sign} - {count}/{num_images}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

                    if count >= num_images:
                        break

            cv2.imshow('Capturing Sign', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview, even on errors;
        # otherwise the device stays locked until the kernel restarts.
        cap.release()
        cv2.destroyAllWindows()

def capture_for_all_signs(signs, num_images=2800, delay=2):
    """Run one capture session per entry of ``signs``, in iteration order.

    Args:
        signs: Iterable of sign labels, each passed to ``capture_images``.
        num_images: Forwarded unchanged to every ``capture_images`` call.
        delay: Forwarded unchanged to every ``capture_images`` call.
    """
    for label in signs:
        capture_images(label, num_images, delay)

# Main loop: prompt for a sign label and run one capture session per label
while True:
    sign = simpledialog.askstring("Input", "Enter the sign (A, B, etc.) or 'q' to quit:")

    # None means the dialog was cancelled or closed.
    if sign is None:
        break

    # Surrounding whitespace would otherwise end up in the directory name.
    sign = sign.strip()

    # Only lowercase 'q' quits, so an uppercase 'Q' can still be captured as a sign.
    if sign == 'q':
        break

    # An empty label cannot be used as a directory name (os.makedirs('')
    # raises), so ask again instead of crashing the session.
    if not sign:
        continue

    capture_images(sign)

print("Dataset generation completed.")


2025-02-07 13:12:40.901499: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-07 13:12:40.905877: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-07 13:12:40.918218: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738915960.938919   22622 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738915960.944960   22622 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-07 13:12:40.966551: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

Dataset generation completed.


In [1]:
import cv2
import mediapipe as mp
import os
import tkinter as tk
from tkinter import simpledialog
import time

# Create the Tk root window and keep it hidden
root = tk.Tk()
root.withdraw()

# MediaPipe hand solution, its landmark-drawing helper, and a detector
# limited to one hand per frame
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=1)

# Function to capture images for a given sign
def capture_images(sign, num_images=400, image_size=256, delay=2):
    """Capture cropped colour hand images for one sign from the webcam.

    Frames are read from camera index 0 and passed to the module-level
    MediaPipe ``hands`` detector. For each frame with a detected hand, the
    padded bounding box of the landmarks is cropped from an unannotated copy
    of the frame, resized to ``image_size`` x ``image_size`` and written to
    ``<sign>/<n>.jpg`` (n starts at 1). A preview window shows the annotated
    frame; pressing 'q' in it stops the session early. The session also ends
    if a frame cannot be read.

    Args:
        sign: Label of the sign; also used as the output directory path.
        num_images: Number of images to save before stopping.
        image_size: Side length, in pixels, of the square saved images.
        delay: Seconds to sleep after the first usable detection and before
            saving starts.

    Raises:
        OSError: If an image cannot be written to disk.
    """
    # exist_ok makes directory creation safe to repeat across sessions.
    os.makedirs(sign, exist_ok=True)

    # Start video capture
    cap = cv2.VideoCapture(0)

    count = 0
    capture_started = False
    padding = 10  # pixels added on every side of the landmark bounding box
    font = cv2.FONT_HERSHEY_SIMPLEX

    try:
        while count < num_images:
            ret, frame = cap.read()
            if not ret:
                break

            # The detector is fed RGB; OpenCV delivers BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            if results.multi_hand_landmarks:
                h, w = frame.shape[:2]

                for hand_landmarks in results.multi_hand_landmarks:
                    # Copy before drawing so saved crops carry no overlay.
                    frame_to_save = frame.copy()

                    xs = [int(lm.x * w) for lm in hand_landmarks.landmark]
                    ys = [int(lm.y * h) for lm in hand_landmarks.landmark]

                    # Seeding min/max with the frame edges keeps the box
                    # bounds valid slice indices even if a coordinate lies
                    # outside the frame.
                    x_min = max(min(xs + [w]) - padding, 0)
                    y_min = max(min(ys + [h]) - padding, 0)
                    x_max = min(max(xs + [0]) + padding, w)
                    y_max = min(max(ys + [0]) + padding, h)

                    # Draw hand landmarks on the frame for real-time display
                    mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Extract the hand region from the unannotated copy
                    hand_roi = frame_to_save[y_min:y_max, x_min:x_max]

                    if hand_roi.size > 0:
                        if not capture_started:
                            # time.sleep rejects negative values.
                            time.sleep(max(delay, 0))
                            capture_started = True
                            # This frame was grabbed before the wait; skip
                            # it so the first saved image comes from a
                            # frame read after the pause.
                            break

                        hand_roi_resized = cv2.resize(hand_roi, (image_size, image_size))

                        img_name = os.path.join(sign, f"{count + 1}.jpg")
                        # imwrite signals failure through its return value;
                        # counting an unwritten image would silently
                        # shrink the dataset.
                        if not cv2.imwrite(img_name, hand_roi_resized):
                            raise OSError(f"Could not write image {img_name!r}")
                        count += 1

                        # Show the running image count on the preview frame
                        cv2.putText(frame, f"Capturing {sign} - {count}/{num_images}", (10, 30), font, 1, (255, 0, 0), 2, cv2.LINE_AA)

                    # Stop scanning hands once the target count is reached
                    if count >= num_images:
                        break

            # Display the frame with the landmarks
            cv2.imshow('Capturing Sign', frame)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview, even on errors;
        # otherwise the device stays locked until the kernel restarts.
        cap.release()
        cv2.destroyAllWindows()

# Main loop: prompt for a sign label and run one capture session per label
while True:
    # Ask the user for the sign (e.g., A, B, etc.)
    sign = simpledialog.askstring("Input", "Enter the sign (A, B, etc.) or 'q' to quit:")

    # None means the dialog was cancelled or closed.
    if sign is None:
        break

    # Surrounding whitespace would otherwise end up in the directory name.
    sign = sign.strip()

    # Only lowercase 'q' quits, so an uppercase 'Q' can still be captured as a sign.
    if sign == 'q':
        break

    # An empty label cannot be used as a directory name (os.makedirs('')
    # raises), so ask again instead of crashing the session.
    if not sign:
        continue

    capture_images(sign)

print("Dataset generation completed.")


2025-02-07 12:58:38.147884: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-07 12:58:38.152258: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-07 12:58:38.165322: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738915118.186969   18861 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738915118.193027   18861 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-07 12:58:38.215986: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

Dataset generation completed.
