In [1]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model

def preprocessImage(img, imageSize):
    # Step 1: Median filter for Salt and Pepper noise
    salt_pepper_filtered = cv2.medianBlur(img, 3)  # Kernel size 3 (you can adjust based on noise level)
    
    # Step 2: Gaussian Blurring for Gaussian noise
    gaussian_filtered = cv2.GaussianBlur(salt_pepper_filtered, (5, 5), 0)  # Kernel size 5x5, standard deviation 0
    
    # Step 3: Median filter for Uniform noise
    uniform_filtered = cv2.medianBlur(gaussian_filtered, 3)  # Kernel size 3 (you can adjust based on noise level)
    
    # Step 4: Resize the image to be used in the AI Algorithm
    uniform_filtered = cv2.resize(uniform_filtered, imageSize)
    
    # Step: 5 Normalize image to place all values of RGB between 0 and 1
    normalized_filtered = uniform_filtered / 255.0 
    
    return normalized_filtered

In [2]:
# Set up hand detection using Mediapipe
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

margin = 100  # Set the margin size

# Define the input size expected by your model (adjust based on your model's input requirements)
input_size = (128, 128)  # Change this if your model expects a different size

# Define class labels
class_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
                                'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                                'Y', 'Z', 'del', 'nothing', 'space']

I0000 00:00:1727233042.878442       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro


In [3]:
def extractHand(frame):
    
    # Convert the frame to RGB for Mediapipe
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process the frame to detect hands
    results = hands.process(rgb_frame)
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get the bounding box of the detected hand
            h, w, c = frame.shape
            x_min = w
            y_min = h
            x_max = 0
            y_max = 0
            for landmark in hand_landmarks.landmark:
                x, y = int(landmark.x * w), int(landmark.y * h)
                x_min = min(x_min, x)
                y_min = min(y_min, y)
                x_max = max(x_max, x)
                y_max = max(y_max, y)

            # Calculate width and height
            width = x_max - x_min
            height = y_max - y_min

            # Determine the size for the square bounding box
            square_size = max(width, height) + 2 * margin  # Add margin to the square size

            # Center the square bounding box
            center_x = x_min + width // 2
            center_y = y_min + height // 2

            # Calculate new bounding box coordinates with margin
            x_min_square = max(0, center_x - square_size // 2)
            y_min_square = max(0, center_y - square_size // 2)
            x_max_square = min(w, center_x + square_size // 2)
            y_max_square = min(h, center_y + square_size // 2)

            # Draw the bounding box around the detected hand
            cv2.rectangle(frame, (x_min_square, y_min_square), (x_max_square, y_max_square), (0, 255, 0), 2)

            # Crop the image to just the hand with margin
            hand_img = frame[y_min_square:y_max_square, x_min_square:x_max_square]

            return hand_img, x_min_square, y_min_square, x_max_square, y_max_square
                
    return None, None, None, None, None


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [5]:
# Load your custom CNN model
model = load_model('best_model_CNN.keras')


# Set up webcam capture
cap = cv2.VideoCapture(0)  # 0 is the default webcam index

# Check if the webcam opened successfully
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame.")
        break

    # Flip the frame horizontally for a mirror view
    frame = cv2.flip(frame, 1)

    hand_img, x_min_square, y_min_square, x_max_square, y_max_square = extractHand(frame)
    
    if hand_img is not None:
        hand_img = cv2.flip(hand_img, 1)
        
        hand_img = preprocessImage(hand_img, input_size)
        
        # hand_img = cv2.cvtColor(hand_img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        # hand_img = hand_img.astype('float32') / 255.0  # Normalize pixel values to [0, 1]
        hand_img = np.expand_dims(hand_img, axis=0)  # Add batch dimension

        # Make predictions
        preds = model.predict(hand_img)
        predicted_class = np.argmax(preds, axis=1)[0]  # Get the predicted class index

        
        
        label = class_labels[predicted_class] if predicted_class < len(class_labels) else "Unknown"

        # Display the prediction on the frame
        cv2.putText(frame, f"Prediction: {label}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Show the frame with the prediction and hand landmarks
    cv2.imshow('Webcam Feed', frame)

    # Press 'q' to exit the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10m

KeyboardInterrupt: 

In [9]:
import cv2
import mediapipe as mp
import numpy as np
import os

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)

# Create a directory to save frames
letter = input("What letter do you want to add training data for: ").lower()
name = input("What is your Name: ").lower()

output_dir = f"dataset/custom/{letter}"
os.makedirs(output_dir, exist_ok=True)

def extract_hand(image):
    # Convert the image to uint8 if it's not already
    if image.dtype != np.uint8:
        image = cv2.convertScaleAbs(image)
    
    # Convert the image to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Process the image to find hands
    results = hands.process(image_rgb)
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get the bounding box around the hand
            h, w, _ = image.shape
            x_min = w
            y_min = h
            x_max = 0
            y_max = 0
            
            # Extract coordinates of the landmarks to define bounding box
            for landmark in hand_landmarks.landmark:
                x = int(landmark.x * w)
                y = int(landmark.y * h)
                x_min = min(x_min, x)
                y_min = min(y_min, y)
                x_max = max(x_max, x)
                y_max = max(y_max, y)
            
            # Add margin of 100 pixels
            margin = 100
            x_min = max(0, x_min - margin)
            y_min = max(0, y_min - margin)
            x_max = min(w, x_max + margin)
            y_max = min(h, y_max + margin)

            # Crop the hand region from the original image
            hand_region = image[y_min:y_max, x_min:x_max]
            
            # Check if the crop is valid (non-zero dimensions)
            if hand_region.size == 0:
                print("Hand region could not be cropped properly.")
                return None
            
            # Display the original image with bounding box
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            cv2.imshow("Original Image with Bounding Box", image)
            
            return hand_region  # Return only the cropped hand region
    else:
        print("No hand detected.")
        return None

def record_frames():
    cap = cv2.VideoCapture(0)  # Capture video from webcam

    frame_count = 0
    recording = True

    while recording:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame.")
            break

        hand_region = extract_hand(frame)  # Process the frame for hand detection

        # Save the cropped hand region if detected
        if hand_region is not None:
            frame_filename = os.path.join(output_dir, f"hand_{frame_count:04d}_{name}.jpg")
            cv2.imwrite(frame_filename, hand_region)
            print(f"Saved: {frame_filename}")
            frame_count += 1
        
        # Display the frame
        cv2.imshow("Webcam Feed", frame)

        # Press 'q' to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            recording = False

    cap.release()
    cv2.destroyAllWindows()

# Start the recording when this function is called
record_frames()


I0000 00:00:1727233660.720558       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro


No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand de

KeyboardInterrupt: 