## Data Collection

With Landmarks - Pipeline_2

In [None]:
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
import os
import mediapipe as mp

# ----------------------------
# Initialize MediaPipe Selfie Segmentation
# ----------------------------
# The segmenter is applied to each hand crop in the main loop to black out
# the background before the crop is binarised.
mp_selfie_segmentation = mp.solutions.selfie_segmentation
segmentation = mp_selfie_segmentation.SelfieSegmentation(model_selection=1)

# ----------------------------
# Initialize Webcam and Hand Detector
# ----------------------------
# Camera index 0; released at the very end of the cell.
cap = cv2.VideoCapture(0)
# Only one hand is requested per frame; the main loop uses hands[0].
detector = HandDetector(maxHands=1)

# Constants and folders for saving images
# Side length (pixels) of the square image written to disk.
imgSize = 500
# Root output directory; one sub-folder per class is created beneath it.
# NOTE(review): hard-coded absolute Windows path - adjust per machine.
baseFolder = "C:/Users/User/OneDrive/Documents/SignLanguageApp/SLangDataset/JustZ"
# Class labels: the letters A-Z without "J", plus the extra classes "HI" and "Space".
letters = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "HI", "Space"]
maxImages = 600         # Total images to capture per class
paddingFactor = 0.45     # Fraction of the bbox width/height added as margin on each side

# Alias for the MediaPipe hands solution module (the main loop below reaches
# HAND_CONNECTIONS through mp.solutions.hands directly).
mp_hands = mp.solutions.hands

# ----------------------------
# Utility function: Process and resize image for saving
# ----------------------------
def process_and_resize(imgCrop, aspectRatio, imgSize):
    """Letterbox ``imgCrop`` onto a black ``imgSize`` x ``imgSize`` canvas.

    The crop is scaled so that the side selected by ``aspectRatio`` spans the
    whole canvas, then centred along the other axis. The untouched border
    stays black.

    Args:
        imgCrop: Image array of shape (H, W) or (H, W, C).
        aspectRatio: Height divided by width of ``imgCrop``. A value above 1
            fits the height to the canvas, anything else fits the width.
        imgSize: Side length, in pixels, of the square output.

    Returns:
        A ``uint8`` array of shape (imgSize, imgSize, C), with C == 1 for a
        2-D input, or ``None`` when the crop is empty or resizing fails (the
        error is printed so the capture loop can simply skip the frame).
    """
    channels = 1 if imgCrop.ndim == 2 else imgCrop.shape[2]
    canvas = np.zeros((imgSize, imgSize, channels), np.uint8)

    src_h, src_w = imgCrop.shape[:2]
    if src_h == 0 or src_w == 0:
        print("Error during image processing: empty crop")
        return None

    try:
        if aspectRatio > 1:
            # Height > width: fit the height, centre horizontally.
            # Float rounding can push ceil() to imgSize + 1 for (almost)
            # square crops, so the scaled side is clamped to the canvas.
            new_w = min(imgSize, math.ceil(imgSize / src_h * src_w))
            resized = cv2.resize(imgCrop, (new_w, imgSize))
            # cv2.resize drops a single-channel axis; restore it so the
            # assignment matches the 3-D canvas slice.
            resized = resized.reshape(imgSize, new_w, channels)
            left = (imgSize - new_w + 1) // 2  # integer form of ceil(gap / 2)
            canvas[:, left:left + new_w] = resized
        else:
            # Width >= height: fit the width, centre vertically.
            new_h = min(imgSize, math.ceil(imgSize / src_w * src_h))
            resized = cv2.resize(imgCrop, (imgSize, new_h))
            resized = resized.reshape(new_h, imgSize, channels)
            top = (imgSize - new_h + 1) // 2  # integer form of ceil(gap / 2)
            canvas[top:top + new_h, :] = resized
    except Exception as e:
        # Deliberately broad: a bad frame must not abort a long capture
        # session, and callers treat None as "skip this frame".
        print(f"Error during image processing: {e}")
        return None
    return canvas

# ----------------------------
# Utility function: Detect skin using YCrCb thresholds
# ----------------------------
def detect_skin(frame):
    """Return a single-channel skin mask for a BGR ``frame``.

    The frame is converted to YCrCb and thresholded with fixed bounds, the
    mask is cleaned with a morphological open and close (5x5 ellipse),
    smoothed with a 5x5 Gaussian blur, and finally AND-ed with its own
    filled external contours.
    """
    skin_lo = np.array([0, 133, 77], dtype=np.uint8)
    skin_hi = np.array([255, 173, 127], dtype=np.uint8)
    skin = cv2.inRange(cv2.cvtColor(frame, cv2.COLOR_BGR2YCrCb), skin_lo, skin_hi)

    # Open removes small specks, close fills small gaps; then soften edges.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        skin = cv2.morphologyEx(skin, morph_op, ellipse)
    skin = cv2.GaussianBlur(skin, (5, 5), 0)

    # Restrict the mask to the filled outer contours.
    outlines, _hierarchy = cv2.findContours(skin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    filled = np.zeros_like(skin)
    cv2.drawContours(filled, outlines, -1, 255, thickness=cv2.FILLED)
    return cv2.bitwise_and(skin, filled)

# ----------------------------
# Helper: draw the hand skeleton onto a cropped image
# ----------------------------
def _draw_hand_skeleton(canvas, lm_list, origin, color):
    """Draw landmark dots and their connections onto ``canvas`` in place.

    Args:
        canvas: BGR image to draw on (modified in place).
        lm_list: Landmarks in full-image coordinates; only the first two
            entries (x, y) of each landmark are used.
        origin: (x, y) of the crop's top-left corner in the full image,
            subtracted to move the landmarks into crop coordinates.
        color: BGR colour used for both the dots and the lines.
    """
    off_x, off_y = origin
    points = [(lm[0] - off_x, lm[1] - off_y) for lm in lm_list]
    for point in points:
        cv2.circle(canvas, point, 4, color, -1)
    for start, end in mp.solutions.hands.HAND_CONNECTIONS:
        cv2.line(canvas, points[start], points[end], color, 2)


# ----------------------------
# Helper: turn one detected hand into a training image
# ----------------------------
def _render_landmark_sample(img, hand):
    """Build the square binary-plus-landmarks image for one detected hand.

    Also shows the background-removed crop in the "Segmented Crop" window.

    Args:
        img: Full BGR camera frame.
        hand: One entry of the list returned by ``detector.findHands``;
            its 'bbox' ([x, y, w, h]) and 'lmList' keys are read.

    Returns:
        The ``imgSize`` x ``imgSize`` image produced by
        ``process_and_resize``, or ``None`` when the padded crop is empty or
        resizing fails.
    """
    x, y, w, h = hand['bbox']
    lm_list = hand['lmList']

    # Pad the bounding box proportionally to the hand size, then clip the
    # padded box to the frame.
    xPad = int(w * paddingFactor)
    yPad = int(h * paddingFactor)
    crop_x1 = max(0, x - xPad)
    crop_y1 = max(0, y - yPad)
    crop_x2 = min(x + w + xPad, img.shape[1])
    crop_y2 = min(y + h + yPad, img.shape[0])
    imgCrop = img[crop_y1:crop_y2, crop_x1:crop_x2]
    if imgCrop.size == 0:
        return None

    # STEP A: remove the background with selfie segmentation.
    seg_threshold = 0.5  # Adjust threshold if necessary
    rgb_crop = cv2.cvtColor(imgCrop, cv2.COLOR_BGR2RGB)
    mask_seg = segmentation.process(rgb_crop).segmentation_mask
    mask_binary_seg = (mask_seg > seg_threshold).astype(np.uint8) * 255
    mask_bgr = cv2.cvtColor(mask_binary_seg, cv2.COLOR_GRAY2BGR)
    segmented_crop = cv2.bitwise_and(imgCrop, mask_bgr)
    cv2.imshow("Segmented Crop", segmented_crop)

    # STEP B: white silhouette on black, with the skeleton drawn in black.
    gray = cv2.cvtColor(segmented_crop, cv2.COLOR_BGR2GRAY)
    _, binary_from_seg = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    binary_result = np.zeros_like(segmented_crop)
    binary_result[binary_from_seg > 0] = [255, 255, 255]
    _draw_hand_skeleton(binary_result, lm_list, (crop_x1, crop_y1), (0, 0, 0))

    # STEP C: letterbox to the output size. The ratio is taken from the
    # crop's real shape so it always agrees with what gets resized.
    crop_h, crop_w = imgCrop.shape[:2]
    return process_and_resize(binary_result, crop_h / crop_w, imgSize)


# ----------------------------
# Main Loop: Process each class (letter)
# Keys (focus must be on an OpenCV window): 's' starts saving, 'p' pauses.
# ----------------------------
camera_ok = True
try:
    for className in letters:
        print(f"Starting collection for: {className}")
        folder = os.path.join(baseFolder, className)
        os.makedirs(folder, exist_ok=True)

        counter, collecting = 0, False

        while counter < maxImages:
            success, img = cap.read()
            if not success:
                print("Camera access failed.")
                camera_ok = False
                break

            # Detect hand in the full image and use the first one found
            hands, _ = detector.findHands(img, draw=False)
            if hands:
                imgWhite = _render_landmark_sample(img, hands[0])
                if imgWhite is not None:
                    cv2.imshow("Processed Binary Image", imgWhite)
                    if collecting:
                        counter += 1
                        savePath = os.path.join(folder, f"{className.lower()}_{counter}.jpg")
                        cv2.imwrite(savePath, imgWhite)
                        print(f"Saved {counter}/{maxImages} images for {className}")

            # Show the original live feed (for reference)
            cv2.imshow("Live Feed with Landmarks", img)
            key = cv2.waitKey(1)
            if key == ord('s'):
                collecting = True
            if key == ord('p'):
                collecting = False

        if not camera_ok:
            # Without frames no further class can be collected, so stop
            # instead of prompting for every remaining class.
            break

        print(f"Completed collection for {className}")
        input("Press Enter for next class.")
finally:
    # Always free the camera and close the windows, even after an error or
    # a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


Starting collection for: A
Error during image processing: could not broadcast input array from shape (501,500,3) into shape (500,500,3)
Error during image processing: could not broadcast input array from shape (501,500,3) into shape (500,500,3)


Just Binary - Pipeline_1

In [None]:
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
import os
import mediapipe as mp

# ----------------------------
# Initialize MediaPipe Selfie Segmentation
# ----------------------------
# The segmenter is applied to each hand crop in the main loop to black out
# the background before the crop is binarised.
mp_selfie_segmentation = mp.solutions.selfie_segmentation
segmentation = mp_selfie_segmentation.SelfieSegmentation(model_selection=1)

# ----------------------------
# Initialize Webcam and Hand Detector
# ----------------------------
# Camera index 0; released at the very end of the cell.
cap = cv2.VideoCapture(0)
# Only one hand is requested per frame; the main loop uses hands[0].
detector = HandDetector(maxHands=1)

# Constants and folders for saving images
# Side length (pixels) of the square image written to disk.
imgSize = 500
# Root output directory; one sub-folder per class is created beneath it.
# NOTE(review): hard-coded absolute Windows path - adjust per machine.
baseFolder = "C:/Users/User/OneDrive/Documents/SignLanguageApp/SLangDataset/JustZ"
# Class labels: the letters A-Z without "J", plus the extra classes "HI" and "Space".
letters = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "HI", "Space"]
maxImages = 600         # Total images to capture per class
paddingFactor = 0.45     # Fraction of the bbox width/height added as margin on each side

# Alias for the MediaPipe hands solution module; not referenced by the code
# visible in this cell.
mp_hands = mp.solutions.hands

# ----------------------------
# Utility function: Process and resize image for saving
# ----------------------------
def process_and_resize(imgCrop, aspectRatio, imgSize):
    """Letterbox ``imgCrop`` onto a black ``imgSize`` x ``imgSize`` canvas.

    The crop is scaled so that the side selected by ``aspectRatio`` spans the
    whole canvas, then centred along the other axis. The untouched border
    stays black.

    Args:
        imgCrop: Image array of shape (H, W) or (H, W, C).
        aspectRatio: Height divided by width of ``imgCrop``. A value above 1
            fits the height to the canvas, anything else fits the width.
        imgSize: Side length, in pixels, of the square output.

    Returns:
        A ``uint8`` array of shape (imgSize, imgSize, C), with C == 1 for a
        2-D input, or ``None`` when the crop is empty or resizing fails (the
        error is printed so the capture loop can simply skip the frame).
    """
    channels = 1 if imgCrop.ndim == 2 else imgCrop.shape[2]
    canvas = np.zeros((imgSize, imgSize, channels), np.uint8)

    src_h, src_w = imgCrop.shape[:2]
    if src_h == 0 or src_w == 0:
        print("Error during image processing: empty crop")
        return None

    try:
        if aspectRatio > 1:
            # Height > width: fit the height, centre horizontally.
            # Float rounding can push ceil() to imgSize + 1 for (almost)
            # square crops, so the scaled side is clamped to the canvas.
            new_w = min(imgSize, math.ceil(imgSize / src_h * src_w))
            resized = cv2.resize(imgCrop, (new_w, imgSize))
            # cv2.resize drops a single-channel axis; restore it so the
            # assignment matches the 3-D canvas slice.
            resized = resized.reshape(imgSize, new_w, channels)
            left = (imgSize - new_w + 1) // 2  # integer form of ceil(gap / 2)
            canvas[:, left:left + new_w] = resized
        else:
            # Width >= height: fit the width, centre vertically.
            new_h = min(imgSize, math.ceil(imgSize / src_w * src_h))
            resized = cv2.resize(imgCrop, (imgSize, new_h))
            resized = resized.reshape(new_h, imgSize, channels)
            top = (imgSize - new_h + 1) // 2  # integer form of ceil(gap / 2)
            canvas[top:top + new_h, :] = resized
    except Exception as e:
        # Deliberately broad: a bad frame must not abort a long capture
        # session, and callers treat None as "skip this frame".
        print(f"Error during image processing: {e}")
        return None
    return canvas

# ----------------------------
# (Optional) Utility function: Detect skin using YCrCb thresholds
# (Not used in the updated processing)
# ----------------------------
def detect_skin(frame):
    """Return a single-channel skin mask for a BGR ``frame``.

    The frame is converted to YCrCb and thresholded with fixed bounds, the
    mask is cleaned with a morphological open and close (5x5 ellipse),
    smoothed with a 5x5 Gaussian blur, and finally AND-ed with its own
    filled external contours.
    """
    skin_lo = np.array([0, 133, 77], dtype=np.uint8)
    skin_hi = np.array([255, 173, 127], dtype=np.uint8)
    skin = cv2.inRange(cv2.cvtColor(frame, cv2.COLOR_BGR2YCrCb), skin_lo, skin_hi)

    # Open removes small specks, close fills small gaps; then soften edges.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        skin = cv2.morphologyEx(skin, morph_op, ellipse)
    skin = cv2.GaussianBlur(skin, (5, 5), 0)

    # Restrict the mask to the filled outer contours.
    outlines, _hierarchy = cv2.findContours(skin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    filled = np.zeros_like(skin)
    cv2.drawContours(filled, outlines, -1, 255, thickness=cv2.FILLED)
    return cv2.bitwise_and(skin, filled)

# ----------------------------
# Helper: turn one detected hand into a training image
# ----------------------------
def _render_binary_sample(img, hand):
    """Build the square binary silhouette image for one detected hand.

    Also shows the background-removed crop in the "Segmented Crop" window.

    Args:
        img: Full BGR camera frame.
        hand: One entry of the list returned by ``detector.findHands``;
            only its 'bbox' ([x, y, w, h]) key is read.

    Returns:
        The ``imgSize`` x ``imgSize`` image produced by
        ``process_and_resize``, or ``None`` when the padded crop is empty or
        resizing fails.
    """
    x, y, w, h = hand['bbox']

    # Pad the bounding box proportionally to the hand size, then clip the
    # padded box to the frame.
    xPad = int(w * paddingFactor)
    yPad = int(h * paddingFactor)
    crop_x1 = max(0, x - xPad)
    crop_y1 = max(0, y - yPad)
    crop_x2 = min(x + w + xPad, img.shape[1])
    crop_y2 = min(y + h + yPad, img.shape[0])
    imgCrop = img[crop_y1:crop_y2, crop_x1:crop_x2]
    if imgCrop.size == 0:
        return None

    # STEP A: remove the background with selfie segmentation.
    seg_threshold = 0.5  # Adjust threshold if necessary
    rgb_crop = cv2.cvtColor(imgCrop, cv2.COLOR_BGR2RGB)
    mask_seg = segmentation.process(rgb_crop).segmentation_mask
    mask_binary_seg = (mask_seg > seg_threshold).astype(np.uint8) * 255
    mask_bgr = cv2.cvtColor(mask_binary_seg, cv2.COLOR_GRAY2BGR)
    segmented_crop = cv2.bitwise_and(imgCrop, mask_bgr)
    cv2.imshow("Segmented Crop", segmented_crop)

    # STEP B: white where the segmented crop is non-black, black elsewhere
    # (no landmarks are overlaid in this pipeline).
    gray = cv2.cvtColor(segmented_crop, cv2.COLOR_BGR2GRAY)
    _, binary_from_seg = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
    binary_result = np.zeros_like(segmented_crop)
    binary_result[binary_from_seg > 0] = [255, 255, 255]

    # STEP C: letterbox to the output size. The ratio is taken from the
    # crop's real shape so it always agrees with what gets resized.
    crop_h, crop_w = imgCrop.shape[:2]
    return process_and_resize(binary_result, crop_h / crop_w, imgSize)


# ----------------------------
# Main Loop: Process each class
# Keys (focus must be on an OpenCV window): 's' starts saving, 'p' pauses.
# ----------------------------
camera_ok = True
try:
    for className in letters:
        print(f"Starting collection for: {className}")
        folder = os.path.join(baseFolder, className)
        os.makedirs(folder, exist_ok=True)

        counter, collecting = 0, False

        while counter < maxImages:
            success, img = cap.read()
            if not success:
                print("Camera access failed.")
                camera_ok = False
                break

            # Detect hand in the full image and use the first one found
            hands, _ = detector.findHands(img, draw=False)
            if hands:
                imgWhite = _render_binary_sample(img, hands[0])
                if imgWhite is not None:
                    cv2.imshow("Processed Binary Image", imgWhite)
                    if collecting:
                        counter += 1
                        savePath = os.path.join(folder, f"{className.lower()}_{counter}.jpg")
                        cv2.imwrite(savePath, imgWhite)
                        print(f"Saved {counter}/{maxImages} images for {className}")

            # Show the original live feed (for reference)
            cv2.imshow("Live Feed", img)
            key = cv2.waitKey(1)
            if key == ord('s'):
                collecting = True
            if key == ord('p'):
                collecting = False

        if not camera_ok:
            # Without frames no further class can be collected, so stop
            # instead of prompting for every remaining class.
            break

        print(f"Completed collection for {className}")
        input("Press Enter for next class.")
finally:
    # Always free the camera and close the windows, even after an error or
    # a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


Starting collection for: A
