In [10]:
import os

import cv2
import mediapipe as mp

### Class for hand detection

In [11]:
class Detector:
    """Wrapper around MediaPipe Hands that reports per-hand bounding boxes."""

    def __init__(self, max_num_hands=1, min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        """Create the underlying MediaPipe ``Hands`` solution.

        All arguments are forwarded to ``mp.solutions.hands.Hands``; the
        defaults reproduce the previously hard-coded configuration.

        Args:
            max_num_hands: Value passed as ``max_num_hands``.
            min_detection_confidence: Value passed as
                ``min_detection_confidence``.
            min_tracking_confidence: Value passed as
                ``min_tracking_confidence``.
        """
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(
            static_image_mode=False,
            max_num_hands=max_num_hands,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )

    def findHands(self, img, draw=True, flipType=True):
        """Return the bounding box of every hand detected in ``img``.

        Args:
            img: BGR image (it is converted with ``cv2.COLOR_BGR2RGB`` before
                being handed to MediaPipe). Must expose ``.shape`` with height
                and width as its first two entries.
            draw: Unused; kept so existing callers keep working. Nothing is
                drawn on ``img``. Previously ``draw=False`` made this method
                return ``None``; it now returns the boxes regardless.
            flipType: Unused; kept so existing callers keep working.

        Returns:
            A list with one ``(xmin, ymin, width, height)`` tuple of integer
            pixel values per detected hand. The list is empty when no hand
            is found.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.hands.process(imgRGB)
        if not results.multi_hand_landmarks:
            return []

        h, w = img.shape[:2]
        boxes = []
        for handLms in results.multi_hand_landmarks:
            # Landmark coordinates are scaled by the image size to get pixels.
            xList = [int(lm.x * w) for lm in handLms.landmark]
            yList = [int(lm.y * h) for lm in handLms.landmark]
            if not xList:
                # Nothing to bound; avoids min()/max() on an empty sequence.
                continue

            xmin, ymin = min(xList), min(yList)
            boxes.append((xmin, ymin, max(xList) - xmin, max(yList) - ymin))

        return boxes

### Capturing images for the dataset

In [13]:
# Capture settings. Edit these rather than the loop below.
CAMERA_INDEX = 0
OUTPUT_DIR = r'C:\Users\Development\Desktop\IEEE\Dataset\train\misc'
BOX_PADDING = 20        # extra pixels kept around the detected hand box
CROP_SIZE = (224, 224)  # size every saved crop is resized to
ENTER_KEY = 13
ESC_KEY = 27

target = 1000   # No. of images to capture
count = 0       # No. of images successfully written so far

# cv2.imwrite only returns False when the folder is missing, so create it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    raise RuntimeError(f"Could not open camera {CAMERA_INDEX}")

detector = Detector()

try:
    while True:
        ret, image = cap.read()
        if not ret:
            # No frame delivered; stop instead of passing None to cv2.flip.
            break

        image = cv2.flip(image, 1)
        # Crops are taken from an untouched copy so the rectangle and text
        # drawn on `image` never end up in the saved dataset.
        clean_frame = image.copy()
        frame_h, frame_w = image.shape[:2]

        for (x, y, w, h) in detector.findHands(image) or []:
            cv2.rectangle(image, (x - BOX_PADDING, y - BOX_PADDING),
                          (x + w + BOX_PADDING, y + h + BOX_PADDING),
                          (0, 255, 255), 2)

            if count >= target:
                continue

            # Clamp to the frame: a negative slice start would wrap around
            # and give a wrong or empty crop.
            x0, y0 = max(x - BOX_PADDING, 0), max(y - BOX_PADDING, 0)
            x1 = min(x + w + BOX_PADDING, frame_w)
            y1 = min(y + h + BOX_PADDING, frame_h)
            if x1 <= x0 or y1 <= y0:
                continue  # box lies entirely outside the frame

            cropped_hand = cv2.resize(clean_frame[y0:y1, x0:x1], CROP_SIZE)
            file_name_path = os.path.join(OUTPUT_DIR, f'image_{count + 1}.jpg')
            if not cv2.imwrite(file_name_path, cropped_hand):
                raise OSError(f"Could not write {file_name_path}")
            count += 1

        if count < target:
            cv2.putText(image, f"Captured {count} images...", (50, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 2)
        else:
            cv2.putText(image, "Done! Press Enter to exit.", (50, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow("Hands", image)

        key = cv2.waitKey(1) & 0xFF
        # Enter exits once the target is reached; Esc aborts at any time.
        if key == ESC_KEY or (key == ENTER_KEY and count >= target):
            break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

### Splitting data into train and validation sets

In [14]:
import random
import os
import shutil

# Class folders to split.
# NOTE: this cell used to assign
# ['Brighten Up', 'Brighten Down', 'Video Player', 'Word'] first and then
# immediately overwrite it with the list below; that dead assignment is gone.
dirs = ['tele', 'calc', 'misc']

src_path = r'C:\Users\Development\Desktop\IEEE\Dataset\train'
dst_train_path = r'C:\Users\Development\Desktop\IEEE\Dataset\Upload\train'
dst_val_path = r'C:\Users\Development\Desktop\IEEE\Dataset\Upload\valid'

NUM_IMAGES = 1000   # files image_1.jpg .. image_1000.jpg are expected per class
NUM_TRAIN = 800     # how many of them go to the training set
SPLIT_SEED = 42     # fixed seed so re-running the cell gives the same split

rng = random.Random(SPLIT_SEED)

for d in dirs:
    train_dir = os.path.join(dst_train_path, d)
    val_dir = os.path.join(dst_val_path, d)
    # Without an existing folder, shutil.copy would treat the destination as a
    # file name and overwrite that single file with every image.
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # A set keeps the membership test below constant-time.
    train_ids = set(rng.sample(range(1, NUM_IMAGES + 1), NUM_TRAIN))

    for i in range(1, NUM_IMAGES + 1):
        src_file = os.path.join(src_path, d, f'image_{i}.jpg')
        shutil.copy(src_file, train_dir if i in train_ids else val_dir)