# [1] Application to collect the data set images

### Installing required libraries

In [33]:
!pip install mediapipe
!pip install opencv-python



### Importing required libraries

In [34]:
import os
import cv2
import mediapipe

### Creating labels
Here each sign of ASL is represented as a category to be classified. <br>
y_labels = labels of each category in the classification. <br>

In [35]:
# One category per letter of the alphabet, 'A' through 'Z', generated from
# the character codes instead of being typed out by hand.
y_labels = [chr(code) for code in range(ord('A'), ord('Z') + 1)]

# Total number of categories.
count_labels = len(y_labels)

### Setting up fonts for the data collector app

In [43]:
# Text-overlay settings used by the cv2.putText calls of the collector UI.
font = cv2.FONT_HERSHEY_SIMPLEX  # typeface
fontScale, thickness = 0.7, 2    # text size multiplier and stroke width
color = (0, 0, 255)              # red, given in BGR channel order

### Creating the Folders for the dataset

In [37]:
# Create the dataset root folder and one sub-folder per label.
# exist_ok=True makes the cell idempotent: re-running it (e.g. after a kernel
# restart) no longer raises FileExistsError when the folders already exist.
path = r'datasets_demo'
os.makedirs(path, exist_ok=True)
for label in y_labels:
    dir_path = os.path.join(path, label)
    os.makedirs(dir_path, exist_ok=True)

### Data set collection
##### Control keys for the data collection process:
1. S :: Start data collection after a 1-second delay
2. P :: Pause data collection
3. R :: Restart data collection for the current category
4. Q / Esc :: Quit

In [52]:
# Interactive data collector.
# Shows the webcam feed, looks for a hand with MediaPipe and, while the status
# is 'collecting', saves the clean (un-annotated) frame into the folder of the
# current category until max_samples images exist for it, then moves on to the
# next category.
cap = cv2.VideoCapture(0) # opening video capture stream from the webcam

# status of the collection process, initialized to idle
status = "idle"
count = 0 # number of images collected so far for the current category
curr_cat = 0 # index into y_labels of the category being collected presently
max_samples = 500 # number of images to be collected per category

# mediapipe hands module, used to detect a hand and extract its landmarks
handsModule = mediapipe.solutions.hands

# mediapipe drawing module, used to draw the landmarks on the preview
drawingModule = mediapipe.solutions.drawing_utils

# path of the folder to save the images
path = r"datasets_demo"

# try/finally guarantees that the webcam is released and the windows are
# closed even when an exception is raised inside the loop
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the webcam (device index 0)")

    # loading and displaying the reference image of the hand symbols;
    # cv2.imread returns None (it does not raise) when the file cannot be read
    sample = cv2.imread("ASL_Alphabet.jpg")
    if sample is None:
        print("warning : could not load reference image ASL_Alphabet.jpg")
    else:
        cv2.imshow("sample", sample)

    # extracting the frame height of the video stream
    frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # frame width = frame height, i.e. square images
    frameWidth = frameHeight

    with handsModule.Hands(static_image_mode=True,       # every frame is treated as an independent image
                           min_detection_confidence=0.7, # minimum confidence (0.7) to report a detected hand
                           # NOTE(review): MediaPipe documents min_tracking_confidence as
                           # ignored when static_image_mode=True - confirm it is still wanted
                           min_tracking_confidence=0.7,
                           max_num_hands=1) as hands:    # at most 1 hand will be detected

        while curr_cat < len(y_labels): # looping till images for all categories are captured

            # obtaining the directory path for the current category
            dir_path = os.path.join(path, y_labels[curr_cat])

            # capturing a frame from the webcam; stop cleanly if the stream
            # fails instead of crashing on a None frame
            ret, frame = cap.read()
            if not ret or frame is None:
                print("error : could not read a frame from the webcam")
                break

            # cropping the frame to a square of frameHeight x frameWidth pixels
            frame = frame[0:frameHeight, 0:frameWidth]

            # clean copy of the frame: this is what gets analysed and saved
            raw = frame.copy()

            # adding text in the image for UI (cv2.putText draws on `frame` in place)
            img = cv2.putText(frame, 'Sample Data Collector', (30,30), font, 1, (255,0,0), 2, cv2.LINE_AA)

            # scanning the clean copy for hands, so the UI text drawn on
            # `frame` cannot influence the detection
            results = hands.process(cv2.cvtColor(raw, cv2.COLOR_BGR2RGB))

            # checking if a hand is detected
            if results.multi_hand_landmarks is not None:

                # drawing the landmarks in the preview image
                for handLandmarks in results.multi_hand_landmarks:
                    drawingModule.draw_landmarks(img,
                                                 handLandmarks,
                                                 handsModule.HAND_CONNECTIONS)

                # displaying additional information
                img = cv2.putText(frame, 'Hand Detected', (30,60), font, fontScale, color, thickness, cv2.LINE_AA)
                img = cv2.putText(frame, 'Category : '+y_labels[curr_cat], (30,90), font, fontScale, color, thickness, cv2.LINE_AA)
                img = cv2.putText(frame, 'Data point : '+str(count), (30,120), font, fontScale, color, thickness, cv2.LINE_AA)

                # if the app is in collection mode and not all samples are
                # collected yet, save the clean frame as <count + 1>.jpg
                if status == 'collecting' and count < max_samples:
                    img_path = os.path.join(dir_path, str(count + 1) + ".jpg")
                    # cv2.imwrite reports failure through its return value;
                    # only count images that were really written
                    if cv2.imwrite(img_path, raw):
                        count += 1
                    else:
                        # pause so the error is printed once, not on every frame
                        status = 'paused'
                        print("error : could not save ", img_path)
                        print("status : ", status)

                # if all samples for the category are collected,
                # 1. reset the count
                # 2. get back to idle mode
                # 3. increase the curr_cat variable
                elif count == max_samples:
                    count = 0
                    status = "idle"
                    curr_cat += 1
            # if no hand is detected, show an error message
            else:
                img = cv2.putText(frame, 'Hand not detected', (30,60), font, fontScale, color, thickness, cv2.LINE_AA)

            # display the frame
            cv2.imshow('Test hand', img)

            # masking with 0xFF keeps only the low byte of the key code
            key = cv2.waitKey(1) & 0xFF

            # checking if any key was pressed, and if so processing the command given
            if key in (27, ord('q'), ord('Q')): # Esc or Q: quit the application
                break
            elif key in (ord('s'), ord('S')): # start collection after a 1 second delay
                cv2.waitKey(1000)
                status = 'collecting'
                print("status : ", status)
            elif key in (ord('p'), ord('P')): # pause data collection
                status = 'paused'
                print("status : ", status)
            elif key in (ord('r'), ord('R')): # restart data collection for current category
                print("restart")
                count = 0
                status = "idle"
                print("status : ", status)
finally:
    cv2.destroyAllWindows() # close all windows created
    cap.release() # release the webcam video stream

In [41]:
# Standalone cleanup: the same two calls that end the collector cell.
cv2.destroyAllWindows()  # close every OpenCV window
cap.release()            # release the webcam video stream