# Data Collector for Model Training

This notebook contains the imports and functions required to collect training data for the gesture recognition model. It is intended to be run once, or re-run until a sufficient amount of data has been gathered. Running it records the landmark data needed for subsequent model development and evaluation.

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import os

In [3]:
# Gesture class labels; recordings are grouped on disk under these names.
gestures = np.array([
    "minimize",
    "switch",
    "volume_down",
    "volume_up",
])

# Videos recorded per gesture, and frames captured per video.
vids = frames = 30

# Root directory (relative to the working directory) for the saved landmarks.
data = "ges_rec_data"

In [4]:
# Create one directory per (gesture, video) pair: <data>/<gesture>/<video>.
# exist_ok=True makes re-running the cell harmless when the directories are
# already there, while still raising if a path exists but is not a directory
# (which would otherwise only surface later, when saving frames into it).
for gesture in gestures:
    for video in range(1, vids + 1):
        os.makedirs(os.path.join(data, gesture, str(video)), exist_ok=True)

In [5]:
def get_landmark_values(_results):
    """Flatten the face and hand landmarks of a result into one 1-D vector.

    Every landmark contributes its ``x``, ``y`` and ``z`` attributes, in that
    order. A landmark set that is missing (falsy) is replaced by zeros: 1404
    values for the face and 63 for each hand.

    Returns the concatenation face + left hand + right hand.
    """
    def flatten_or_zeros(landmark_set, zero_length):
        # Fall back to a zero vector when this landmark set was not detected.
        if not landmark_set:
            return np.zeros(zero_length)
        coords = [[point.x, point.y, point.z] for point in landmark_set.landmark]
        return np.array(coords).flatten()

    parts = (
        flatten_or_zeros(_results.face_landmarks, 1404),
        flatten_or_zeros(_results.left_hand_landmarks, 63),
        flatten_or_zeros(_results.right_hand_landmarks, 63),
    )
    return np.concatenate(parts)

In [7]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Consecutive failed camera reads tolerated for one frame before giving up.
max_read_attempts = 50


def _quit_pressed(delay_ms):
    """Pump the GUI for up to *delay_ms* ms and report whether 'q' was pressed."""
    # Mask to the low byte: some platforms set higher bits in the key code.
    return (cv2.waitKey(delay_ms) & 0xFF) == ord('q')


# Record `vids` videos of `frames` frames for every gesture. For each frame the
# Holistic landmarks are extracted and saved to
# <data>/<gesture>/<video>/<frame>.npy. Pressing 'q' stops the collection.
cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        exit_flag = False
        for gesture in gestures:
            for video in range(1, vids + 1):
                for frame in range(1, frames + 1):
                    # Retry failed reads instead of skipping the frame index, so
                    # every video ends up with a complete 1..frames sequence.
                    for _attempt in range(max_read_attempts):
                        success, image = cap.read()
                        if success:
                            break
                        print("Ignoring empty camera frame.")
                    else:
                        # Every attempt failed: the camera is unusable, so stop
                        # rather than record incomplete videos.
                        print("Camera returned no frames; stopping data collection.")
                        exit_flag = True
                        break

                    # To improve performance, optionally mark the image as not writeable to
                    # pass by reference.
                    image.flags.writeable = False
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    results = holistic.process(image)

                    # Draw landmark annotation on the image.
                    image.flags.writeable = True
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, landmark_drawing_spec=None, connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
                    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

                    # Draw the video counter BEFORE showing the frame so that it
                    # is actually visible on every frame.
                    cv2.putText(image, 'VIDEO: {}'.format(video), (170, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2, cv2.LINE_AA)

                    # NOTE(review): the (800, 450) text origin presumably assumes a
                    # frame wider than 800 px - confirm against the camera resolution.
                    if frame == 1:
                        if video == vids:
                            # Announce the last video of this gesture on a copy of
                            # the fresh frame, so the notice does not overlap the
                            # 'GET READY' text drawn below.
                            notice = image.copy()
                            cv2.putText(notice, 'LAST VIDEO FOR {}. SWITCH TO NEXT GESTURE.'.format(gesture), (800, 450),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                            cv2.imshow('MediaPipe Holistic', notice)
                            if _quit_pressed(5000):
                                exit_flag = True
                                break
                        cv2.putText(image, 'GESTURE: {}. GET READY'.format(gesture), (800, 450),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                        cv2.imshow('MediaPipe Holistic', image)
                        # Pause so the user can get into position for the gesture.
                        if _quit_pressed(2000):
                            exit_flag = True
                            break
                    else:
                        cv2.imshow('MediaPipe Holistic', image)

                    # Collect landmark values
                    landmarks = get_landmark_values(results)
                    # np.save appends the '.npy' extension to this path.
                    folder_path = os.path.join(data, gesture, str(video), str(frame))
                    np.save(folder_path, landmarks)

                    # Exit out of webcam by pressing q
                    if _quit_pressed(1):
                        exit_flag = True
                        break
                if exit_flag:
                    break
            if exit_flag:
                break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()