# 1. Import dependencies

In [4]:
import copy
import cv2 as cv
import csv
import mediapipe as mp
import numpy as np
import itertools
import os

from collections import deque

# 2. Initialization

## a. Initialize variables and models

In [5]:
# Initialize camera settings
# `webcam` is the device index handed to cv.VideoCapture; `cap` is the capture
# handle that the capture loop reads frames from and releases when it ends.
webcam = 0
cap = cv.VideoCapture(webcam)
# Request a 960x540 frame size. The return values of cap.set() are not checked,
# so nothing here verifies that the device actually applied the request.
cap.set(cv.CAP_PROP_FRAME_WIDTH, 960)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, 540)

# Initialize Mediapipe's hand model
# Short aliases for the MediaPipe sub-modules used later: the hand solution,
# the drawing helpers and the default drawing styles.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
# Single shared detector instance, limited to one hand per frame
# (max_num_hands=1). See the MediaPipe Hands documentation for the exact
# meaning of static_image_mode and the two confidence thresholds.
hands = mp_hands.Hands(
    static_image_mode=False,   
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5,
)

# Initialize main directory
# `main_directory` is the PARENT of the current working directory; the CSV
# helpers build their '/dataset/csv_pre_combined/...' paths relative to it.
path = os.getcwd()
main_directory = os.path.dirname(path)

## b. Initialize functions

In [6]:
# Calculate landmark list 
def calc_landmark_list(image, landmarks):
    """Scale hand landmarks by the frame size and return pixel coordinates.

    Args:
        image: Frame object whose ``shape`` begins with ``(height, width)``.
        landmarks: Object exposing a ``landmark`` iterable; every item has
            ``x`` and ``y`` attributes that are multiplied by the frame
            width and height respectively.

    Returns:
        A list with one ``[x, y]`` pair of ints per landmark. Each value is
        capped at the last valid column / row index of the frame.
    """
    frame_height, frame_width = image.shape[:2]
    last_col = frame_width - 1
    last_row = frame_height - 1

    # Only the upper bound is clamped; the depth component (z) is not used.
    return [
        [
            min(int(point.x * frame_width), last_col),
            min(int(point.y * frame_height), last_row),
        ]
        for point in landmarks.landmark
    ]

In [7]:
# Pre-processing calculated landmarks 
def pre_process_landmark(landmark_list):
    """Make landmarks relative to the first point and scale them to [-1, 1].

    The first entry of ``landmark_list`` is used as the origin: its x / y are
    subtracted from the x / y of every point (including itself, which
    therefore becomes ``0, 0``). The points are then flattened into a single
    list and every value is divided by the largest absolute value in it.

    Args:
        landmark_list: Sequence of ``[x, y]`` pairs. The input is not
            modified.

    Returns:
        A flat list ``[x0, y0, x1, y1, ...]`` of floats. An empty input gives
        an empty list. If every point coincides with the first one (so there
        is nothing to scale by), a list of ``0.0`` values is returned instead
        of raising ``ZeroDivisionError``.
    """
    # Nothing to normalise; max() below would raise on an empty sequence.
    if len(landmark_list) == 0:
        return []

    base_x, base_y = landmark_list[0][0], landmark_list[0][1]

    # Convert to relative coordinates and flatten in one pass. Any components
    # beyond x and y are carried over unchanged, as a plain flatten would do.
    flattened = []
    for point in landmark_list:
        flattened.extend([point[0] - base_x, point[1] - base_y, *point[2:]])

    # Normalization
    max_value = max(map(abs, flattened))
    if max_value == 0:
        # Degenerate hand: all landmarks landed on the same pixel.
        return [0.0] * len(flattened)

    return [value / max_value for value in flattened]

## c. Initialize dataset template function

In [8]:
# Initialize number of hand keypoints and alphabet class
# num_coords: how many x<i>/y<i> column pairs init_csv writes into the header.
# alphabet:   class label being recorded; it is used both as the suffix of the
#             CSV file name and as the first value of every logged row.
num_coords = 21
alphabet = "C"

In [9]:
# Initialize .csv file
def init_csv(feature):
    """Create the keypoint CSV for ``feature`` and write its header row.

    The file is ``main_directory + '/dataset/csv_pre_combined/keypoints_<feature>.csv'``.
    The header is ``class, x1, y1, ..., x<num_coords>, y<num_coords>``.
    The file is opened in write mode, so an existing file for the same
    ``feature`` is truncated and replaced by a header-only file.

    Args:
        feature: Class label used to build the file name.
    """
    header = ['class']
    for val in range(1, num_coords + 1):
        header += ['x{}'.format(val), 'y{}'.format(val)]

    csv_path = main_directory + '/dataset/csv_pre_combined/keypoints_{}.csv'.format(feature)

    # Make sure the target folder exists; otherwise open() fails with
    # FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)

    with open(csv_path, mode='w', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(header)

In [10]:
# Start a fresh CSV for the current class. init_csv opens the file in 'w'
# mode, so re-running this cell discards any rows already logged for it.
init_csv(alphabet)

## d. Initialize dataset logging function

In [11]:
def logging_csv(feature, landmark_list):
    """Append one labelled sample to the keypoint CSV of ``feature``.

    The row written is ``feature`` followed by every value in
    ``landmark_list``. The target file is
    ``main_directory + '/dataset/csv_pre_combined/keypoints_<feature>.csv'``,
    opened in append mode.

    Args:
        feature: Class label; selects the file and fills the first column.
        landmark_list: Iterable of values that make up the rest of the row.

    Returns:
        None.
    """
    target = main_directory + '/dataset/csv_pre_combined/keypoints_{}.csv'.format(feature)

    with open(target, 'a', newline="") as handle:
        row = [feature]
        row.extend(landmark_list)
        csv.writer(
            handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL
        ).writerow(row)

# 3. Capture, re-normalize & write landmark keypoints into dataset

In [12]:
# While in capturing process ##################################################
# The loop runs inside try/finally so that the camera handle and the preview
# window are always released, even when the cell is interrupted from the
# kernel (KeyboardInterrupt) or a frame raises while being processed.
try:
    while True:

        # Application stops when "ESC" key is pressed
        if cv.waitKey(5) & 0xFF == 27:  # ESC key.
            break

        # Stop when the capture device delivers no further frame
        available, image = cap.read()
        if not available:
            break
        image = cv.flip(image, 1)  # Mirror display
        # Keep an untouched BGR copy for drawing and display; `image` itself
        # is converted to RGB below for the detector.
        debug_image = copy.deepcopy(image)

        # Convert frame image from BGR to RGB before running detection
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

        # Mark the frame read-only while the detector processes it
        image.flags.writeable = False
        results = hands.process(image)
        image.flags.writeable = True

        # If the hand is detected: ############################################
        if results.multi_hand_landmarks is not None:
            # Handedness is not used for logging, so iterate over the
            # landmarks alone.
            for hand_landmarks in results.multi_hand_landmarks:

                # Convert normalized landmark values into pixel coordinates
                landmark_list = calc_landmark_list(debug_image, hand_landmarks)

                # Make coordinates relative to the first landmark and normalize
                pre_processed_landmark_list = pre_process_landmark(landmark_list)

                # Write/log received keypoints into dataset
                logging_csv(alphabet, pre_processed_landmark_list)

                # Visualize complete hand landmarks
                mp_drawing.draw_landmarks(
                    debug_image,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

        # Output ##############################################################
        cv.imshow('Hand Gesture Recognition', debug_image)
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv.destroyAllWindows()

[0.0, 0.0, -0.28, 0.04666666666666667, -0.58, -0.013333333333333334, -0.8066666666666666, -0.03333333333333333, -1.0, -0.05333333333333334, -0.44666666666666666, -0.5, -0.6133333333333333, -0.7266666666666667, -0.7733333333333333, -0.8266666666666667, -0.9333333333333333, -0.8466666666666667, -0.34, -0.5533333333333333, -0.5133333333333333, -0.8333333333333334, -0.7066666666666667, -0.9066666666666666, -0.88, -0.8933333333333333, -0.24666666666666667, -0.56, -0.4066666666666667, -0.8266666666666667, -0.6066666666666667, -0.9066666666666666, -0.78, -0.9066666666666666, -0.16, -0.5266666666666666, -0.32, -0.7333333333333333, -0.48, -0.82, -0.62, -0.8466666666666667]
[0.0, 0.0, -0.27631578947368424, 0.02631578947368421, -0.5723684210526315, -0.05921052631578947, -0.7960526315789473, -0.07236842105263158, -1.0, -0.08552631578947369, -0.46710526315789475, -0.5263157894736842, -0.6118421052631579, -0.7697368421052632, -0.756578947368421, -0.8618421052631579, -0.9144736842105263, -0.881578947

KeyboardInterrupt: 