# Hand tracking skeleton (Mac OS X)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from skimage import io

from thread_camera import ThreadCamera
from utils import load_image_with_alpha, overlay_alpha
from utils import draw_text, draw_multiline_text, draw_skeleton

from jesture_sdk_python import JestureSdkRunner
from webcam_draw import WebcamDrawStream

print('cv2.__version__:', cv2.__version__)  # 4.1.2 recommended

(MainThread) Loaded backend module://ipykernel.pylab.backend_inline version unknown.


shutil.which("libSystem.B.dylib"): None
ctypes.CDLL("libSystem.B.dylib")._name: libSystem.B.dylib
ctypes.__version__: 1.1.0
platform.mac_ver(): ('10.16', ('', '', ''), 'x86_64')
cv2.__version__: 4.1.2


In [3]:
# Configuration cell: creates the OpenCV window and defines every layout
# constant, font and piece of mutable state that the capture loop reads.

# create the application window
name = 'JestureSDK: Python Demo'
width, height = (640, 480)
cv2.namedWindow(name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(name, (width+40, height+20))
cv2.startWindowThread()

# Recorded samples are pickled to this file. The capture loop opens it in
# 'wb' mode, so an existing file with the same name is overwritten.
data_file_name = './hand_keypoints_dataset_v1.pkl'

# set the logo stuff
# The design assets live outside the repository. Set JESTURE_DESIGN_ROOT to
# point at them on another machine; the default is the original location.
design_root = os.environ.get(
    'JESTURE_DESIGN_ROOT', '/Users/izakharkin/Desktop/deepjest/_design')
logo_path = f'{design_root}/wix/jesture_ai_logo_comfortaa/jesture_logo_comfortaa-removebg.png'
logo_img, logo_alpha = load_image_with_alpha(logo_path, remove_borders=True)
logo_loc = (10, 10)

# set the gestures help stuff
# keyboard key -> gesture class index
key_to_idx = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5,
              '6': 6, '7': 7, '8': 8, '9': 9, 'f': 10, 'u': 11,
              'd': 12, 'c': 13}
key_ords = [ord(x) for x in key_to_idx]
# gesture class index -> label stored with every recorded sample
idx_to_gesture = {0: 'horns', 1: 'one', 2: 'two', 3: 'three', 4: 'four',
                  5: 'five', 6: 'fist', 7: 'piece', 8: 'love', 9: 'ok',
                  10: 'fuck', 11: 'thumb_up', 12: 'thumb_down', 13: 'call_me'}
# one "<key>: <gesture>" line per key, shown in the on-screen help box
help_textlist = [f'{k}: {idx_to_gesture[key_to_idx[k]]}' for k in key_to_idx]
help_textlist_str = '\n'.join(help_textlist)

# Help box corners, keyed by the hand currently being captured.
help_box_width = 175
help_box_tl = {'right': (10, height//5+20),
               'left': (width-help_box_width, height//5+20)}
help_box_br = {'right': (10+help_box_width, height-height//5+60),
               'left': (width, height-height//5+60)}
# help text starts 10 px inside the top-left corner of the matching box
help_text_loc = {side: (corner[0]+10, corner[1]+10)
                 for side, corner in help_box_tl.items()}
help_font = ImageFont.truetype("Comfortaa-Light.ttf", 20)

# set the scaled hands stuff
mid_hand_box_tl = (width//3, height-height//5)
mid_hand_box_br = (2*width//3, height)
hand_box_tl = {'right': (2*width//3, height-height//5),
               'left': (0, height-height//5)}
hand_box_br = {'right': (width, height),
               'left': (width//3, height)}

# set the hand type stuff
handtype_text = {"right": "Right hand capture (L/R to switch)",
                 "left": "Left hand capture (L/R to switch)"}
handtype_text_loc = (width//2, 25)

# set common font
font = ImageFont.truetype("Comfortaa-Light.ttf", 24)

# variables used in the main loop
pressed_duration = 0   # frames left to keep the pressed key highlighted
pressed_text = ''      # "<key>: <gesture>" of the most recent gesture key

selfie_mode = True     # mirror the frame horizontally before display
hand_type = 'right'    # hand being recorded; switched with the l / r keys
data_list = []         # recorded samples, one dict per gesture key press
prev_k = ''            # previous gesture key, used to decide when to autosave
i = 0                  # count of frames displayed by the main loop

(MainThread) STREAM b'IHDR' 16 13
(MainThread) STREAM b'IDAT' 41 8192


Removing 250 pixels from up and down borders
Original image size: (1920, 580)
Target size: (192, 58)


In [4]:
# Brief pause before the SDK runner is created.
# NOTE(review): the reason for the 1 s delay is not visible here — presumably
# it lets the window created above settle; confirm before removing.
time.sleep(1)
# start Jesture SDK Python runner
# NOTE(review): cam_id=1 is hard-coded here and again where WebcamDrawStream
# is created; presumably both should use the same camera index — verify.
jesture_runner = JestureSdkRunner(cam_id=1)
# Last expression of the cell, so its return value is echoed as cell output.
jesture_runner.start_recognition()

(MainThread) [JestureSdkRunner] Instance created.
(jesture_sdk_python_thread) [JestureSdkRunner] Starting recognition...
(MainThread) [JestureSdkRunner] Recognition thread started.


<jesture_sdk_python.JestureSdkRunner at 0x7fa4d5dacdd0>

In [5]:
# jesture_runner.stop_recognition()

In [6]:
# Wait before attaching the drawing stream.
# NOTE(review): presumably this gives the recognition thread time to start
# producing results — the reason for 3 s is not visible here; confirm.
time.sleep(3)
# start reading frames to display in the application window
# The stream receives the SDK runner plus the frame size and the middle
# hand-box corners defined in the configuration cell; the box outline itself
# is switched off via draw_hand_box=False.
cap = WebcamDrawStream(
    jesture_runner, cam_id=1, width=width, height=height,
    hand_box_tl=mid_hand_box_tl, hand_box_br=mid_hand_box_br,
    draw_hand_box=False
)
# Last expression of the cell, so its return value is echoed as cell output.
cap.start()

(MainThread) [WebcamDrawStream] Starting a thread...
(MainThread) [WebcamDrawStream] Thread started.


<webcam_draw.WebcamDrawStream at 0x7fa4d2c32a90>

(480, 640, 3)


In [7]:
# Interactive capture loop.
#
# Each iteration shows the latest camera frame with the logo, the help box and
# the hand-type hint drawn on top, then polls the keyboard:
#   q      - leave the loop
#   l / r  - switch the hand being recorded
#   any key in `key_to_idx` - append one labelled sample to `data_list`
#
# The whole loop runs inside try/finally so that the dataset is written and the
# camera / SDK threads are stopped even when the cell is interrupted or raises.
time.sleep(3)
try:
    while True:
        # Read the attribute once per iteration so the None-check and the use
        # below see the same object (the stream updates it from its own thread).
        raw_frame = cap.frame
        if raw_frame is None:
            # no frame delivered yet: back off briefly instead of spinning
            time.sleep(0.01)
            continue

        # get current webcam image with drawn hand skeletons
        frame = raw_frame[:, ::-1, :] if selfie_mode else raw_frame

        # draw logo (channel order of the logo is reversed to match the frame)
        frame = overlay_alpha(logo_img[:, :, ::-1], logo_alpha, frame,
                              loc=logo_loc, alpha=1.0)

        # draw ui elements; non-ndarray results are converted through .get()
        frame = Image.fromarray(
            frame if isinstance(frame, np.ndarray) else frame.get())
        draw = ImageDraw.Draw(frame, "RGBA")
        draw.rectangle((help_box_tl[hand_type], help_box_br[hand_type]),
                       fill=(0, 0, 0, 127), outline=(235, 190, 63, 255))

        # draw text
        draw.multiline_text(handtype_text_loc, handtype_text[hand_type],
                            font=font, fill=(255, 255, 255))
        draw.multiline_text(help_text_loc[hand_type], help_textlist_str,
                            font=help_font, fill=(255, 255, 255))

        # retrieve keyboard signal; "no key" (-1) becomes 255 after the modulo,
        # which matches none of the handled keys
        c = cv2.waitKey(1) % 256
        if c == ord('q'):
            break

        if c == ord('l'):
            hand_type = 'left'
        if c == ord('r'):
            hand_type = 'right'

        # retrieve if gesture key is pressed
        if chr(c) in key_to_idx:
            k, v = chr(c), idx_to_gesture[key_to_idx[chr(c)]]
            pressed_text = f'{k}: {v}'
            # same line layout as the help text, but only the pressed entry is
            # non-empty, so drawing it on top highlights that single line
            notify_textlist_str = "\n".join(
                [x if x == pressed_text else "" for x in help_textlist])
            pressed_duration = 4
            print(f"pressed {pressed_text}, shape: {frame.size}")
            data_list.append({
                'hand_type': hand_type,
                'gesture_id': key_to_idx[k],
                'gesture_name': v,
                'pred_gesture_name': jesture_runner.get_gesture(
                    f'{hand_type}_static'),
                'keypoints': jesture_runner.get_hand_keypoints(
                    f'{hand_type}_keypoints', mirror=False),
                'scaled_keypoints': jesture_runner.get_hand_keypoints(
                    f'scaled_{hand_type}_keypoints', mirror=False),
            })
            # save current data so it is not lost if the program exits
            # unexpectedly; only done when the gesture key changes
            if k != prev_k:
                with open(data_file_name, 'wb') as file:
                    pickle.dump(data_list, file)
            prev_k = k

        # highlight the pressed entry for `pressed_duration` more frames
        if pressed_duration > 0:
            draw.multiline_text(help_text_loc[hand_type], notify_textlist_str,
                                font=help_font, fill=(235, 190, 63))
            pressed_duration -= 1

        frame = np.array(frame).astype(np.uint8)
        cv2.imshow(name, frame)

        i += 1
except KeyboardInterrupt:
    # Interrupting the kernel is a normal way to stop; fall through to cleanup.
    print('Interrupted, saving data and shutting down...')
finally:
    try:
        with open(data_file_name, 'wb') as file:
            print(f'Dumping {len(data_list)} items to {data_file_name}...')
            pickle.dump(data_list, file)
            print(f'Dumped.')
    finally:
        # release the camera and the SDK even if the dump above failed
        cap.stop()
        jesture_runner.stop_recognition()

        # waitKey calls around the destroy calls, as in the original teardown
        cv2.waitKey(1)
        cv2.destroyWindow(name)
        cv2.destroyAllWindows()
        cv2.waitKey(1)

pressed c: call_me, shape: (640, 480)
pressed c: call_me, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 8: love, shape: (640, 480)
pressed 7: piece, shape: (640, 480)
pressed 7: piece, shape: (640, 480)
pressed 7: p

(MainThread) [WebcamDrawStream] Stopping...
(Camera-Draw Python Thread) [WebcamDrawStream] Frame loop finished.
(Camera-Draw Python Thread) [WebcamDrawStream] Capture released.
(MainThread) [WebcamDrawStream] Camera thread joined.
(MainThread) [JestureSdkRunner] Stopping recognition...
(MainThread) [JestureSdkRunner] Recognition stopped.


Dumping 34 items to ./hand_keypoints_dataset_v1.pkl...
Dumped.


(MainThread) [JestureSdkRunner] Thread joined.


-1

In [8]:
# Manual teardown cell: stops the frame stream and the SDK runner, then closes
# every OpenCV window. The recorded output of this cell shows both stop calls
# completing after the main loop cell had already stopped them.
cap.stop()
jesture_runner.stop_recognition()

# NOTE(review): the waitKey(1) calls around the destroy call presumably let
# the GUI process the close request — confirm against the OpenCV highgui docs.
cv2.waitKey(1)
# cv2.destroyWindow(name)
cv2.destroyAllWindows()
# Last expression of the cell, so its return value is echoed as cell output.
cv2.waitKey(1)

(MainThread) [WebcamDrawStream] Stopping...
(MainThread) [WebcamDrawStream] Camera thread joined.
(MainThread) [JestureSdkRunner] Stopping recognition...
(MainThread) [JestureSdkRunner] Recognition stopped.
(MainThread) [JestureSdkRunner] Thread joined.


-1

In [1]:
# Sanity check of the recorded dataset. The cell re-imports pickle and
# re-declares the file name itself, so it does not rely on earlier cells.
import pickle

data_file_name = './hand_keypoints_dataset_v1.pkl'

# NOTE: pickle can execute arbitrary code while loading; only open dataset
# files that were produced by this notebook.
with open(data_file_name, mode='rb') as file:
    loaded_data = pickle.load(file)

# number of recorded samples, then the first three and the last three of them
print(len(loaded_data))
print(*(loaded_data[:3], loaded_data[-3:]))

34
[{'hand_type': 'right', 'gesture_id': 13, 'gesture_name': 'call_me', 'pred_gesture_name': '———', 'keypoints': array([[ 1.79542389e+02,  3.36280609e+02, -3.44231812e-05],
       [ 2.18843719e+02,  3.25856262e+02, -1.59617029e-02],
       [ 2.53346893e+02,  2.88074677e+02, -1.63796842e-02],
       [ 2.74987457e+02,  2.54825790e+02, -2.79200319e-02],
       [ 2.95982483e+02,  2.30596329e+02, -4.38491777e-02],
       [ 2.29035172e+02,  2.50483276e+02, -4.79612360e-03],
       [ 2.37625763e+02,  2.28175095e+02, -6.98904544e-02],
       [ 2.32817200e+02,  2.60853638e+02, -8.17079693e-02],
       [ 2.28645111e+02,  2.87859497e+02, -6.78622648e-02],
       [ 2.04800598e+02,  2.42900650e+02, -1.12882107e-02],
       [ 2.11697922e+02,  2.24285095e+02, -8.63398314e-02],
       [ 2.09950424e+02,  2.63993317e+02, -9.95376036e-02],
       [ 2.08270126e+02,  2.92740906e+02, -8.26514438e-02],
       [ 1.78836899e+02,  2.39264847e+02, -2.74846647e-02],
       [ 1.86689362e+02,  2.14688293e+02, -8.96

---

In [None]:
# Maps the gesture labels used as keys below to the text that should be shown
# for them: an emoji where one is listed, otherwise the label itself.
# https://unicode.org/emoji/charts/emoji-list.html#1f44c
#
# A `\U` escape takes exactly eight hex digits, so every code point below is
# zero-padded to eight digits.
gesture_to_emoji = {
    '': '',
    '———': '———',
    'ONE': '\U0000261D',
    'TWO': 'TWO',
    'THREE': 'THREE',
    'FOUR': 'FOUR',
    'FIVE' : '\U0001F590',
    'OK': '\U0001F44C',
    'YEAH': '\U0000270C',
    'SPIDERMAN': '\U0001F91F',
    'ROCK': '\U0001F918',
    # U+270A (raised fist). The previous nine-digit escape was parsed as
    # U+9270 followed by a literal "A".
    'FIST': '\U0000270A'
}

# !pip install emoji
import unicodedata
# in python2 use u'\U0001f603'
print('\U0001F44C')#U+1F44C
print(gesture_to_emoji['FIVE'])

In [None]:
def show_logo_alpha(logo_path=None):
    """Show the logo image and its alpha mask as two separate matplotlib plots.

    Args:
        logo_path: Path of the logo PNG to load. When omitted, the original
            hard-coded location inside the author's `_design` folder is used,
            so existing zero-argument calls keep working unchanged.
    """
    if logo_path is None:
        design_root = '/Users/izakharkin/Desktop/deepjest/_design'
        logo_path = f'{design_root}/wix/jesture_ai_logo_comfortaa/jesture_logo_comfortaa-removebg.png'

    # the loader returns the image together with its alpha mask
    logo_img, logo_alpha = load_image_with_alpha(logo_path, remove_borders=True)

    plt.imshow(logo_img);
    plt.show();
    # the mask is rendered with a grayscale colormap
    plt.imshow(logo_alpha, cmap='gray');
    plt.show();

In [None]:
def test_overlay():
    """Visual check: paste the logo onto ./test.jpg and draw the gesture legend.

    Uses the notebook-level `logo_img` / `logo_alpha`, so the configuration
    cell must have been run first. The result is shown with matplotlib.
    """
    canvas = io.imread('./test.jpg')
    canvas = overlay_alpha(logo_img, logo_alpha, canvas, loc=(10, 10), alpha=1.0)

    # digit key -> gesture label, in the order the legend lists them
    gesture_by_key = dict(zip(
        (1, 2, 3, 4, 5, 6, 7, 8, 9, 0),
        ('one', 'two', 'three', 'four', 'five',
         'fist', 'piece', 'love', 'ok', 'horns'),
    ))
    legend_lines = []
    for key, gesture in gesture_by_key.items():
        legend_lines.append(f'{key}: {gesture}')
    canvas = draw_multiline_text(canvas, legend_lines)

    plt.figure(figsize=(10, 7))
    plt.imshow(canvas);

In [None]:
def test_blackbox():
    """Visual check: downscale ./blackbox.png, build its mask, overlay on ./test.jpg.

    Steps, each shown with matplotlib:
      1. load the box image and shrink it to one seventh of its size;
      2. build an alpha mask that is 0 where the first channel equals 255 and
         255 everywhere else;
      3. overlay the box on the test image at (10, 100).
    """
    blackbox = io.imread('./blackbox.png')
    # cv2.resize takes the target size as (width, height)
    orig_size = (blackbox.shape[1], blackbox.shape[0])
    print('orig_size:', orig_size)
    target_size = (orig_size[0] // 7, orig_size[1] // 7)
    print('target_size:', target_size)
    blackbox = cv2.resize(blackbox, target_size)
    plt.imshow(blackbox);
    plt.show();

    blackbox_alpha = np.ones_like(blackbox)[:,:,0] * 255
    blackbox_alpha[blackbox[:,:,0] == 255] = 0
    plt.imshow(blackbox_alpha, cmap='gray');
    plt.show();

    # The background must be loaded here: `testimg` is assigned in this
    # function and is therefore local, so reading it before assignment raised
    # UnboundLocalError. The same test image as the sibling checks is used.
    testimg = io.imread('./test.jpg')
    testimg = overlay_alpha(blackbox, blackbox_alpha, testimg, loc=(10, 100), alpha=1.0)
    plt.figure(figsize=(10, 7))
    plt.imshow(testimg);

In [None]:
def test_pil_draw():
    """Visual check of PIL drawing: a translucent filled box plus an outline.

    Loads ./test.jpg, draws both rectangles over the same corner pair in
    "RGBA" drawing mode and shows the result with matplotlib.
    """
    box = ((280, 10), (1010, 706))

    canvas = Image.fromarray(io.imread('./test.jpg'))
    painter = ImageDraw.Draw(canvas, "RGBA")
    # first pass fills the box, second pass strokes its border
    painter.rectangle(box, fill=(0, 0, 0, 127))
    painter.rectangle(box, outline=(63, 190, 235, 127))

    plt.figure(figsize=(12, 10))
    plt.imshow(np.array(canvas));