# Hand tracking skeleton (Mac OS X)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
from skimage import io
import numpy as np
import datetime
import pickle
import time
import cv2

from thread_camera import ThreadCamera
from utils import load_image_with_alpha, overlay_alpha
from utils import draw_text, draw_multiline_text, draw_skeleton

from jesture_sdk_python import JestureSdkRunner
from webcam_draw import WebcamDrawStream

print('cv2.__version__:', cv2.__version__)  # 4.1.2 recommended

shutil.which("libSystem.B.dylib"): None
ctypes.CDLL("libSystem.B.dylib")._name: libSystem.B.dylib
ctypes.__version__: 1.1.0
platform.mac_ver(): ('10.16', ('', '', ''), 'x86_64')


(MainThread) Loaded backend module://ipykernel.pylab.backend_inline version unknown.


cv2.__version__: 4.1.2


In [3]:
# --- application window -----------------------------------------------------
name = 'JestureSDK: Python Demo'
width, height = 640, 480
cv2.namedWindow(name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(name, (width + 40, height + 20))
cv2.startWindowThread()

# --- output data file, stamped as DDMMYY_HH_MM ------------------------------
now = datetime.datetime.now()
dt = now.strftime('%d%m%y_%H_%M')
data_file_name = 'hand_kps_v1_' + dt + '.pkl'

# --- logo overlay -----------------------------------------------------------
logo_path = 'jesture_logo_comfortaa-removebg.png'
logo_img, logo_alpha = load_image_with_alpha(logo_path, remove_borders=True)
logo_loc = (10, 10)

# --- gesture labelling tables -----------------------------------------------
# keyboard key -> gesture index (digits map to themselves, letters follow)
key_to_idx = {key: idx for idx, key in enumerate('0123456789fgdch')}
key_ords = list(map(ord, key_to_idx))
# gesture index -> gesture name stored with every recorded sample
idx_to_gesture = dict(enumerate([
    'no_gesture', 'one', 'two', 'three', 'four',
    'five', 'fist', 'peace', 'love', 'ok',
    'fuck', '1-gun', '2-gun', 'call_me', 'horns',
]))
# number of samples recorded so far per gesture index
idx_to_count = dict.fromkeys(idx_to_gesture, 0)

# --- help box geometry, keyed by the hand currently being captured ----------
help_box_width = 175
help_box_tl = {
    side: (x, height // 5 + 10)
    for side, x in (('right', 10), ('left', width - help_box_width))
}
help_box_br = {
    side: (x, height - 30)
    for side, x in (('right', 10 + help_box_width), ('left', width))
}
# text starts 10 px inside the top-left corner of the box
help_text_loc = {side: (x + 10, y + 10) for side, (x, y) in help_box_tl.items()}
help_font = ImageFont.truetype("Comfortaa-Light.ttf", 20)

# --- scaled-hand boxes: bottom fifth of the frame, split in thirds ----------
mid_hand_box_tl = (width // 3, height - height // 5)
mid_hand_box_br = (2 * width // 3, height)
hand_box_tl = {
    side: (x, height - height // 5)
    for side, x in (('right', 2 * width // 3), ('left', 0))
}
hand_box_br = {
    side: (x, height)
    for side, x in (('right', width), ('left', width // 3))
}

# --- on-screen captions -----------------------------------------------------
handtype_text = {
    "right": "Right hand capture (L/R)",
    "left": "Left hand capture (L/R)",
}
handtype_text_loc = (width // 2, 25)
count_text_loc = (width // 3, 25)

# font shared by the general captions
font = ImageFont.truetype("Comfortaa-Light.ttf", 24)

# --- mutable state consumed by the main loop --------------------------------
pressed_duration = 0   # frames left to keep the last pressed entry highlighted
pressed_text = ''      # help line of the last recorded gesture

selfie_mode = True     # mirror the frame horizontally before display
hand_type = 'right'    # which hand is currently being recorded
data_list = []         # recorded samples, pickled to data_file_name
prev_k = ''            # previously pressed gesture key
i = 0                  # displayed-frame counter

(MainThread) STREAM b'IHDR' 16 13
(MainThread) STREAM b'IDAT' 41 8192


Removing 250 pixels from up and down borders
Original image size: (1920, 580)
Target size: (192, 58)


In [4]:
class Args:
    """Minimal stand-in for command-line arguments when running in a notebook.

    Attributes:
        cam_id: index of the camera handed to the SDK runner and the webcam
            stream. Defaults to 1, the value originally hard-coded here.
    """

    def __init__(self, cam_id=1):
        self.cam_id = cam_id


args = Args()

In [5]:
# Short pause before creating the runner.
# NOTE(review): presumably lets the freshly created window settle before the
# SDK opens the camera -- confirm whether the delay is still needed.
time.sleep(1)
# start Jesture SDK Python runner
# The runner is bound to the same camera index that the display stream uses.
jesture_runner = JestureSdkRunner(cam_id=args.cam_id)
# Being the last expression of the cell, the return value is echoed as output.
jesture_runner.start_recognition()

(MainThread) [JestureSdkRunner] Instance created.
(jesture_sdk_python_thread) [JestureSdkRunner] Starting recognition...
(MainThread) [JestureSdkRunner] Recognition thread started.


<jesture_sdk_python.JestureSdkRunner at 0x7fb8f63e4cd0>

In [6]:
# jesture_runner.stop_recognition()

In [7]:
# Wait before attaching the drawing stream.
# NOTE(review): presumably gives the recognition thread time to start -- confirm.
time.sleep(3)

# Start reading frames to display in the application window. The stream is
# given the runner plus the centre hand box; the box itself is not drawn.
stream_options = dict(
    cam_id=args.cam_id,
    width=width,
    height=height,
    hand_box_tl=mid_hand_box_tl,
    hand_box_br=mid_hand_box_br,
    draw_hand_box=False,
)
cap = WebcamDrawStream(jesture_runner, **stream_options)
cap.start()

(MainThread) [WebcamDrawStream] Starting a thread...
(MainThread) [WebcamDrawStream] Thread started.


<webcam_draw.WebcamDrawStream at 0x7fb8f6520410>

In [8]:
# Main capture loop: shows the annotated webcam frame, lets the user label the
# current hand pose with a gesture key, and stores the SDK keypoints with that
# label. Keys: 'q' quits, 'l' / 'r' switch the captured hand, and any key in
# `key_to_idx` records one sample.
time.sleep(3)

# Number of frames the freshly pressed gesture line stays highlighted.
NOTIFY_FRAMES = 4
# Key of the most recently recorded gesture. The highlight is matched on the
# key rather than on the rendered text, because the text embeds a counter that
# changes right after the press.
pressed_key = None

try:
    while True:
        # Read the shared attribute once so the None check and the use below
        # see the same object.
        raw_frame = cap.frame
        if raw_frame is None:
            # No frame captured yet: yield briefly instead of busy-spinning.
            time.sleep(0.01)
            continue

        # get current webcam image with drawn hand skeletons
        frame = raw_frame[:, ::-1, :] if selfie_mode else raw_frame

        # draw logo (channel order of the logo is reversed to match the frame)
        frame = overlay_alpha(logo_img[:, :, ::-1], logo_alpha, frame,
                              loc=logo_loc, alpha=1.0)

        # draw ui elements; non-ndarray frames are converted through .get()
        frame = Image.fromarray(
            frame if isinstance(frame, np.ndarray) else frame.get())
        draw = ImageDraw.Draw(frame, "RGBA")
        draw.rectangle((help_box_tl[hand_type], help_box_br[hand_type]),
                       fill=(0, 0, 0, 127), outline=(235, 190, 63, 255))

        # draw text
        draw.multiline_text(handtype_text_loc, handtype_text[hand_type],
                            font=font, fill=(255, 255, 255, 200))

        help_textlist = [
            f'{idx_to_count[key_to_idx[k]]} | {k}: {idx_to_gesture[key_to_idx[k]]}'
            for k in key_to_idx]
        help_textlist_str = '\n'.join(help_textlist)
        draw.multiline_text(help_text_loc[hand_type], help_textlist_str,
                            font=help_font, fill=(255, 255, 255))

        # retrieve keyboard signal ("no key" is -1, which maps to 255 here)
        c = cv2.waitKey(1) % 256
        if c == ord('q'):
            break

        if c == ord('l'):
            hand_type = 'left'
        if c == ord('r'):
            hand_type = 'right'

        # record a sample if a gesture key is pressed
        if chr(c) in key_to_idx:
            k = chr(c)
            gesture_idx = key_to_idx[k]
            v = idx_to_gesture[gesture_idx]
            # the printed text carries the count *before* this sample
            pressed_text = f'{idx_to_count[gesture_idx]} | {k}: {v}'
            idx_to_count[gesture_idx] += 1
            pressed_duration = NOTIFY_FRAMES
            pressed_key = k
            print(f"pressed {pressed_text}, shape: {frame.size}")
            data_list.append({
                'hand_type': hand_type,
                'gesture_id': gesture_idx,
                'gesture_name': v,
                'pred_gesture_name': jesture_runner.get_gesture(
                    f'{hand_type}_static'),
                'keypoints': jesture_runner.get_hand_keypoints(
                    f'{hand_type}_keypoints', mirror=False),
                'scaled_keypoints': jesture_runner.get_hand_keypoints(
                    f'scaled_{hand_type}_keypoints', mirror=False),
            })
            # checkpoint the data whenever the gesture key changes, so that
            # little is lost if the program exits unexpectedly
            if k != prev_k:
                with open(data_file_name, 'wb') as file:
                    pickle.dump(data_list, file)
            prev_k = k

        # highlight the pressed gesture line for NOTIFY_FRAMES frames
        if pressed_duration > 0:
            notify_textlist_str = "\n".join(
                line if key_name == pressed_key else ""
                for key_name, line in zip(key_to_idx, help_textlist))
            draw.multiline_text(help_text_loc[hand_type], notify_textlist_str,
                                font=help_font, fill=(235, 190, 63))
            pressed_duration -= 1

        frame = np.array(frame).astype(np.uint8)
        cv2.imshow(name, frame)

        i += 1
finally:
    # Runs on normal exit ('q') as well as on an exception or kernel
    # interrupt, so recorded samples are saved and the threads are stopped.
    try:
        with open(data_file_name, 'wb') as file:
            print(f'Dumping {len(data_list)} items to {data_file_name}...')
            pickle.dump(data_list, file)
            print(f'Dumped.')
    finally:
        cap.stop()
        jesture_runner.stop_recognition()

        cv2.waitKey(1)
        cv2.destroyWindow(name)
        cv2.destroyAllWindows()
        cv2.waitKey(1)

(480, 640, 3)
pressed 0 | 0: no_gesture, shape: (640, 480)
pressed 1 | 0: no_gesture, shape: (640, 480)
pressed 2 | 0: no_gesture, shape: (640, 480)
pressed 3 | 0: no_gesture, shape: (640, 480)
pressed 4 | 0: no_gesture, shape: (640, 480)
pressed 5 | 0: no_gesture, shape: (640, 480)
pressed 6 | 0: no_gesture, shape: (640, 480)
pressed 0 | 1: one, shape: (640, 480)
pressed 1 | 1: one, shape: (640, 480)
pressed 2 | 1: one, shape: (640, 480)
pressed 3 | 1: one, shape: (640, 480)
pressed 0 | 2: two, shape: (640, 480)
pressed 1 | 2: two, shape: (640, 480)
pressed 0 | 3: three, shape: (640, 480)
pressed 0 | 4: four, shape: (640, 480)
pressed 0 | 5: five, shape: (640, 480)
pressed 0 | 6: fist, shape: (640, 480)
pressed 0 | 7: peace, shape: (640, 480)
pressed 0 | 8: love, shape: (640, 480)
pressed 0 | 9: ok, shape: (640, 480)
pressed 7 | 0: no_gesture, shape: (640, 480)
pressed 8 | 0: no_gesture, shape: (640, 480)
pressed 1 | 9: ok, shape: (640, 480)
pressed 1 | 7: peace, shape: (640, 480)
pre

(MainThread) [WebcamDrawStream] Stopping...
(Camera-Draw Python Thread) [WebcamDrawStream] Frame loop finished.
(Camera-Draw Python Thread) [WebcamDrawStream] Capture released.
(MainThread) [WebcamDrawStream] Camera thread joined.
(MainThread) [JestureSdkRunner] Stopping recognition...
(MainThread) [JestureSdkRunner] Recognition stopped.


Dumping 125 items to hand_kps_v1_150221_22_31.pkl...
Dumped.


(MainThread) [JestureSdkRunner] Thread joined.


-1

In [9]:
# Manual cleanup cell: repeats the shutdown sequence from the end of the main
# loop cell, for use when that cell did not reach its own cleanup.
# Stop the frame-drawing stream first, then the SDK recognition runner.
cap.stop()
jesture_runner.stop_recognition()

# NOTE(review): the waitKey(1) calls around the destroy call presumably give
# the GUI event loop a chance to process the window close -- confirm.
cv2.waitKey(1)
# cv2.destroyWindow(name)
cv2.destroyAllWindows()
# Being the last expression of the cell, this return value is echoed as output.
cv2.waitKey(1)

(MainThread) [WebcamDrawStream] Stopping...
(MainThread) [WebcamDrawStream] Camera thread joined.
(MainThread) [JestureSdkRunner] Stopping recognition...
(MainThread) [JestureSdkRunner] Recognition stopped.
(MainThread) [JestureSdkRunner] Thread joined.


-1

In [1]:
import pickle

# Reload a previously recorded session for inspection.
# NOTE: pickle.load can execute arbitrary code; only open files recorded by
# this notebook, never pickles from an untrusted source.
data_file_name = './hand_kps_v1_150221_22_36.pkl'

with open(data_file_name, mode='rb') as file:
    loaded_data = pickle.load(file)

# sample count, then the first and last three records
n_loaded = len(loaded_data)
print(n_loaded)
print(loaded_data[:3], loaded_data[-3:])

196
[{'hand_type': 'right', 'gesture_id': 8, 'gesture_name': 'love', 'pred_gesture_name': 'SPIDERMAN', 'keypoints': array([[ 1.44262512e+02,  3.73496460e+02, -1.53652582e-04],
       [ 1.95553406e+02,  3.56603821e+02, -1.91527177e-02],
       [ 2.36296692e+02,  3.12239685e+02, -1.51518285e-02],
       [ 2.57951691e+02,  2.72399078e+02, -1.92700978e-02],
       [ 2.73626251e+02,  2.47165924e+02, -1.73198394e-02],
       [ 2.06070786e+02,  2.52581085e+02,  2.99416333e-02],
       [ 2.18128937e+02,  2.00589523e+02,  7.77604617e-03],
       [ 2.25730072e+02,  1.67592697e+02,  1.34092057e-03],
       [ 2.30226913e+02,  1.36362350e+02,  2.16531521e-03],
       [ 1.78465607e+02,  2.51619507e+02,  1.54631585e-02],
       [ 1.94475235e+02,  2.31411728e+02, -6.14254884e-02],
       [ 2.01716705e+02,  2.77529968e+02, -8.15380886e-02],
       [ 2.02018433e+02,  3.10308899e+02, -6.83032796e-02],
       [ 1.49247513e+02,  2.55429504e+02, -1.07672540e-02],
       [ 1.62706482e+02,  2.32377716e+02, -9

---

In [None]:
# https://unicode.org/emoji/charts/emoji-list.html#1f44c
# Maps upper-case gesture labels to the text/emoji shown for them. Labels
# without an emoji here (TWO, THREE, FOUR) map to their own name.
gesture_to_emoji = {
    '': '',
    '———': '———',
    'ONE': '\U0000261D',
    'TWO': 'TWO',
    'THREE': 'THREE',
    'FOUR': 'FOUR',
    'FIVE' : '\U0001F590',
    'OK': '\U0001F44C',
    'YEAH': '\U0000270C',
    'SPIDERMAN': '\U0001F91F',
    'ROCK': '\U0001F918',
    # U+270A (raised fist). A \U escape takes exactly 8 hex digits; the old
    # 9-digit form parsed as U+9270 followed by a literal 'A'.
    'FIST': '\U0000270A'
}

# !pip install emoji
import unicodedata
# in python2 use u'\U0001f603'
print('\U0001F44C')#U+1F44C
print(gesture_to_emoji['FIVE'])

In [None]:
def show_logo_alpha(design_root='/Users/izakharkin/Desktop/deepjest/_design'):
    """Display the logo image and its alpha mask as two matplotlib figures.

    Args:
        design_root: directory that contains the
            ``wix/jesture_ai_logo_comfortaa`` folder with the logo PNG.
            Defaults to the path originally hard-coded here, so existing
            calls behave as before; pass your own location on another machine.
    """
    logo_path = f'{design_root}/wix/jesture_ai_logo_comfortaa/jesture_logo_comfortaa-removebg.png'

    # These are function locals; same-named notebook globals are untouched.
    logo_img, logo_alpha = load_image_with_alpha(logo_path, remove_borders=True)

    plt.imshow(logo_img)
    plt.show()
    plt.imshow(logo_alpha, cmap='gray')
    plt.show()

In [None]:
def test_overlay():
    """Visual check: overlay the logo and a gesture help list on ./test.jpg.

    Reads the notebook-level ``logo_img`` and ``logo_alpha`` and shows the
    composed image in a matplotlib figure. Returns nothing.
    """
    testimg = io.imread('./test.jpg')
    testimg = overlay_alpha(logo_img, logo_alpha, testimg, loc=(10, 10), alpha=1.0)

    # Local demo table only; 'peace' was previously misspelled as 'piece'.
    idx_to_gesture = {1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five',
                      6: 'fist', 7: 'peace', 8: 'love', 9: 'ok', 0: 'horns'}
    help_textlist = [f'{k}: {v}' for k, v in idx_to_gesture.items()]
    testimg = draw_multiline_text(testimg, help_textlist)

    plt.figure(figsize=(10, 7))
    plt.imshow(testimg)

In [None]:
def test_blackbox():
    """Visual check: downscale ./blackbox.png, build its alpha mask and overlay it.

    Shows the resized image, the derived alpha mask, and finally the overlay on
    ./test.jpg. Returns nothing.
    """
    blackbox = io.imread('./blackbox.png')
    orig_size = (blackbox.shape[1], blackbox.shape[0])  # (width, height)
    print('orig_size:', orig_size)
    target_size = (orig_size[0] // 7, orig_size[1] // 7)
    print('target_size:', target_size)
    blackbox = cv2.resize(blackbox, target_size)
    plt.imshow(blackbox)
    plt.show()

    # Opaque everywhere except where the first channel is pure 255.
    blackbox_alpha = np.ones_like(blackbox)[:, :, 0] * 255
    blackbox_alpha[blackbox[:, :, 0] == 255] = 0
    plt.imshow(blackbox_alpha, cmap='gray')
    plt.show()

    # The background must be loaded here: `testimg` was previously read before
    # being assigned inside this function, which raised UnboundLocalError.
    # NOTE(review): './test.jpg' mirrors the other visual checks -- confirm it
    # is the intended background.
    testimg = io.imread('./test.jpg')
    testimg = overlay_alpha(blackbox, blackbox_alpha, testimg, loc=(10, 100), alpha=1.0)
    plt.figure(figsize=(10, 7))
    plt.imshow(testimg)

In [None]:
def test_pil_draw():
    """Visual check of PIL drawing with alpha on ./test.jpg.

    Draws a half-transparent black fill and then a coloured outline over the
    same rectangle, and shows the result in a matplotlib figure.
    """
    box = ((280, 10), (1010, 706))

    canvas = Image.fromarray(io.imread('./test.jpg'))
    painter = ImageDraw.Draw(canvas, "RGBA")
    painter.rectangle(box, fill=(0, 0, 0, 127))
    painter.rectangle(box, outline=(63, 190, 235, 127))

    plt.figure(figsize=(12, 10))
    plt.imshow(np.array(canvas))