# Hand tracking skeleton (Mac OS X)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
from skimage import io
import numpy as np
import datetime
import pickle
import time
import cv2
import sys
import os

sys.path.append(os.path.abspath(".."))

from src.utils import load_image_with_alpha, overlay_alpha
from src.utils import draw_text, draw_multiline_text, draw_skeleton
from src.thread_camera_draw import ThreadCameraDraw

from jesture_sdk_python.jesture_sdk_python import JestureSdkRunner

import tensorflow as tf

print('cv2.__version__:', cv2.__version__)  # 4.1.2 recommended

shutil.which("libSystem.B.dylib"): None
ctypes.CDLL("libSystem.B.dylib")._name: libSystem.B.dylib
ctypes.__version__: 1.1.0
platform.mac_ver(): ('10.16', ('', '', ''), 'x86_64')


(MainThread) Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
(MainThread) Loaded backend module://ipykernel.pylab.backend_inline version unknown.


cv2.__version__: 3.4.3


In [3]:
# Configuration cell: creates the OpenCV window and defines every layout
# constant, font and state variable used by the main display loop.

# create the application window
name = 'RSL: 10 static gestures'
width, height = (640, 480)  # (1280, 720)
cv2.namedWindow(name)
# cv2.resizeWindow(name, (width, height))
cv2.startWindowThread()

# set the data file; the timestamp suffix is DDMMYY_HH_MM
now = datetime.datetime.now()
dt = f'{now.day:02d}{now.month:02d}{now.year%100:02d}_{now.hour:02d}_{now.minute:02d}'
data_file_name = f'hand_kps_v1_{dt}.pkl'

# set the logo stuff
logo_path = '../images/jesture_logo.png'
logo_img, logo_alpha = load_image_with_alpha(logo_path, remove_borders=True)
logo_loc = (10, 10)

# set the gestures help stuff:
# keyboard key -> gesture index -> displayed gesture label / per-gesture counter
key_to_idx = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5,
              '6': 6, '7': 7, '8': 8, '9': 9, 'f': 10, 'z': 11}
key_ords = [ord(x) for x in key_to_idx]
idx_to_gesture = {0: 'Жест "А"', 1: 'Жест "Б"', 2: 'Жест "В"', 3: 'Жест "Г"', 4: 'Жест "Е"', 5: 'Жест "Ж"',
                  6: 'Жест "И"', 7: 'Жест "Л"', 8: 'Жест "М"', 9: 'Жест "Н"', 10: 'Жест "Я"', 11: 'Нет жеста'}
idx_to_count = {k: 0 for k in idx_to_gesture}

# Both fonts are loaded from the repository copy of Comfortaa so the notebook
# does not depend on the font being installed system-wide.
font_path = "../fonts/Comfortaa-Light.ttf"

# help box geometry, keyed by the captured hand type:
# the 'right' box starts at x=10, the 'left' box ends at x=width
help_box_width = 175
help_box_tl = {'right': (10, height//5+10),
               'left': (width-help_box_width, height//5+10)}
help_box_br = {'right': (10+help_box_width, height-30),
               'left': (width, height-30)}
# help text is inset by 10 px from the top-left corner of its box
help_text_loc = {'right': (help_box_tl['right'][0]+10, help_box_tl['right'][1]+10),
                 'left': (help_box_tl['left'][0]+10, help_box_tl['left'][1]+10)}
help_font = ImageFont.truetype(font_path, 20)

# set the scaled hands stuff: boxes occupy the bottom fifth of the frame,
# split into thirds horizontally (left / middle / right)
mid_hand_box_tl = (width//3, height-height//5)
mid_hand_box_br = (2*width//3, height)
hand_box_tl = {'right': (2*width//3, height-height//5),
               'left': (0, height-height//5)}
hand_box_br = {'right': (width, height),
               'left': (width//3, height)}

# set the hand type stuff
handtype_text = {"right": "Right hand capture (L/R)",
                 "left": "Left hand capture (L/R)"}
handtype_text_loc = (width//2, 25)

# set the counter stuff
count_text_loc = (width//3, 25)

# set common font
font = ImageFont.truetype(font_path, 24)

# variables used in the main loop
pressed_duration = 0
pressed_text = ''

selfie_mode = True
hand_type = 'right'
data_list = []
prev_k = ''
i = 0

(MainThread) STREAM b'IHDR' 16 13
(MainThread) STREAM b'IDAT' 41 8192


Removing 250 pixels from up and down borders
Original image size: (1920, 580)
Target size: (192, 58)


In [4]:
# import tensorflow as tf

# model_path = '/Users/izakharkin/Desktop/jesture_sdk/desktop_demo/notebooks/rsl_models/rsl_fc256.tflite'

# # Load the TFLite model and allocate tensors.
# interpreter = tf.lite.Interpreter(model_path=model_path)
# interpreter.allocate_tensors()

# # Get input and output tensors.
# input_details = interpreter.get_input_details()
# output_details = interpreter.get_output_details()

# # Test the model on random input data.
# input_shape = input_details[0]['shape']
# input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
# interpreter.set_tensor(input_details[0]['index'], input_data)

# interpreter.invoke()

# # The function `get_tensor()` returns a copy of the tensor data.
# # Use `tensor()` in order to get a pointer to the tensor.
# output_data = interpreter.get_tensor(output_details[0]['index'])
# print(output_data)

In [5]:
class RslRecognizer(object):
    """Callable wrapper around a TFLite interpreter for gesture classification.

    The interpreter is created from ``model_path`` and its tensors are
    allocated once in the constructor; calling the instance runs one inference.
    """

    def __init__(self, model_path):
        """Load the TFLite model at ``model_path`` and cache its tensor metadata."""
        self.model = tf.lite.Interpreter(model_path=model_path)
        self.model.allocate_tensors()
        self.input_details = self.model.get_input_details()
        self.output_details = self.model.get_output_details()
        self.input_shape = self.input_details[0]['shape']
        self.output_shape = self.output_details[0]['shape']
        print('Initialized model:')
        print('Input shape: {}'.format(self.input_shape))
        print('Output shape: {}'.format(self.output_shape))

    def __call__(self, x):
        """Run one inference and return the first output tensor.

        Args:
            x: array-like holding exactly as many values as the model input.
                It is converted to float32 and reshaped to the interpreter's
                input shape, so flat vectors and plain lists are accepted
                in addition to already-shaped arrays.

        Returns:
            The array returned by the interpreter's ``get_tensor`` for the
            first output.

        Raises:
            ValueError: if ``x`` cannot be reshaped to the model input shape.
        """
        target_shape = tuple(int(dim) for dim in self.input_shape)
        x = np.asarray(x, dtype=np.float32).reshape(target_shape)
        self.model.set_tensor(self.input_details[0]['index'], x)
        self.model.invoke()
        output_data = self.model.get_tensor(self.output_details[0]['index'])
        return output_data

In [6]:
%%time
# Load the RSL gesture classifier and run it once as a smoke test.
# NOTE(review): this is an absolute path on one developer's machine; the
# notebook will not run elsewhere until it is made relative or configurable.
model_path = '/Users/izakharkin/Desktop/jesture_sdk/desktop_demo/notebooks/rsl_models/rsl_fc256.tflite'
rsl_model = RslRecognizer(model_path)
# feed a random float32 array of shape (1, 63); the returned array is displayed as the cell output
rsl_model(np.array(np.random.random_sample((1, 63)), dtype=np.float32)) 

Initialized model:
Input shape: [ 1 63]
Output shape: [ 1 12]
CPU times: user 2.61 ms, sys: 1.65 ms, total: 4.26 ms
Wall time: 6.41 ms


array([[ -98.0575  ,  -24.879929, -156.98726 , -122.52011 ,   19.554266,
         -92.070366,  -99.12113 , -102.96895 ,  -85.96015 , -133.84106 ,
         -58.017197,   81.37131 ]], dtype=float32)

In [7]:
class Args:
    """Minimal stand-in for command-line arguments used by the notebook."""

    def __init__(self, cam_id=1):
        """Store the camera index passed to the SDK runner and the camera thread.

        Args:
            cam_id: index of the camera device to open (defaults to 1).
        """
        self.cam_id = cam_id
args = Args()

In [8]:
# start Jesture SDK Python runner
# It is created for the camera index held in `args.cam_id`; the same runner
# object is later queried for hand keypoints and stopped during cleanup.
jesture_runner = JestureSdkRunner(cam_id=args.cam_id)
# the call's return value (the runner's repr in the saved output) is the cell output
jesture_runner.start_recognition()

(MainThread) [JestureSdkRunner] Instance created.
(jesture_sdk_python_thread) [JestureSdkRunner] Starting recognition...
(MainThread) [JestureSdkRunner] Recognition thread started.


<jesture_sdk_python.jesture_sdk_python.JestureSdkRunner at 0x7fb1e43ab090>

In [9]:
# jesture_runner.stop_recognition()

In [10]:
# pause for 3 seconds before opening the camera thread
# (presumably to let the SDK recognition thread finish starting — TODO confirm)
time.sleep(3)
# start reading frames to display in the application window
# The middle hand box coordinates are passed in, but drawing of the box is disabled.
cap = ThreadCameraDraw(
    jesture_runner, cam_id=args.cam_id, width=width, height=height,
    hand_box_tl=mid_hand_box_tl, hand_box_br=mid_hand_box_br,
    draw_hand_box=False
)
# the call's return value is displayed as the cell output
cap.start()

(MainThread) [ThreadCameraDraw] Starting a thread...
(MainThread) [ThreadCameraDraw] Thread started.


<src.thread_camera_draw.ThreadCameraDraw at 0x7fb1e43ab950>

In [11]:
# Main display loop: grabs the latest camera frame, classifies the current hand
# pose with `rsl_model`, draws the help box with the recognized gesture
# highlighted, and shows the result in the OpenCV window.
# Keys: 'q' quits, 'l' / 'r' switch the captured hand.

# gesture labels shown in the help box, in `key_to_idx` order
help_textlist = [f'{idx_to_gesture[key_to_idx[k]]}' for k in key_to_idx]
help_textlist_str = '\n'.join(help_textlist)


time.sleep(3)
try:
    while True:
        # read the shared frame once so the None check and the use see the same object
        raw_frame = cap.frame
        if raw_frame is None:
            # no frame available yet: yield briefly instead of busy-spinning
            time.sleep(0.01)
            continue

        # get current webcam image with drawn hand skeletons (mirrored in selfie mode)
        frame = raw_frame[:, ::-1, :] if selfie_mode else raw_frame

        # draw logo (disabled)
        # frame = overlay_alpha(logo_img[:,:,::-1], logo_alpha, frame, loc=logo_loc, alpha=1.0)

        # recognize current static gesture
        scaled_kps = jesture_runner.get_hand_keypoints(f'scaled_{hand_type}_keypoints')
        gesture_id = rsl_model(scaled_kps.reshape(1, -1))[0].argmax()

        # draw ui elements; anything that is not an ndarray is converted via its .get()
        frame = Image.fromarray(frame if isinstance(frame, np.ndarray) else frame.get())
        draw = ImageDraw.Draw(frame, "RGBA")
        draw.rectangle((help_box_tl[hand_type], help_box_br[hand_type]),
                       fill=(0, 0, 0, 127), outline=(235, 190, 63, 255))

        # draw text
        draw.multiline_text(handtype_text_loc, handtype_text[hand_type],
                            font=font, fill=(255, 255, 255, 200))

        draw.multiline_text(help_text_loc[hand_type], help_textlist_str,
                            font=help_font, fill=(255, 255, 255))

        # retrieve keyboard signal
        c = cv2.waitKey(1) % 256
        if c == ord('q'):
            break

        if c == ord('l'):
            hand_type = 'left'
        if c == ord('r'):
            hand_type = 'right'

        # label of the gesture recognized in this frame
        pressed_text = f'{idx_to_gesture[gesture_id]}'

        # Highlight the recognized gesture: redraw the help text with every
        # other line blanked, so only the matching label is painted over in
        # the accent colour at the same position.
        notify_textlist_str = "\n".join(
            [x if x == pressed_text else "" for x in help_textlist])
        draw.multiline_text(help_text_loc[hand_type], notify_textlist_str,
                            font=help_font, fill=(235, 190, 63))

        frame = np.array(frame).astype(np.uint8)
        cv2.imshow(name, frame)

        i += 1

    # with open(data_file_name, 'wb') as file:
    #     print(f'Dumping {len(data_list)} items to {data_file_name}...')
    #     pickle.dump(data_list, file)
    #     print(f'Dumped.')
finally:
    # Always release the camera thread, the SDK thread and the window, even
    # when the loop is left through an exception or a kernel interrupt.
    try:
        cap.stop()
    finally:
        jesture_runner.stop_recognition()

        cv2.waitKey(1)
        cv2.destroyWindow(name)
        cv2.destroyAllWindows()
        cv2.waitKey(1)

Camera params was set to: 640 480
Real params are: 640 480
[[3.99747505e+02 4.32579861e+02 0.00000000e+00]
 [4.02027206e+02 3.86234865e+02 7.23224357e-02]
 [3.88932304e+02 3.55716820e+02 1.04062103e-01]
 [3.71003113e+02 3.40915318e+02 1.28224492e-01]
 [3.57925911e+02 3.41868153e+02 1.49324834e-01]
 [3.70422516e+02 3.26005154e+02 3.26330103e-02]
 [3.41546249e+02 3.21315079e+02 8.60392079e-02]
 [3.44774818e+02 3.38938837e+02 1.20710135e-01]
 [3.54043503e+02 3.47615376e+02 1.30775496e-01]
 [3.55284195e+02 3.41665993e+02 9.04347096e-03]
 [3.25715637e+02 3.44914999e+02 7.50071853e-02]
 [3.32706413e+02 3.61656418e+02 1.07540719e-01]
 [3.43767281e+02 3.68349581e+02 1.07799232e-01]
 [3.42104111e+02 3.64345579e+02 0.00000000e+00]
 [3.16164360e+02 3.65026188e+02 5.68275191e-02]
 [3.23803139e+02 3.80545235e+02 8.30179825e-02]
 [3.35031090e+02 3.89058352e+02 7.93376788e-02]
 [3.31333923e+02 3.90686960e+02 0.00000000e+00]
 [3.07499733e+02 3.82400036e+02 2.90447623e-02]
 [3.15044689e+02 3.92838678e+

(MainThread) [ThreadCameraDraw] Stopping...
(Camera-Draw Python Thread) [ThreadCameraDraw] Frame loop finished.
(Camera-Draw Python Thread) [ThreadCameraDraw] Capture released.
(MainThread) [ThreadCameraDraw] Camera thread joined.
(MainThread) [JestureSdkRunner] Stopping recognition...
(MainThread) [JestureSdkRunner] Recognition stopped.
(MainThread) [JestureSdkRunner] Thread joined.


-1

In [None]:
# Manual cleanup cell: stops the camera thread and the SDK runner and closes
# all OpenCV windows. It repeats the shutdown sequence found at the end of the
# main loop cell (presumably for use when that cell was interrupted — TODO confirm).
cap.stop()
jesture_runner.stop_recognition()

# waitKey(1) is called before and after destroying the windows
# (presumably to let the GUI event loop process the close — TODO confirm)
cv2.waitKey(1)
# cv2.destroyWindow(name)
cv2.destroyAllWindows()
cv2.waitKey(1)

In [33]:
# import pickle

# data_file_name = '../out_data/robot_hand_keypoints_120421_03_26.pkl'

# with open(data_file_name, 'rb') as file:
#     loaded_data = pickle.load(file)

# print(len(loaded_data))
# print(loaded_data[:3], loaded_data[-3:])

---

In [None]:
# Mapping from gesture name to the string displayed for it: an emoji where one
# is defined below, otherwise the plain name.
# https://unicode.org/emoji/charts/emoji-list.html#1f44c
gesture_to_emoji = {
    '': '',
    '———': '———',
    'ONE': '\U0000261D',
    'TWO': 'TWO',
    'THREE': 'THREE',
    'FOUR': 'FOUR',
    'FIVE' : '\U0001F590',
    'OK': '\U0001F44C',
    'YEAH': '\U0000270C',
    'SPIDERMAN': '\U0001F91F',
    'ROCK': '\U0001F918',
    # A \U escape takes exactly eight hex digits: U+270A is the raised fist.
    # (A ninth digit would be parsed as a separate trailing character.)
    'FIST': '\U0000270A'
}

# !pip install emoji
import unicodedata
# in python2 use u'\U0001f603'
print('\U0001F44C')#U+1F44C
print(gesture_to_emoji['FIVE'])

In [None]:
def show_logo_alpha():
    """Load the Comfortaa logo with its alpha mask and display both.

    The logo image is shown first, followed by the alpha mask rendered with a
    gray colormap. Each is shown in its own figure.
    """
    design_root = '/Users/izakharkin/Desktop/deepjest/_design'
    logo_path = f'{design_root}/wix/jesture_ai_logo_comfortaa/jesture_logo_comfortaa-removebg.png'

    image, mask = load_image_with_alpha(logo_path, remove_borders=True)

    # (picture, extra imshow keyword arguments), displayed in this order
    for picture, imshow_kwargs in ((image, {}), (mask, {'cmap': 'gray'})):
        plt.imshow(picture, **imshow_kwargs)
        plt.show()

In [None]:
def test_overlay():
    """Visual check of the logo overlay and multiline help text on ./test.jpg.

    Uses the module-level ``logo_img`` / ``logo_alpha`` loaded earlier in the
    notebook, then displays the composed image in a 10x7 figure.
    """
    background = io.imread('./test.jpg')
    composed = overlay_alpha(logo_img, logo_alpha, background, loc=(10, 10), alpha=1.0)

    gesture_names = {1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five',
                     6: 'fist', 7: 'piece', 8: 'love', 9: 'ok', 0: 'horns'}
    # one "<key>: <name>" line per gesture, in dictionary order
    help_lines = []
    for key, label in gesture_names.items():
        help_lines.append(f'{key}: {label}')
    composed = draw_multiline_text(composed, help_lines)

    plt.figure(figsize=(10, 7))
    plt.imshow(composed)

In [None]:
def test_blackbox():
    """Visual check: overlay a downscaled ./blackbox.png onto ./test.jpg.

    The box image is resized to one seventh of its original size, an alpha
    mask is derived from it (0 where the first channel equals 255, 255
    elsewhere), and the result is overlaid on the test image at (10, 100).
    Intermediate images are displayed along the way.
    """
    blackbox = io.imread('./blackbox.png')
    orig_size = (blackbox.shape[1], blackbox.shape[0])
    print('orig_size:', orig_size)
    target_size = (orig_size[0] // 7, orig_size[1] // 7)
    print('target_size:', target_size)
    blackbox = cv2.resize(blackbox, target_size)
    plt.imshow(blackbox)
    plt.show()

    # mask is 255 everywhere except where the first channel equals 255
    blackbox_alpha = np.ones_like(blackbox)[:, :, 0] * 255
    blackbox_alpha[blackbox[:, :, 0] == 255] = 0
    plt.imshow(blackbox_alpha, cmap='gray')
    plt.show()

    # `testimg` is assigned in this function, which makes it a local name, so
    # it must be loaded here before use: reading it first raises
    # UnboundLocalError. The same test image as the sibling helpers is used.
    testimg = io.imread('./test.jpg')
    testimg = overlay_alpha(blackbox, blackbox_alpha, testimg, loc=(10, 100), alpha=1.0)
    plt.figure(figsize=(10, 7))
    plt.imshow(testimg)

In [None]:
def test_pil_draw():
    """Visual check of PIL drawing in RGBA mode on ./test.jpg.

    Draws one rectangle twice over the same box: first filled with black at
    alpha 127, then outlined with (63, 190, 235) at alpha 127. The result is
    shown in a 12x10 figure.
    """
    canvas = Image.fromarray(io.imread('./test.jpg'))
    painter = ImageDraw.Draw(canvas, "RGBA")

    box = ((280, 10), (1010, 706))
    # fill pass first, outline pass second
    painter.rectangle(box, fill=(0, 0, 0, 127))
    painter.rectangle(box, outline=(63, 190, 235, 127))

    plt.figure(figsize=(12, 10))
    plt.imshow(np.array(canvas))