In [67]:
import cv2 as cv
import numpy as np
import tensorflow as tf
import mediapipe as mp
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.vision import PoseLandmarker, PoseLandmarkerOptions
from mediapipe.tasks.python.core.base_options import BaseOptions
import definitions as defs

def load_blazepose(model_path = 'models/blazepose.task'):
    options = PoseLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=model_path),
        running_mode=vision.RunningMode.IMAGE)
    model = PoseLandmarker.create_from_options(options)
    return model

def load_posenet():
    interpreter = tf.lite.Interpreter(model_path="models/posenet2.tflite")
    interpreter.allocate_tensors()
    return interpreter

def load_movenet():
    interpreter = tf.lite.Interpreter(model_path='models/movenet-lightning.tflite')
    interpreter.allocate_tensors()
    return interpreter

def blazepose(model, img):
    # TODO: check that this doesn't cause distortion.
    mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=img)
    output = model.detect(mp_img)
    # print(output.pose_landmarks)
    # TODO: convert to standard form
    return output.pose_landmarks

def movenet(interpreter, img):
    output = run_inference_tflite(interpreter, img, dtype = defs.MOVENET_DTYPE, shape=defs.MOVENET_SHAPE)
    return output

def posenet(interpreter, img):    
    map = run_inference_tflite(interpreter, img, dtype = defs.POSENET_DTYPE, shape=defs.POSENET_SHAPE)
    return map

def run_inference_tflite(interpreter, img, dtype, shape=257):
    # TODO: check how to perform this resize without distortion. this will not yield accurate kp
    # positions, probably.
    input_image = cv.resize(img, (shape, shape))
    input_image = tf.cast(input_image, dtype=dtype)
    input_image = tf.expand_dims(input_image, axis=0)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    interpreter.invoke()
    kps = np.squeeze(interpreter.get_tensor(output_details[0]['index']))
    # TODO: convert to standard form
    return kps

In [68]:
model = load_posenet()

In [70]:
def process_posenet(map, offset, threshold=0):
    print(offset.shape)
    keypoints = []
    for idx in range(map.shape[-1]):
        cur_map = map[..., idx]
        y, x = np.unravel_index(np.argmax(cur_map), cur_map.shape)
        cur_offset = offset[x, y, idx]
        conf = map[y, x, idx]
        conf = 1 / (1 + np.exp(-conf))

        y = y / 8 * defs.POSENET_SHAPE + cur_offset
        x = x / 8 * defs.POSENET_SHAPE + cur_offset
        if(y < 0): y = 0
        if(x < 0): x = 0
                
        if conf > threshold:
            kp = defs.KP2D(idx, y, x, conf, defs.KP_DICT_17[idx])
            print(f'{kp.name} {kp.prob} {kp.x} {kp.y} {cur_offset}')
            keypoints.append(kp)
    return keypoints

im = cv.imread('data/raw/test.jpg', cv.IMREAD_COLOR)
map = posenet(model, im)

out = model.get_output_details()
offset = np.squeeze(model.get_tensor(out[1]['index']))
proc = process_posenet(map, offset=offset)

(9, 9, 34)
nose 0.00025723957319332536 166.05007934570312 37.550079345703125 5.425079345703125
left_eye 0.00036297483040718844 123.02731370925903 0 -5.472686290740967
right_eye 0.00030371665845830876 123.93878412246704 220.31378412246704 -4.561215877532959
left_ear 0.0005375043107950707 256.0625375509262 0 -0.9374624490737915
right_ear 0.00041713279253397477 131.4853491783142 35.11034917831421 2.985349178314209
left_shoulder 0.0008302593871404658 256.4144524335861 256.4144524335861 -0.5855475664138794
right_shoulder 0.0006376022171620848 248.3817138671875 0 -8.6182861328125
left_elbow 0.0007801340093478948 256.21784871816635 0 -0.7821512818336487
right_elbow 0.001244436626206526 259.904732465744 2.9047324657440186 2.9047324657440186
left_wrist 0.0015133987144239534 194.37537479400635 226.50037479400635 1.6253747940063477
right_wrist 0.000602735146169159 257.2635175585747 0.2635175585746765 0.2635175585746765
left_hip 0.001018136171984477 253.95867204666138 0 -3.041327953338623
right_hi

In [36]:
out = model.get_output_details()
print(offset.shape)
print('post squeeze')
offset = np.squeeze(offset)
print(offset.shape)



(1, 9, 9, 34)
post squeeze
(9, 9, 34)
