In [1]:
import cv2 as cv
import numpy as np
import tensorflow as tf
import mediapipe as mp
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.vision import PoseLandmarker, PoseLandmarkerOptions
from mediapipe.tasks.python.core.base_options import BaseOptions
import definitions as defs

def load_blazepose(model_path='models/blazepose.task'):
    """Create a MediaPipe PoseLandmarker configured for single-image inference.

    Args:
        model_path: Path to the BlazePose ``.task`` asset bundle.

    Returns:
        The ``PoseLandmarker`` built by ``PoseLandmarker.create_from_options``,
        with ``running_mode`` set to ``RunningMode.IMAGE``.
    """
    return PoseLandmarker.create_from_options(
        PoseLandmarkerOptions(
            base_options=BaseOptions(model_asset_path=model_path),
            running_mode=vision.RunningMode.IMAGE,
        )
    )

def load_posenet(model_path="models/posenet2.tflite"):
    """Create a TFLite interpreter for PoseNet and allocate its tensors.

    Args:
        model_path: Path to the PoseNet ``.tflite`` file. Defaults to the
            path that was previously hard-coded, so existing zero-argument
            calls behave exactly as before.

    Returns:
        A ``tf.lite.Interpreter`` on which ``allocate_tensors()`` has already
        been called.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    return interpreter

def load_movenet(model_path='models/movenet-lightning.tflite'):
    """Create a TFLite interpreter for MoveNet and allocate its tensors.

    Args:
        model_path: Path to the MoveNet ``.tflite`` file. Defaults to the
            path that was previously hard-coded, so existing zero-argument
            calls behave exactly as before.

    Returns:
        A ``tf.lite.Interpreter`` on which ``allocate_tensors()`` has already
        been called.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    return interpreter

def blazepose(model, img):
    """Run a BlazePose landmarker on one image and return its pose landmarks.

    Args:
        model: Object exposing ``detect`` (e.g. the landmarker returned by
            ``load_blazepose``).
        img: Image data handed to ``mp.Image``; it is tagged as
            ``mp.ImageFormat.SRGB``.
            NOTE(review): frames read with ``cv.imread`` are BGR - confirm
            callers convert before calling this.

    Returns:
        The ``pose_landmarks`` attribute of the detection result, unmodified.
    """
    # TODO: check that this doesn't cause distortion.
    frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=img)
    # TODO: convert to standard form
    return model.detect(frame).pose_landmarks

def movenet(interpreter, img):
    """Run MoveNet on ``img`` via ``run_inference_tflite``.

    The input dtype and square input size come from ``defs.MOVENET_DTYPE``
    and ``defs.MOVENET_SHAPE``.

    Returns:
        Whatever ``run_inference_tflite`` returns for this interpreter.
    """
    return run_inference_tflite(
        interpreter,
        img,
        dtype=defs.MOVENET_DTYPE,
        shape=defs.MOVENET_SHAPE,
    )

def posenet(interpreter, img):
    """Run PoseNet on ``img`` via ``run_inference_tflite``.

    The input dtype and square input size come from ``defs.POSENET_DTYPE``
    and ``defs.POSENET_SHAPE``.

    Returns:
        Whatever ``run_inference_tflite`` returns for this interpreter (the
        interpreter's first output tensor, squeezed).
    """
    return run_inference_tflite(
        interpreter,
        img,
        dtype=defs.POSENET_DTYPE,
        shape=defs.POSENET_SHAPE,
    )

def run_inference_tflite(interpreter, img, dtype, shape=257):
    """Resize an image, run it through a TFLite interpreter, return output 0.

    Args:
        interpreter: TFLite interpreter providing ``get_input_details``,
            ``get_output_details``, ``set_tensor``, ``invoke`` and
            ``get_tensor`` (the loaders in this file call
            ``allocate_tensors()`` before returning one).
        img: Image array accepted by ``cv.resize``.
        dtype: dtype the resized image is cast to with ``tf.cast`` before it
            is fed to the model.
        shape: Side length of the square the image is resized to.

    Returns:
        The interpreter's first output tensor after ``np.squeeze``.
    """
    # TODO: check how to perform this resize without distortion. this will not yield accurate kp
    # positions, probably.
    resized = cv.resize(img, (shape, shape))
    # No cv.waitKey() here: inference needs no GUI event loop, and waitKey(0)
    # would block on a keypress whenever a HighGUI window is open (and raises
    # on headless OpenCV builds).
    batch = tf.expand_dims(tf.cast(resized, dtype=dtype), axis=0)

    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    interpreter.set_tensor(input_index, batch.numpy())
    interpreter.invoke()
    # TODO: convert to standard form
    return np.squeeze(interpreter.get_tensor(output_index))

In [2]:
# Build the PoseNet TFLite interpreter; later cells read it through `model`.
model = load_posenet()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
def process_posenet(map, offset, threshold=0):
    """Decode PoseNet heatmaps and offsets into a list of keypoints.

    For every keypoint channel the highest-scoring heatmap cell is located,
    mapped back to input-image coordinates and refined with the offset
    vector stored for that cell.

    Args:
        map: Heatmap array indexed as ``[row, col, keypoint]`` holding raw
            scores; a sigmoid is applied here to obtain the confidence.
        offset: Offset array indexed as ``[row, col, channel]`` with
            ``2 * num_keypoints`` channels. Following the PoseNet reference
            decoders, channel ``k`` is the y offset and channel
            ``k + num_keypoints`` is the x offset of keypoint ``k``.
        threshold: Keypoints are kept only when their confidence is strictly
            greater than this value.

    Returns:
        A list of ``defs.KP2D`` objects, one per keypoint that passed the
        threshold, in keypoint-index order.
    """
    grid_h, grid_w = map.shape[:2]
    num_keypoints = map.shape[-1]
    # Grid index (size - 1) corresponds to the far edge of the input image.
    # Guard against a 1-cell grid so the scale never divides by zero.
    y_steps = max(grid_h - 1, 1)
    x_steps = max(grid_w - 1, 1)
    upper = defs.POSENET_SHAPE - 1

    keypoints = []
    for idx in range(num_keypoints):
        cur_map = map[..., idx]
        y, x = np.unravel_index(np.argmax(cur_map), cur_map.shape)

        # Offsets are stored as all y channels followed by all x channels,
        # not interleaved per keypoint.
        cur_offset_y = offset[y, x, idx]
        cur_offset_x = offset[y, x, idx + num_keypoints]

        # Heatmap values are logits; squash to a (0, 1) confidence.
        conf = 1 / (1 + np.exp(-cur_map[y, x]))

        # Scale the grid cell to input pixels, refine, then clamp to the image.
        y = y / y_steps * defs.POSENET_SHAPE + cur_offset_y
        x = x / x_steps * defs.POSENET_SHAPE + cur_offset_x
        y = max(0, min(y, upper))
        x = max(0, min(x, upper))

        if conf > threshold:
            kp = defs.KP2D(idx, y, x, conf, defs.KP_DICT_17[idx])
            print(f'{kp.name} {kp.prob} {kp.x} {kp.y}')
            keypoints.append(kp)
    return keypoints

# cv.imread does not raise on a missing/unreadable file - it returns None,
# which would otherwise surface later as an opaque cv.resize error.
im = cv.imread('data/raw/test.jpg', cv.IMREAD_COLOR)
if im is None:
    raise FileNotFoundError("could not read image 'data/raw/test.jpg'")

# NOTE(review): `map` shadows the builtin for the rest of the kernel session;
# the name is kept because other cells may read it.
map = posenet(model, im)

# posenet() returns output 0; the offsets are read from output 1 of the same
# interpreter run.
out = model.get_output_details()
offset = np.squeeze(model.get_tensor(out[1]['index']))
proc = process_posenet(map, offset=offset)

In [None]:
# Scratch check: print the offset tensor's shape before and after squeezing.
# NOTE(review): the previous cell already squeezes `offset`, so on a fresh
# Restart & Run All both prints should show the same shape; the saved output
# below appears to come from an earlier kernel state - confirm or drop this cell.
# `out` is re-fetched here but not used within this cell.
out = model.get_output_details()
print(offset.shape)
print('post squeeze')
# np.squeeze removes every length-1 axis (e.g. a leading batch axis).
offset = np.squeeze(offset)
print(offset.shape)



(1, 9, 9, 34)
post squeeze
(9, 9, 34)
