### Install and Import Dependencies

In [None]:
# Install TensorFlow 2.5.0 plus TensorFlow Hub, OpenCV and matplotlib for this notebook.
# NOTE(review): `tensorflow` and `tensorflow-gpu` are both requested at the same version —
# confirm the separate GPU package is actually needed.
%pip install tensorflow==2.5.0 tensorflow-gpu==2.5.0 tensorflow-hub opencv-python matplotlib

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np
import pickle

In [None]:
# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth can only be changed before the GPUs are initialised, so
    # re-running this cell later in the session ends up here; report and carry on.
    print(err)

In [4]:
# Sanity check: report how many GPUs TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


### Load Model

Model card linked [here](https://storage.googleapis.com/movenet/MoveNet.MultiPose%20Model%20Card.pdf)

The output of Movenet is a tensor with dimensions (1, 6, 56)
- The first dimension is always 1, that is the batch size
- The second dimension, 6, is the maximum number of people MoveNet can detect simultaneously
- The third dimension, 56, is broken down as follows
    - The first 51 values are keypoint detections, each keypoint is in format [y, x, score], with 17 keypoints (17 * 3 = 51)
    - The next 5 values are bounding box coordinates for the person and the score in the normalized format [ymin, xmin, ymax, xmax, score]

In [5]:
# Load the MoveNet MultiPose Lightning model from TensorFlow Hub.
model = hub.load("https://tfhub.dev/google/movenet/multipose/lightning/1")
# Inference entry point: the model's default serving signature, called on each input image.
movenet = model.signatures['serving_default']

### Draw Keypoints and Edges

In [12]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark every sufficiently confident keypoint on ``frame`` with a green ring.

    Args:
        frame: Image array whose ``shape`` has three entries (rows, columns,
            channels). The rings are drawn directly onto it.
        keypoints: Rows of ``[y, x, score]``; ``y`` and ``x`` are multiplied by
            the frame's row and column counts to obtain pixel positions.
        confidence_threshold: A keypoint is drawn only when its score is
            strictly greater than this value.
    """
    height, width, _channels = frame.shape
    pixel_keypoints = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for row_px, col_px, score in pixel_keypoints:
        if not score > confidence_threshold:
            continue
        centre = (int(col_px), int(row_px))  # OpenCV points are (x, y)
        cv2.circle(frame, centre, 6, (0, 255, 0), 2)

The 17 keypoints are in order
<ol>
  <li> nose </li> 
  <li> left eye </li>
  <li> right eye </li>
  <li> left ear </li>
  <li> right ear </li>
  <li> left shoulder </li>
  <li> right shoulder </li>
  <li> left elbow </li>
  <li> right elbow </li>
  <li> left wrist </li>
  <li> right wrist </li>
  <li> left hip </li>
  <li> right hip </li>
  <li> left knee </li>
  <li> right knee </li>
  <li> left ankle </li>
  <li> right ankle </li>
</ol>

The edges predefine the connections to be made between the keypoints that are above \
the confidence threshold.  Since Python is 0-indexed, we will take nose as node 0, left \
eye as node 1, and so on.  

In [7]:
# Skeleton definition: (keypoint index A, keypoint index B) -> colour code of that edge.
# Indices follow the 17-keypoint order listed above (0 = nose ... 16 = right ankle).
EDGES = {
    # head: nose-eyes, eyes-ears
    (0, 1): 'm', (0, 2): 'c', (1, 3): 'm', (2, 4): 'c',
    # nose to shoulders
    (0, 5): 'm', (0, 6): 'c',
    # left arm, then right arm
    (5, 7): 'm', (7, 9): 'm', (6, 8): 'c', (8, 10): 'c',
    # torso: shoulders, sides, hips
    (5, 6): 'y', (5, 11): 'm', (6, 12): 'c', (11, 12): 'y',
    # left leg, then right leg
    (11, 13): 'm', (13, 15): 'm', (12, 14): 'c', (14, 16): 'c',
}

In [8]:
# OpenCV (B, G, R) triples for the single-letter colour codes used in edge maps.
_EDGE_COLOR_BGR = {
    'm': (255, 0, 255),  # magenta
    'c': (255, 255, 0),  # cyan
    'y': (0, 255, 255),  # yellow
}
# Fallback for missing or unrecognised colour codes.
_DEFAULT_EDGE_BGR = (0, 0, 255)


def _edge_color_to_bgr(color):
    """Translate an edge-map colour value into a BGR tuple usable by OpenCV."""
    if color is None:
        return _DEFAULT_EDGE_BGR
    if isinstance(color, str):
        return _EDGE_COLOR_BGR.get(color, _DEFAULT_EDGE_BGR)
    return tuple(color)  # already an explicit colour triple


def draw_edges(frame, keypoints, confidence_threshold, edges):
    """Draw the skeleton lines between pairs of confident keypoints on ``frame``.

    Args:
        frame: Image array whose ``shape`` has three entries (rows, columns,
            channels). The lines are drawn directly onto it.
        keypoints: Rows of ``[y, x, score]``; ``y`` and ``x`` are multiplied by
            the frame's row and column counts to obtain pixel positions.
        confidence_threshold: An edge is drawn only when the scores of both of
            its endpoints are strictly greater than this value.
        edges: Mapping of ``(index_a, index_b)`` keypoint pairs to a colour.
            The colour may be one of the codes ``'m'``, ``'c'``, ``'y'`` or an
            explicit 3-tuple; anything else falls back to ``(0, 0, 255)``.
    """
    height, width, _channels = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (p1, p2), color in edges.items():
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]

        if c1 > confidence_threshold and c2 > confidence_threshold:
            # Honour the per-edge colour from the edge map instead of a fixed colour.
            cv2.line(
                frame,
                (int(x1), int(y1)),
                (int(x2), int(y2)),
                _edge_color_to_bgr(color),
                2,
            )



In [9]:
def draw_pose(frame, keypoints_with_scores, edges, confidence_threshold):
    """Overlay the pose of every entry in ``keypoints_with_scores`` onto ``frame``.

    For each person the skeleton edges are rendered first and the keypoint
    markers second.

    Args:
        frame: Image to draw on; passed through to the drawing helpers.
        keypoints_with_scores: Iterable with one keypoint collection per person.
        edges: Edge map forwarded to ``draw_edges``.
        confidence_threshold: Score threshold forwarded to both helpers.
    """
    for person_keypoints in keypoints_with_scores:
        draw_edges(
            frame=frame,
            keypoints=person_keypoints,
            confidence_threshold=confidence_threshold,
            edges=edges,
        )
        draw_keypoints(
            frame=frame,
            keypoints=person_keypoints,
            confidence_threshold=confidence_threshold,
        )

### Making Comparisons

In [None]:
# Load the reference pose image and preview it; press any key to close the window.
ref_img = cv2.imread("test_images/cbum.jpg", cv2.IMREAD_COLOR)
if ref_img is None:
    # cv2.imread reports a missing or unreadable file by returning None, not by raising.
    raise FileNotFoundError("Could not read reference image: test_images/cbum.jpg")

try:
    cv2.imshow("Reference Image", ref_img)
    cv2.waitKey(0)
finally:
    # Close the preview window even if the cell is interrupted.
    cv2.destroyAllWindows()

# TODO: run MoveNet on the reference image and cache the detections, e.g.
# ref_img = movenet()

# with open('cbum.pkl', 'wb+') as f:
#     pickle.dump(_, f)

In [None]:
def make_detections():
    """Placeholder for the detection routine; not implemented yet.

    Takes no arguments, performs no work and returns ``None``.
    """
    return None

### Make Detections Live

In [13]:
# Stream frames from the default webcam, run MoveNet on each one and show the
# annotated frame until 'q' is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unavailable or stream ended).
            break

        # according to movenet model card, resize image to multiple of 32
        # NOTE(review): OpenCV frames are BGR — confirm whether the model expects RGB input.
        img = frame.copy()
        img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 192, 256)  # relative to aspect ratio of frame, resized according to model needs
        input_img = tf.cast(img, dtype=tf.int32)

        # run detections: keep the first 51 values per person (17 keypoints x [y, x, score])
        results = movenet(input_img)
        keypoints_with_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # render pose
        draw_pose(frame, keypoints_with_scores, EDGES, 0.3)

        cv2.imshow('Movenet Multipurpose', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

### Make Detections Live, Comparing to Picture