# Deep Learning Multi-Person Pose Detection for Sport Training Assistance | FGG

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np

In [2]:
# Ask TensorFlow to grow GPU memory on demand rather than reserving it all up
# front, which helps avoid out-of-memory errors. Does nothing if no GPU is found.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)

## 1. Loading the Model

In [4]:
# TF-Hub handle of the MoveNet multipose (lightning) model.
MOVENET_MULTIPOSE_URL = 'https://tfhub.dev/google/movenet/multipose/lightning/1'

# Load the model and keep a handle on its default serving signature, which is
# the callable used for inference further down.
model = hub.load(MOVENET_MULTIPOSE_URL)
movenet = model.signatures['serving_default']

## 2. Make Pose Detections using the imported model

In [None]:
# Open the video source: pass 0 to use the computer's webcam, or a path to a
# video file.
cap = cv2.VideoCapture('multiple-football.mp4')
try:
    while cap.isOpened():
        # cap.read() returns (success flag, frame). When the video ends or the
        # camera stops delivering frames the flag is False and frame is None,
        # so stop instead of crashing on frame.copy().
        ret, frame = cap.read()
        if not ret:
            break

        # Resize (with padding, to keep the aspect ratio) a copy of the frame
        # to the model input size and add a leading batch dimension.
        # NOTE(review): per the MoveNet multipose model card both sides should
        # be multiples of 32, with the larger side around 256 or above --
        # confirm before changing 352x640.
        img = frame.copy()
        img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 352, 640)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detecting keypoints
        results = movenet(input_img)
        # Keep the first 51 values of each detection and view them as
        # 6 people x 17 keypoints x (y, x, score). Anything after index 51
        # (presumably the bounding-box fields -- see the model card) is dropped.
        keypoints_with_scores = (
            results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))
        )

        # Rendering keypoints; 0.3 is the minimum confidence score a keypoint
        # needs in order to be drawn, and can be tuned.
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.3)

        cv2.imshow('Movenet Multipose', frame)

        # Pressing the 'q' key exits the loop.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture device and close the preview window, even if a
    # frame fails to process. Note the call parentheses on destroyAllWindows:
    # without them the function is only referenced and the window stays open.
    cap.release()
    cv2.destroyAllWindows()

In [19]:
# Here we can see the confidence scores for each of the 17 detection points of
# the first detected person.
# Notice that the last 8 rows/detection points have low confidence scores.
# This is due to these body parts not appearing on screen, as this output was
# captured using only the computer webcam.
# The detection points not visible on screen are the right ankle, left ankle,
# right knee, left knee, right hip, left hip, right wrist and left wrist.
# Note: each row is one keypoint; 1st column -> y coordinate, 2nd column ->
# x coordinate (both scaled by the frame size when drawn), 3rd column ->
# confidence score.
keypoints_with_scores[0]

array([[0.36133632, 0.48715168, 0.54929507],
       [0.3276042 , 0.51682925, 0.59449834],
       [0.3321823 , 0.44803804, 0.8292286 ],
       [0.35596773, 0.56001365, 0.6975824 ],
       [0.37167224, 0.40303975, 0.7830701 ],
       [0.5121195 , 0.66822916, 0.9037209 ],
       [0.51122415, 0.31783044, 0.8294932 ],
       [0.73503214, 0.7720009 , 0.4944589 ],
       [0.73622394, 0.24371259, 0.7364807 ],
       [0.75935453, 0.7908059 , 0.05407606],
       [0.7857433 , 0.24542014, 0.03278195],
       [0.7795456 , 0.62658995, 0.32489312],
       [0.7779475 , 0.3909648 , 0.25077456],
       [0.7554027 , 0.78384995, 0.16993205],
       [0.75725466, 0.26078653, 0.05835774],
       [0.7747849 , 0.5441537 , 0.05672367],
       [0.75934917, 0.59486985, 0.05471556]], dtype=float32)

In [5]:
def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    """Render every detected person onto ``frame``.

    Args:
        frame: Image that is drawn on in place.
        keypoints_with_scores: Iterable with one keypoint array per person.
        edges: Mapping whose keys are pairs of keypoint indices to connect.
        confidence_threshold: Score a keypoint must exceed to be drawn.
    """
    for detection in keypoints_with_scores:
        # Skeleton lines first, then the keypoint dots on top of them.
        draw_connections(frame, detection, edges, confidence_threshold)
        draw_keypoints(frame, detection, confidence_threshold)

## 3. Drawing KeyPoints

In [6]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Draw a filled green dot on ``frame`` for each confident keypoint.

    Args:
        frame: Three-dimensional image array (height, width, channels) that
            is drawn on in place.
        keypoints: Rows of ``(y, x, score)``. ``y`` and ``x`` are multiplied
            by the frame height and width, so they are expected to be
            normalized coordinates.
        confidence_threshold: Only keypoints whose score is strictly greater
            than this value are drawn.
    """
    height, width, _ = frame.shape
    # Convert the coordinates to pixels; the score column is left untouched.
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for row, col, score in scaled:
        if score > confidence_threshold:
            # OpenCV expects points as (x, y); radius 4, thickness -1 = filled.
            center = (int(col), int(row))
            cv2.circle(frame, center, 4, (0, 255, 0), -1)

## 4. Drawing Edges

In [7]:
# Skeleton edges: each key is a pair of keypoint indices to join with a line.
# Indices follow the MoveNet keypoint order (0 nose, 1-2 eyes, 3-4 ears,
# 5-6 shoulders, 7-8 elbows, 9-10 wrists, 11-12 hips, 13-14 knees,
# 15-16 ankles; left before right -- see the MoveNet model card).
# The values are single-letter colour codes ('m', 'c', 'y'); draw_connections
# currently ignores them and draws every edge in one fixed colour.
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    # Was a duplicate (11, 12) key, which silently overwrote the entry above
    # and left the hip-to-knee segment on this side of the body undrawn.
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c',
}

In [8]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on ``frame`` for each edge whose two keypoints are confident.

    Args:
        frame: Three-dimensional image array (height, width, channels) that
            is drawn on in place.
        keypoints: Rows of ``(y, x, score)``. ``y`` and ``x`` are multiplied
            by the frame height and width, so they are expected to be
            normalized coordinates.
        edges: Mapping from ``(index_a, index_b)`` keypoint pairs to a colour
            code. The colour code is not used; every line is drawn with the
            fixed colour ``(0, 0, 255)``.
        confidence_threshold: Both endpoints must score strictly above this
            value for the line to be drawn.
    """
    height, width, _ = frame.shape
    # Convert the coordinates to pixels; the score column is left untouched.
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (start_idx, end_idx), _color in edges.items():
        start_y, start_x, start_score = scaled[start_idx]
        end_y, end_x, end_score = scaled[end_idx]

        both_confident = (
            start_score > confidence_threshold
            and end_score > confidence_threshold
        )
        if both_confident:
            # OpenCV expects points as (x, y); line thickness is 2 pixels.
            start_pt = (int(start_x), int(start_y))
            end_pt = (int(end_x), int(end_y))
            cv2.line(frame, start_pt, end_pt, (0, 0, 255), 2)