# Human Pose Estimation using MoveNet

### Import packages

In [8]:
import tensorflow as tf
import numpy as np
import mediapipe as mp
import cv2
import time
import psutil
from line_profiler import LineProfiler

### Visualisation Fns

In [9]:
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

In [10]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y,x,1]))
    
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0,255,0), -1) 

In [11]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y,x,1]))
    
    for edge, color in edges.items():
        p1, p2 = edge
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]
        
        if (c1 > confidence_threshold) & (c2 > confidence_threshold):      
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2)

### MoveNet Thunder

In [12]:
thunder_interpreter = tf.lite.Interpreter(model_path='models/movenet-thunder.tflite')
thunder_interpreter.allocate_tensors()

ValueError: Could not open 'models/movenet-thunder.tflite'.

In [6]:
cap = cv2.VideoCapture(0)

prev_frame_time = 0
new_frame_time = 0

while cap.isOpened():
    ret, frame = cap.read()
    
    if not ret: 
      break
    
    # Reshape image
    img = frame.copy()
    img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 256,256)
    input_image = tf.cast(img, dtype=tf.float32)
    
    # Setup input and output 
    input_details = thunder_interpreter.get_input_details()
    output_details = thunder_interpreter.get_output_details()
    
    # Make predictions 
    thunder_interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
    thunder_interpreter.invoke()
    keypoints_with_scores = thunder_interpreter.get_tensor(output_details[0]['index'])
    
    # Calculate frame rate
    new_frame_time = time.time() 
    
    fps = 1/(new_frame_time-prev_frame_time) 
    prev_frame_time = new_frame_time 
    fps = str(int(fps)) 
    
    ram_usage = psutil.virtual_memory().percent
    cpu_usage = psutil.cpu_percent()

    # putting the FPS count on the frame 
    cv2.putText(frame, 'FPS: {}'.format(fps), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100, 255, 0), 2) 
    
    # Display the RAM usage
    cv2.putText(frame, f"RAM Usage: {ram_usage}%", (0, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100, 255, 0), 2)
    
    # Display CPU usage
    cv2.putText(frame, f"CPU Usage: {cpu_usage}%", (0, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100,255,0), 2)

  
    # Rendering     
    draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
    draw_keypoints(frame, keypoints_with_scores, 0.4)
    
    cv2.imshow('MoveNet Thunder', frame)

    if cv2.waitKey(10) & 0xFF==ord('q'):
        break
        
cap.release()
cv2.destroyAllWindows()

NameError: name 'thunder_interpreter' is not defined

### MoveNet Lightning

In [7]:
lightning_interpreter = tf.lite.Interpreter(model_path='models/movenet-lightning.tflite')
lightning_interpreter.allocate_tensors()

ValueError: Could not open 'models/movenet-lightning.tflite'.

In [13]:
cap = cv2.VideoCapture(0)

prev_frame_time = 0
new_frame_time = 0

while cap.isOpened():
    ret, frame = cap.read()
    
    if not ret: 
      break
    
    # Reshape image
    img = frame.copy()
    img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192,192)
    input_image = tf.cast(img, dtype=tf.float32)
    
    # Setup input and output 
    input_details = lightning_interpreter.get_input_details()
    output_details = lightning_interpreter.get_output_details()
    
    # Make predictions 
    lightning_interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
    lightning_interpreter.invoke()
    keypoints_with_scores = lightning_interpreter.get_tensor(output_details[0]['index'])
    
      # Calculate frame rate
    new_frame_time = time.time() 
    
    fps = 1/(new_frame_time-prev_frame_time) 
    prev_frame_time = new_frame_time 
    fps = str(int(fps)) 
    
    ram_usage = psutil.virtual_memory().percent
    cpu_usage = psutil.cpu_percent()

    # putting the FPS count on the frame 
    cv2.putText(frame, 'FPS: {}'.format(fps), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100, 255, 0), 2) 
    
    # Display the RAM usage
    cv2.putText(frame, f"RAM Usage: {ram_usage}%", (0, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100, 255, 0), 2)
    
    # Display CPU usage
    cv2.putText(frame, f"CPU Usage: {cpu_usage}%", (0, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100,255,0), 2)

  
    # Rendering     
    draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
    draw_keypoints(frame, keypoints_with_scores, 0.4)
    
    
    cv2.imshow('MoveNet Lightning', frame)
    
    if cv2.waitKey(10) & 0xFF==ord('q'):
        break
        
cap.release()
cv2.destroyAllWindows()

[ WARN:0@30.386] global cap_v4l.cpp:997 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:0@30.387] global obsensor_uvc_stream_channel.cpp:159 getStreamChannelGroup Camera index out of range


# Human Pose Estimation using MediaPipe (MP Pose)

In [14]:
# Initialize MediaPipe pose model
mp_pose = mp.solutions.pose

# Supported Mediapipe visualization tools
mp_drawing = mp.solutions.drawing_utils

pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Initialize Haar cascade classifier for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

I0000 00:00:1714120593.639821  116205 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1714120593.642711  116889 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) UHD Graphics 620 (KBL GT2)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [15]:
# Function to perform quality control checks and overlay text boxes and pose landmarks
def perform_quality_control(frame):
    # Convert frame to RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Perform pose estimation with MediaPipe
    results = pose.process(rgb_frame)

    # Check if body is present
    is_body_present = results.pose_landmarks is not None

    # Perform face detection with Haar cascade classifier
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    is_face_present = len(faces) > 0

    # Check for proper lighting
    mean_intensity = cv2.mean(gray)[0]  # Mean intensity of the frame
    is_proper_lighting = mean_intensity > 100  # Adjust this threshold as needed

    # Overlay text boxes on the frame
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    font_color = (100, 255, 0)
    line_thickness = 1

    # Display face detection result
    face_text = "Face Present" if is_face_present else "Face Not Present"
    cv2.putText(frame, face_text, (440, 20), font, font_scale, font_color, line_thickness)

    # Display body detection result
    body_text = "Body Present" if is_body_present else "Body Not Present"
    cv2.putText(frame, body_text, (440, 50), font, font_scale, font_color, line_thickness)

    # Display proper lighting check result
    lighting_text = "Proper Lighting" if is_proper_lighting else "Low Lighting"
    cv2.putText(frame, lighting_text, (440, 80), font, font_scale, font_color, line_thickness)
    
  
    # Overlay pose landmarks on the frame
    if results.pose_landmarks is not None:
        mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

    return frame

In [16]:
# Start camera capture
cap = cv2.VideoCapture(0)

prev_frame_time = 0
new_frame_time = 0

# Profile the perform_quality_control function
profiler = LineProfiler()
profiler.add_function(perform_quality_control)

# Loop to capture frames and perform quality control checks
while True:
    # Read frame from the camera
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to capture frame.")
        break

    profiler.enable_by_count()
    frame_with_text = perform_quality_control(frame)
    profiler.disable_by_count()

    # Calculate frame rate
    new_frame_time = time.time() 
    
    fps = 1/(new_frame_time-prev_frame_time) 
    prev_frame_time = new_frame_time 
    fps = str(int(fps)) 
    
    ram_usage = psutil.virtual_memory().percent
    cpu_usage = psutil.cpu_percent()

    # putting the FPS count on the frame 
    cv2.putText(frame, 'FPS: {}'.format(fps), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100, 255, 0), 2) 
    
    # Display the RAM usage
    cv2.putText(frame, f"RAM Usage: {ram_usage}%", (0, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100, 255, 0), 2)
    
    # Display CPU usage
    cv2.putText(frame, f"CPU Usage: {cpu_usage}%", (0, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (100,255,0), 2)

  
    # Display the output frame
    cv2.imshow('Media Pipe', frame)
    
    # Check for 'q' key to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()

qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/dell/.local/lib/python3.10/site-packages/cv2/qt/plugins"
