In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random
import time
from collections import defaultdict

2024-06-26 12:27:25.149960: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-26 12:27:25.198730: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-26 12:27:25.199592: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Fetch the MoveNet multipose "lightning" model from TensorFlow Hub and keep a
# handle on its default serving signature, which is what gets called per frame.
MOVENET_HANDLE = "https://tfhub.dev/google/movenet/multipose/lightning/1"
model = hub.load(MOVENET_HANDLE)
movenet = model.signatures['serving_default']

In [5]:
# Keypoint names. Order matters: estimate_pose pairs the i-th name with the
# i-th (y, x, score) triple of the model output.
label = ["nose"] + [
    f"{side} {part}"
    for part in ("eye", "ear", "shoulder", "elbow", "wrist", "hip", "knee", "ankle")
    for side in ("left", "right")
]

# Minimum keypoint score to keep/draw. estimate_pose multiplies the whole model
# row (scores included) by 256 before comparing, so 25 here is roughly 0.1 on
# the model's native 0..1 scale.
score_threshold = 25

In [6]:
# Function to perform pose estimation on a single frame
def estimate_pose(frame, movenet, label):
    """Run MoveNet on one frame and return the highest-scoring detection's keypoints.

    The frame is letterboxed to 256x256 with ``tf.image.resize_with_pad``
    (uniform resize plus centred padding), so the model's normalised
    coordinates refer to the padded square. They are mapped back to the
    original frame by removing the padding offset and then undoing the uniform
    resize. Scaling each axis independently, as if the frame had been
    stretched, misplaces keypoints on any non-square frame.

    Args:
        frame: image array of shape (height, width, channels).
        movenet: callable returning a dict whose ``'output_0'`` entry is the
            detection tensor, indexed here as [0, detection, values].
        label: keypoint names; the i-th name is paired with the i-th
            (y, x, score) triple of the chosen detection.

    Returns:
        A ``(keypoints_dict, keypoints)`` tuple. ``keypoints_dict`` maps a
        keypoint name to ``[x, y, score]`` where x and y are integer pixel
        positions in the original frame and ``score`` is the model score
        multiplied by 256 (the scale the module-level ``score_threshold`` is
        expressed in). Only keypoints whose score exceeds ``score_threshold``
        are included. ``keypoints`` is the raw model output as a NumPy array.
    """
    input_size = 256
    frame_height, frame_width, _ = frame.shape

    batched = tf.expand_dims(frame, axis=0)
    input_image = tf.cast(
        tf.image.resize_with_pad(batched, input_size, input_size), dtype=tf.int32
    )

    # Geometry of the letterbox: the longer side fills the square, the shorter
    # side is centred with equal padding on both ends.
    ratio = max(frame_height, frame_width) / input_size
    pad_y = (input_size - frame_height / ratio) / 2
    pad_x = (input_size - frame_width / ratio) / 2

    outputs = movenet(input_image)
    keypoints = outputs['output_0'].numpy()

    # Column 55 is used to rank detections (the per-detection score according
    # to the MoveNet multipose model documentation).
    best_detection = keypoints[0, :, 55].argmax()

    # Scale the whole row to the 256 px square. This also scales the scores,
    # which is why score_threshold is on a 0..256 scale.
    values = (keypoints[0, best_detection, :] * input_size).astype(float)

    # The last 5 values are not keypoints; the rest are (y, x, score) triples.
    triples = values[:len(values) - 5].reshape(-1, 3)

    keypoints_dict = {}
    for name, (y_px, x_px, score) in zip(label, triples):
        if score > score_threshold:
            x = int((x_px - pad_x) * ratio)
            y = int((y_px - pad_y) * ratio)
            # Plain Python numbers so the values can be handed straight to OpenCV.
            keypoints_dict[name] = [x, y, float(score)]

    return keypoints_dict, keypoints

In [7]:

# Function to draw the predicted keypoints and connections on the frame
def draw_pose(frame, keypoints_dict, label):
    """Draw keypoint markers, their names and the skeleton onto a copy of ``frame``.

    The channels are swapped before drawing and swapped back afterwards, so the
    returned image has the same channel order as the input; the input array
    itself is not modified because ``cv2.cvtColor`` returns a new array.

    Args:
        frame: image array to annotate.
        keypoints_dict: mapping of keypoint name -> ``[x, y, score]``.
        label: unused; kept so existing callers do not break.

    Returns:
        The annotated image.
    """
    colour = (0, 255, 255)
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    # Keep only keypoints whose score beats the module-level score_threshold
    # (same scale as the scores stored in keypoints_dict), and convert the
    # coordinates to plain ints once: OpenCV drawing calls need integer
    # points, and the dictionary may hold NumPy scalars.
    visible = {
        key: (int(x), int(y))
        for key, (x, y, score) in keypoints_dict.items()
        if score > score_threshold
    }

    for key, (x, y) in visible.items():
        cv2.circle(frame, (x, y), 5, colour, -1)
        cv2.putText(frame, key, (x + 5, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 1, cv2.LINE_AA)

    # Define connections between keypoints
    connections = [
        ('nose', 'left eye'), ('left eye', 'left ear'), ('nose', 'right eye'), ('right eye', 'right ear'),
        ('nose', 'left shoulder'), ('left shoulder', 'left elbow'), ('left elbow', 'left wrist'),
        ('nose', 'right shoulder'), ('right shoulder', 'right elbow'), ('right elbow', 'right wrist'),
        ('left shoulder', 'left hip'), ('right shoulder', 'right hip'), ('left hip', 'right hip'),
        ('left hip', 'left knee'), ('right hip', 'right knee'), ('left knee', 'left ankle'), ('right knee', 'right ankle')
    ]

    # A bone is drawn only when both of its endpoints were drawn, so the
    # skeleton never connects to a keypoint that was filtered out above.
    for start_key, end_key in connections:
        if start_key in visible and end_key in visible:
            cv2.line(frame, visible[start_key], visible[end_key], colour, 2)

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return frame


In [8]:
# Helper: turn the raw model output into a hashable Q-table key
def _encode_state(keypoints, bins=16):
    """Quantise the first batch entry of ``keypoints`` and return it as bytes.

    Casting the raw values straight to int truncates anything below 1 to 0,
    which collapses values in the 0..1 range onto a single state. Multiplying
    by ``bins`` and rounding first keeps a coarse, repeatable description of
    the pose.
    """
    return np.round(keypoints[0].flatten() * bins).astype(int).tobytes()


# Function to capture frames from the camera and perform pose estimation
def pose_estimation_camera(movenet, label, q_table, alpha, gamma, epsilon):
    """Run an interactive webcam loop: detect a pose, guess a label, learn from feedback.

    For every captured frame the pose is estimated, a label is chosen with an
    epsilon-greedy policy over ``q_table``, the user is asked (blocking, via
    ``input``) for the correct label, and the Q-value of the chosen label is
    updated. The annotated frame is then shown; pressing ``q`` in the window
    ends the loop.

    Args:
        movenet: model callable forwarded to ``estimate_pose``.
        label: list of candidate labels (also used as keypoint names).
        q_table: mapping of state bytes -> {label: value}; updated in place.
        alpha: learning rate.
        gamma: discount factor.
        epsilon: probability of picking a random label instead of the best one.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; the MoveNet documentation specifies
            # RGB input, so convert for inference only.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Perform pose estimation
            keypoints_dict, keypoints = estimate_pose(rgb_frame, movenet, label)

            # One state key per frame, reused for lookup and update.
            state = _encode_state(keypoints)

            # Initialize Q-values for the new state if not already initialized
            if state not in q_table:
                q_table[state] = {lbl: 0 for lbl in label}
            action_values = q_table[state]

            # Epsilon-greedy choice of the predicted label
            if random.uniform(0, 1) < epsilon:
                predicted_label = random.choice(label)
            else:
                predicted_label = max(action_values, key=action_values.get)

            # Get the correct label from the user (blocks until Enter is pressed)
            correct_label = input("Enter the correct label: ").strip()

            # Calculate the reward
            reward = 1 if predicted_label == correct_label else -1

            # Update Q-table; labels not seen before for this state start at 0
            action_values.setdefault(correct_label, 0)
            old_value = action_values.get(predicted_label, 0)
            action_values[predicted_label] = old_value + alpha * (
                reward + gamma * max(action_values.values()) - old_value
            )

            # draw_pose returns the image in the channel order it was given, so
            # pass the original BGR frame, which is what cv2.imshow expects.
            frame_with_pose = draw_pose(frame, keypoints_dict, label)

            # Display the frame with pose estimation
            cv2.imshow('Pose Estimation', frame_with_pose)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, including when input()
        # is interrupted or inference raises.
        cap.release()
        cv2.destroyAllWindows()


In [9]:
# Hyper-parameters of the tabular Q-learning update:
# learning rate, discount factor, exploration rate.
alpha, gamma, epsilon = 0.1, 0.9, 0.1


def _new_action_values():
    """Return an empty per-state table whose unseen actions default to 0.0."""
    return defaultdict(float)


# Q-table: state -> {action: value}; unseen states are created on first access.
q_table = defaultdict(_new_action_values)

In [1]:
# Start the interactive webcam loop. The cells that define
# pose_estimation_camera, the model and the Q-learning settings must have been
# executed in this kernel first, otherwise this call raises NameError.
pose_estimation_camera(
    movenet=movenet,
    label=label,
    q_table=q_table,
    alpha=alpha,
    gamma=gamma,
    epsilon=epsilon,
)

NameError: name 'pose_estimation_camera' is not defined