# Pose Detection Using MediaPipe

## Installing libraries and importing dependencies

In [1]:
!pip install mediapipe opencv-python

/Users/ananyasingh/.zshenv:1: /Library/Java/JavaVirtualMachines/jdk-17.jdk/Contents/Home not found

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import cv2
import mediapipe as mp
import numpy as np
# Short aliases for the two MediaPipe sub-modules used in the cells below.
mp_pose = mp.solutions.pose  # pose-estimation model plus the landmark/connection definitions
mp_drawing = mp.solutions.drawing_utils  # drawing helpers used to visualise the detected pose

In [4]:
# Smoke test: stream the webcam into a window to confirm that the
# dependencies and the camera work. Press 'q' in the window to quit.
cap = cv2.VideoCapture(0)  # 0 is the capture-device index (specific to the machine)
try:
    while cap.isOpened():
        # ret reports whether a frame was grabbed; frame is the image itself
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera busy or unplugged); imshow would
            # fail on the missing image, so stop cleanly instead.
            break
        cv2.imshow("Mediapipe Feed", frame)  # visualise the frame

        # waitKey also lets the window process events; quit when 'q' is pressed
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the webcam and close the windows, even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()




## Make Detections

In [5]:
cap = cv2.VideoCapture(0)
try:
    # Set up the MediaPipe pose instance; the context manager closes it on exit.
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; cvtColor would raise on the missing image.
                break

            # OpenCV delivers frames in BGR order, so convert to RGB before
            # handing the image to MediaPipe.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Mark the array read-only while it is being processed
            # (presumably a performance hint for MediaPipe; confirm in its docs).
            image.flags.writeable = False

            # Make the detection
            results = pose.process(image)

            # Back to a writable BGR image, because OpenCV draws and displays in BGR
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render the detection: draw the landmarks and their connections.
            # results.pose_landmarks may be None when no pose was detected.
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
            cv2.imshow("Mediapipe Feed", image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break  # leave the feed loop
finally:
    # Always free the webcam and close the windows, even on an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1749754725.464785  461464 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1749754725.536290  475174 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749754725.551577  475177 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749754725.644595  475176 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


In [6]:
# results.pose_landmarks
# mp_pose.POSE_CONNECTIONS

## Determining Joints

In [9]:
cap = cv2.VideoCapture(0)
try:
    # Set up the MediaPipe pose instance; the context manager closes it on exit.
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; cvtColor would raise on the missing image.
                break

            # OpenCV delivers frames in BGR order, so convert to RGB before
            # handing the image to MediaPipe.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make the detection
            results = pose.process(image)

            # Back to a writable BGR image, because OpenCV draws and displays in BGR
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Extract landmarks only when a pose was actually found. This replaces
            # a bare `except: pass`, which hid every error and made the print
            # below raise NameError when the first frame had no detection.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                print(landmarks)

            # Render the detection: draw the landmarks and their connections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
            cv2.imshow("Mediapipe Feed", image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break  # leave the feed loop
finally:
    # Always free the webcam and close the windows, even on an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1749754760.236106  461464 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2
W0000 00:00:1749754760.309366  475634 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749754760.321175  475634 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[x: 0.709387302
y: 0.662733614
z: -1.3414067
visibility: 0.99665
, x: 0.721593261
y: 0.575847208
z: -1.31860483
visibility: 0.992608964
, x: 0.73599875
y: 0.569730103
z: -1.31847656
visibility: 0.993461132
, x: 0.748797238
y: 0.563788116
z: -1.31842399
visibility: 0.991538703
, x: 0.670437276
y: 0.595212042
z: -1.35219741
visibility: 0.994730055
, x: 0.646823585
y: 0.602905869
z: -1.35185933
visibility: 0.995548904
, x: 0.623444557
y: 0.61022
z: -1.35219646
visibility: 0.994901776
, x: 0.753510356
y: 0.587568402
z: -0.990482152
visibility: 0.992189407
, x: 0.572882
y: 0.633027911
z: -1.13170767
visibility: 0.997733057
, x: 0.737401247
y: 0.744452417
z: -1.19874656
visibility: 0.995784283
, x: 0.67146194
y: 0.769281685
z: -1.24089861
visibility: 0.998040438
, x: 0.826583564
y: 0.941298783
z: -0.617201209
visibility: 0.98222357
, x: 0.431763411
y: 0.905047297
z: -0.827497125
visibility: 0.990441442
, x: 0.913666725
y: 1.37042046
z: -0.477718532
visibility: 0.0456782281
, x: 0.320110857
y

In [10]:
# Number of landmarks in the list captured by the detection loop above.
len(landmarks)

33

In [11]:
# List every member of the PoseLandmark enum, one per line.
print(*mp_pose.PoseLandmark, sep="\n")

PoseLandmark.NOSE
PoseLandmark.LEFT_EYE_INNER
PoseLandmark.LEFT_EYE
PoseLandmark.LEFT_EYE_OUTER
PoseLandmark.RIGHT_EYE_INNER
PoseLandmark.RIGHT_EYE
PoseLandmark.RIGHT_EYE_OUTER
PoseLandmark.LEFT_EAR
PoseLandmark.RIGHT_EAR
PoseLandmark.MOUTH_LEFT
PoseLandmark.MOUTH_RIGHT
PoseLandmark.LEFT_SHOULDER
PoseLandmark.RIGHT_SHOULDER
PoseLandmark.LEFT_ELBOW
PoseLandmark.RIGHT_ELBOW
PoseLandmark.LEFT_WRIST
PoseLandmark.RIGHT_WRIST
PoseLandmark.LEFT_PINKY
PoseLandmark.RIGHT_PINKY
PoseLandmark.LEFT_INDEX
PoseLandmark.RIGHT_INDEX
PoseLandmark.LEFT_THUMB
PoseLandmark.RIGHT_THUMB
PoseLandmark.LEFT_HIP
PoseLandmark.RIGHT_HIP
PoseLandmark.LEFT_KNEE
PoseLandmark.RIGHT_KNEE
PoseLandmark.LEFT_ANKLE
PoseLandmark.RIGHT_ANKLE
PoseLandmark.LEFT_HEEL
PoseLandmark.RIGHT_HEEL
PoseLandmark.LEFT_FOOT_INDEX
PoseLandmark.RIGHT_FOOT_INDEX


In [12]:
# Grab a single landmark: the enum member's .value is used as the index into the list.
landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]

x: 0.649991632
y: 0.783108771
z: -0.213259175
visibility: 0.999445856

In [13]:
# Left elbow landmark, indexed through the enum member's .value.
landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value]

x: 0.952902496
y: 0.772398949
z: -0.716525316
visibility: 0.98647809

In [14]:
# Left wrist landmark, indexed through the enum member's .value.
landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value]

x: 0.759187162
y: 0.533177733
z: -1.126333
visibility: 0.969078481

## Calculating Angles

In [15]:
def calculate_angle(a,b,c):
    """Return the angle in degrees at point ``b`` between the rays b->a and b->c.

    Only the first two coordinates (x, y) of each point are used. The result
    is folded so that it never exceeds 180 degrees.

    Args:
        a: First point, a sequence whose first two items are x and y.
        b: Middle point (the vertex of the angle).
        c: Third point.

    Returns:
        The unsigned angle at ``b`` in degrees.
    """
    first, vertex, third = np.array(a), np.array(b), np.array(c)

    # Direction of each ray as seen from the vertex, in radians.
    heading_to_third = np.arctan2(third[1]-vertex[1], third[0]-vertex[0])
    heading_to_first = np.arctan2(first[1]-vertex[1], first[0]-vertex[0])

    degrees = np.abs((heading_to_third - heading_to_first)*180.0/np.pi)

    # A difference above 180 degrees is reported as its complement to 360.
    return 360.0-degrees if degrees > 180.0 else degrees
                                                        
    

In [16]:
# Pull the [x, y] pair of each left-arm joint out of the landmark list.
shoulder, elbow, wrist = (
    [landmarks[joint.value].x, landmarks[joint.value].y]
    for joint in (
        mp_pose.PoseLandmark.LEFT_SHOULDER,
        mp_pose.PoseLandmark.LEFT_ELBOW,
        mp_pose.PoseLandmark.LEFT_WRIST,
    )
)

In [17]:
# Display the three [x, y] pairs for the left shoulder, elbow and wrist.
shoulder, elbow,wrist

([0.6499916315078735, 0.7831087708473206],
 [0.9529024958610535, 0.7723989486694336],
 [0.7591871619224548, 0.5331777334213257])

In [18]:
# Angle at the elbow, in degrees, between the shoulder and the wrist.
# (The earlier duplicate call was dropped: its result was discarded.)
angle = calculate_angle(shoulder,elbow,wrist)
print(angle)

53.02524237534237


In [19]:
# Render the elbow angle as text on the live feed
cap = cv2.VideoCapture(0)
try:
    # Set up the MediaPipe pose instance; the context manager closes it on exit.
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; cvtColor would raise on the missing image.
                break

            # OpenCV delivers frames in BGR order, so convert to RGB before
            # handing the image to MediaPipe.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make the detection
            results = pose.process(image)

            # Back to a writable BGR image, because OpenCV draws and displays in BGR
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Only annotate frames in which a pose was found. This replaces a bare
            # `except: pass` that silently hid every error in the block.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark

                # Get the [x, y] coordinates of the left-arm joints
                shoulder = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].x,
                            landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].y]
                elbow = [landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].x,
                         landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].y]
                wrist = [landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].x,
                         landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].y]

                # Angle at the elbow
                angle = calculate_angle(shoulder, elbow, wrist)

                # Scale the elbow's coordinates by the frame size to get the pixel
                # position for the label; .tolist() yields plain Python ints.
                height, width, _ = image.shape
                elbow_px = tuple(np.multiply(elbow, [width, height]).astype(int).tolist())

                # Draw the angle at the elbow
                cv2.putText(image, f"{angle:.2f}",  # rounded to 2 decimals
                            elbow_px,
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1,          # font scale
                            (0,255,0),  # bright green for visibility
                            2,          # thickness
                            cv2.LINE_AA)

            # Render the detection: draw the landmarks and their connections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
            cv2.imshow("Mediapipe Feed", image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break  # leave the feed loop
finally:
    # Always free the webcam and close the windows, even on an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1749754791.574946  461464 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2
W0000 00:00:1749754791.657672  475965 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749754791.671354  475965 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


## Curl Counter

In [20]:
# Curl counter variables
counter = 0   # counts the reps
stage = None  # direction of the arm ('Down'/'Up'); None until the first 'Down' is seen

# Elbow-angle thresholds (degrees) used by the counter logic below
CURL_DOWN_ANGLE = 160  # above this the arm counts as 'Down'
CURL_UP_ANGLE = 30     # below this, coming from 'Down', a rep is counted

# Render the angle and the rep counter on the live feed
cap = cv2.VideoCapture(0)
try:
    # Set up the MediaPipe pose instance; the context manager closes it on exit.
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; cvtColor would raise on the missing image.
                break

            # OpenCV delivers frames in BGR order, so convert to RGB before
            # handing the image to MediaPipe.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make the detection
            results = pose.process(image)

            # Back to a writable BGR image, because OpenCV draws and displays in BGR
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Only update the angle and counter on frames in which a pose was found.
            # This replaces a bare `except: pass` that hid every error in the block.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark

                # Get the [x, y] coordinates of the left-arm joints
                shoulder = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].x,
                            landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].y]
                elbow = [landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].x,
                         landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].y]
                wrist = [landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].x,
                         landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].y]

                # Angle at the elbow
                angle = calculate_angle(shoulder, elbow, wrist)

                # Scale the elbow's coordinates by the frame size to get the pixel
                # position for the label; .tolist() yields plain Python ints.
                height, width, _ = image.shape
                elbow_px = tuple(np.multiply(elbow, [width, height]).astype(int).tolist())

                # Draw the angle at the elbow
                cv2.putText(image, f"{angle:.2f}",  # rounded to 2 decimals
                            elbow_px,
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1,          # font scale
                            (0,255,0),  # bright green for visibility
                            2,          # thickness
                            cv2.LINE_AA)

                # Curl counter logic: a rep is counted on the transition from
                # 'Down' to 'Up'.
                if angle > CURL_DOWN_ANGLE:
                    stage = 'Down'
                if angle < CURL_UP_ANGLE and stage == 'Down':
                    stage = 'Up'
                    counter += 1

            # Render the curl counter
            # Status box: start, end, colour and line width (-1 means filled)
            cv2.rectangle(image, (0,0), (225,73), (245,117,16), -1)

            # Rep data
            cv2.putText(image, 'REPS', (15,12),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1, cv2.LINE_AA)
            cv2.putText(image, str(counter), (10,60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 2, cv2.LINE_AA)

            # Stage data
            cv2.putText(image, 'STAGE', (65,12),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1, cv2.LINE_AA)
            # stage is still None before the first 'Down'; pass an empty string so
            # putText always receives a str rather than relying on how the OpenCV
            # binding treats None.
            cv2.putText(image, stage or '', (60,60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 2, cv2.LINE_AA)

            # Render the detection: draw the landmarks and their connections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
            cv2.imshow("Mediapipe Feed", image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break  # leave the feed loop
finally:
    # Always free the webcam and close the windows, even on an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1749754805.594611  461464 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2
W0000 00:00:1749754805.671402  476180 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749754805.683497  476181 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
