In [5]:
import cv2
import mediapipe as mp

# Real-time pose landmark detection on the webcam feed.
# Each frame is run through MediaPipe Pose and shown with the detected
# skeleton drawn on top; press 'q' in the preview window to stop.

# Initialize MediaPipe pose model and drawing helpers
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose = mp_pose.Pose()

# Drawing styles never change between frames, so build them once up front
landmark_spec = mp_drawing.DrawingSpec(color=(0, 0, 0), thickness=2, circle_radius=2)
connection_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)

# Initialize webcam feed
cap = cv2.VideoCapture(0)  # Use 0 for default webcam, change accordingly if you have multiple cameras

try:
    # Without this message a missing/busy camera would make the cell finish silently
    if not cap.isOpened():
        print("Could not open webcam.")

    while cap.isOpened():
        # Read frame from webcam
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame from webcam.")
            break

        # Convert BGR to RGB before handing the frame to the pose model
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process frame with MediaPipe Pose model
        results = pose.process(rgb_frame)

        # Draw landmarks and their connections directly onto the BGR frame
        if results.pose_landmarks:
            mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                      landmark_drawing_spec=landmark_spec,
                                      connection_drawing_spec=connection_spec)

        # Display frame with landmarks
        cv2.imshow('Body Landmarks Detection', frame)

        # Break the loop when 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even when the loop is left through an exception or a
    # kernel interrupt; otherwise the camera stays locked and the window hangs.
    cap.release()
    pose.close()
    cv2.destroyAllWindows()


In [18]:
# Detect pose landmarks in a single image file, display the annotated
# result in a window and save it to disk.

# Initialize MediaPipe pose model and drawing helpers
# NOTE(review): MediaPipe documents static_image_mode=True for still images;
# the default is kept here so detection results stay exactly as before.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose = mp_pose.Pose()

# Load image from local file
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
image_path = 'E:/Github_projects/MeasureMe--Precise-online-dress-measurement-with-computer-vision/2.jpg'
frame = cv2.imread(image_path)

# cv2.imread signals a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than deep inside cvtColor.
if frame is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert BGR to RGB
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# Process image with MediaPipe Pose model
results = pose.process(rgb_frame)

# Draw connections between landmarks with custom color
if results.pose_landmarks:
    # Draw on a copy so the loaded image in `frame` stays unmodified
    annotated_image = frame.copy()
    mp_drawing.draw_landmarks(annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                              landmark_drawing_spec=mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
                              connection_drawing_spec=mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2))

    # Display annotated image with landmarks
    cv2.imshow('Body Landmarks Detection', annotated_image)

    # Save annotated image with landmarks; imwrite reports failure via its
    # return value, so surface it instead of silently losing the output.
    output_image_path = '1_annotated.jpg'
    if not cv2.imwrite(output_image_path, annotated_image):
        print(f"Failed to save annotated image to {output_image_path}")

    try:
        # Block until any key is pressed in the preview window
        cv2.waitKey(0)
    finally:
        # Close the window even if the wait is interrupted from the kernel
        cv2.destroyAllWindows()
else:
    print("No landmarks detected in the image.")


Each landmark in the `results.pose_landmarks.landmark` list corresponds to a specific point on the detected human pose. The values associated with each landmark are as follows:

- **x**: The normalized x-coordinate of the landmark within the image frame. It represents the horizontal position of the landmark relative to the width of the image. The value ranges from 0 (left edge of the image) to 1 (right edge of the image).

- **y**: The normalized y-coordinate of the landmark within the image frame. It represents the vertical position of the landmark relative to the height of the image. The value ranges from 0 (top edge of the image) to 1 (bottom edge of the image).

- **z**: The depth of the landmark relative to the midpoint of the hips, which serves as the origin. For `results.pose_landmarks` this value is not expressed in meters; its magnitude uses roughly the same scale as **x**. Smaller (more negative) values indicate that the landmark is closer to the camera. Real-world coordinates in meters are available separately through `results.pose_world_landmarks`.

- **visibility**: A score indicating the likelihood that the landmark is visible in the image, i.e. present and not occluded. The value ranges from 0 to 1, with higher values indicating higher confidence that the landmark is visible.

In the example output below, the pixel coordinates `(635, 132)` of the `NOSE` landmark are the result of converting its normalized coordinates `(0.49626579880714417, 0.18469412624835968)` to pixels: x is multiplied by the image width, y is multiplied by the image height, and both products are truncated to integers. These pixel coordinates represent the location of the landmark within the image frame.

In [21]:
# Extract and print pixel coordinates of landmarks
# Relies on `results` and `frame` produced by the image-processing cell.
if results.pose_landmarks:
    landmarks = results.pose_landmarks.landmark

    # The image size is the same for every landmark, so read it once
    # instead of unpacking frame.shape on each loop iteration.
    h, w, c = frame.shape

    for idx, landmark in enumerate(landmarks):
        # landmark.x / landmark.y are normalized; scale by width / height and
        # truncate to get integer pixel positions.
        cx, cy = int(landmark.x * w), int(landmark.y * h)
        # The list position doubles as the PoseLandmark enum value
        landmark_name = mp_pose.PoseLandmark(idx).name
        print(f"{landmark_name}: Pixel Coordinates: ({cx}, {cy})")
else:
    print("No landmarks detected in the image.")

NOSE: Pixel Coordinates: (635, 132)
LEFT_EYE_INNER: Pixel Coordinates: (641, 123)
LEFT_EYE: Pixel Coordinates: (644, 123)
LEFT_EYE_OUTER: Pixel Coordinates: (648, 124)
RIGHT_EYE_INNER: Pixel Coordinates: (629, 124)
RIGHT_EYE: Pixel Coordinates: (625, 124)
RIGHT_EYE_OUTER: Pixel Coordinates: (621, 124)
LEFT_EAR: Pixel Coordinates: (651, 129)
RIGHT_EAR: Pixel Coordinates: (617, 130)
MOUTH_LEFT: Pixel Coordinates: (642, 144)
MOUTH_RIGHT: Pixel Coordinates: (626, 145)
LEFT_SHOULDER: Pixel Coordinates: (686, 196)
RIGHT_SHOULDER: Pixel Coordinates: (581, 196)
LEFT_ELBOW: Pixel Coordinates: (747, 249)
RIGHT_ELBOW: Pixel Coordinates: (511, 240)
LEFT_WRIST: Pixel Coordinates: (816, 277)
RIGHT_WRIST: Pixel Coordinates: (436, 250)
LEFT_PINKY: Pixel Coordinates: (840, 283)
RIGHT_PINKY: Pixel Coordinates: (412, 256)
LEFT_INDEX: Pixel Coordinates: (841, 279)
RIGHT_INDEX: Pixel Coordinates: (412, 252)
LEFT_THUMB: Pixel Coordinates: (833, 277)
RIGHT_THUMB: Pixel Coordinates: (421, 251)
LEFT_HIP: Pixel