In [32]:
import os
import joblib
import numpy as np
import mediapipe as mp
import cv2

In [33]:
models_path = "../models"
pca_models_path = models_path + "/pca"

In [34]:
def normalize_landmarks(landmarks: list) -> list:
    """Normalize a list of pose landmarks.

    This function brings the center (mean of outer most x, y and z) to (0, 0, 0)
    and scales so that the maximum distance from the center is 0.5.
    Also remove the z coordinate.

    Args:
        landmarks (list): A flat list of landmark values [x1, y1, z1, v1, x2, y2, z2, v2, ..., xN, yN, zN, vN].

    Returns:
        list: The normalized landmark list in the same format.

    """
    landmarks = np.array(landmarks).reshape(-1, 4)
    max_x = np.max(landmarks[:, 0])
    min_x = np.min(landmarks[:, 0])
    max_y = np.max(landmarks[:, 1])
    min_y = np.min(landmarks[:, 1])

    # Get center
    center_x = (max_x + min_x) / 2
    center_y = (max_y + min_y) / 2
    
    # Bring center to (0, 0, 0)
    landmarks[:, 0] -= center_x
    landmarks[:, 1] -= center_y

    # Get max distance from center
    max_distance = np.max(np.sqrt(landmarks[:, 0]**2 + landmarks[:, 1]**2 + landmarks[:, 2]**2))

    # Scale to 0.5
    scale = 0.5 / max_distance
    landmarks[:, 0] *= scale
    landmarks[:, 1] *= scale

    # Remove z coordinate
    landmarks = landmarks[:, :2]

    # Flatten the array and convert to list
    landmarks = landmarks.flatten().tolist()
    return landmarks


In [35]:
def extract_features_and_classify_image(image_path, model, pose, pca_model=None):
    """Extract pose landmarks from an image and classify it using a pre-trained model.

    Args:
        image_path (str): Path to the input image.
        model: Pre-trained model for classification.
        pose: Pre-trained pose detector.

    """

    # Read the image
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Detect pose landmarks
    results = pose.process(image_rgb)
    if results.pose_landmarks:
        # Extract landmarks
        landmarks = []
        for landmark in results.pose_landmarks.landmark:
            landmarks.extend([landmark.x, landmark.y, landmark.z, landmark.visibility])
        # Normalize landmarks
        landmarks = normalize_landmarks(landmarks)

        if pca_model:
            landmarks = pca_model.transform([landmarks])[0]

        # Classify the image using the extracted landmarks
        landmarks_array = np.array(landmarks).reshape(1, -1)
        classification_result = model.predict(landmarks_array)

        return classification_result
    else:
        print("No pose landmarks detected in the image.")
        return None


In [36]:
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)

I0000 00:00:1747106144.915100 1428031 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1747106144.925153 4139957 gl_context.cc:369] GL version: 3.0 (OpenGL ES 3.0 Mesa 24.2.8-1ubuntu1~24.04.1), renderer: D3D12 (Intel(R) UHD Graphics 770)


In [37]:
# Load pca models
pca_models = {}
for file in os.listdir(pca_models_path):
    if file.endswith(".joblib"):
        model_path = os.path.join(pca_models_path, file)
        model = joblib.load(model_path)
        pca_models[file] = model
        print(f"Loaded PCA model from {model_path}")
pca_model = pca_models.pop("pca.joblib")
for file in os.listdir("../test_data"):
    if file.endswith(".jpg") or file.endswith(".png"):
        image_path = os.path.join("../test_data", file)
        for name, model in pca_models.items():
            result = extract_features_and_classify_image(image_path, model, pose, pca_model)
            if result is not None:
                print(f"Image: {file}, PCA Model: {name}, Classification Result: {result}")
            else:
                print(f"Image: {file}, PCA Model: {name}, No landmarks detected.")

    print()

W0000 00:00:1747106145.002763 4139935 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1747106145.058849 4139934 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Loaded PCA model from ../models/pca/pca.joblib
Loaded PCA model from ../models/pca/MLPClassifier_pca.joblib
Loaded PCA model from ../models/pca/knn_pca.joblib
Image: watering_test_2.jpg, PCA Model: MLPClassifier_pca.joblib, Classification Result: ['fertilizing']
Image: watering_test_2.jpg, PCA Model: knn_pca.joblib, Classification Result: ['fertilizing']

Image: hoeing_test_3.png, PCA Model: MLPClassifier_pca.joblib, Classification Result: ['checking']
Image: hoeing_test_3.png, PCA Model: knn_pca.joblib, Classification Result: ['checking']

Image: hoeing_test_1.jpg, PCA Model: MLPClassifier_pca.joblib, Classification Result: ['hoeing']
Image: hoeing_test_1.jpg, PCA Model: knn_pca.joblib, Classification Result: ['hoeing']

Image: watering_test.jpg, PCA Model: MLPClassifier_pca.joblib, Classification Result: ['fertilizing']
Image: watering_test.jpg, PCA Model: knn_pca.joblib, Classification Result: ['fertilizing']

Image: checking_test_1.jpg, PCA Model: MLPClassifier_pca.joblib, Classifi

In [38]:
# Process all video files in the test_data directory and classify each frame
video_extensions = (".mp4", ".avi", ".mov", ".mkv")
for video_file in os.listdir("../test_data"):
    if video_file.lower().endswith(video_extensions):
        video_path = os.path.join("../test_data", video_file)
        cap = cv2.VideoCapture(video_path)
        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Convert frame to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Detect pose landmarks
            results = pose.process(frame_rgb)
            if results.pose_landmarks:
                landmarks = []
                for landmark in results.pose_landmarks.landmark:
                    landmarks.extend([landmark.x, landmark.y, landmark.z, landmark.visibility])
            #     landmarks = normalize_landmarks(landmarks)
            #     landmarks = pca_model.transform([landmarks])[0]
            #     landmarks_array = np.array(landmarks).reshape(1, -1)
            #     classification_result = model.predict(landmarks_array)
            #     print(f"Video: {video_file}, Frame: {frame_idx}, Classification Result: {classification_result}")
            # else:
            #     print(f"Video: {video_file}, Frame: {frame_idx}, No landmarks detected.")
            # frame_idx += 1
            # Draw landmarks on the frame and save to video file
        cap.release()

KeyboardInterrupt: 