In [1]:
# Use the explicit %pip magic so the packages are installed into the
# environment of the running kernel without relying on IPython automagic.
%pip install mediapipe opencv-python





[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import cv2
import mediapipe as mp

# Initialize MediaPipe drawing and objectron solutions
mp_drawing = mp.solutions.drawing_utils
mp_objectron = mp.solutions.objectron

# --- Configuration ------------------------------------------------------------
CAMERA_INDEX = 0  # device index handed to cv2.VideoCapture
OBJECT_CATEGORIES = ('Cup', 'Shoe', 'Chair', 'Camera')  # one Objectron model per name
# Keyword arguments shared by every Objectron instance.
OBJECTRON_OPTIONS = dict(
    static_image_mode=False,
    max_num_objects=5,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
WINDOW_TITLE = 'MediaPipe Objectron - Multi-Object Detection & Tracking'
QUIT_KEY = ord('q')      # pressing this key in the preview window stops the loop
POLL_DELAY_MS = 10       # cv2.waitKey timeout between frames
LABEL_COLOR = (255, 0, 0)  # colour tuple passed to cv2.putText (frame is BGR)
LABEL_OFFSET_PX = 10     # label is drawn this many pixels above the anchor landmark


def draw_detection(frame, detected_object, obj_type):
    """Draw one detected object onto ``frame`` in place.

    Args:
        frame: BGR image (as returned by ``cap.read()``) that is drawn on.
        detected_object: one entry of ``results.detected_objects``.
        obj_type: category name shown in the text label (e.g. ``'Cup'``).
    """
    # Bounding box edges and landmarks, then the object's axes.
    mp_drawing.draw_landmarks(
        frame, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
    mp_drawing.draw_axis(frame, detected_object.rotation, detected_object.translation)

    # The first 2D landmark anchors the label; its x/y are scaled by the frame
    # width/height to obtain pixel coordinates.
    anchor = detected_object.landmarks_2d.landmark[0]
    frame_height, frame_width = frame.shape[:2]
    label_origin = (int(anchor.x * frame_width),
                    int(anchor.y * frame_height) - LABEL_OFFSET_PX)
    cv2.putText(frame, f"Object: {obj_type}", label_origin,
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, LABEL_COLOR, 2)


# Initialize video capture
cap = cv2.VideoCapture(CAMERA_INDEX)

# Filled inside the ``try`` so that a failure while building any model still
# reaches the cleanup below (camera released, already-built models closed).
objectron_models = {}

try:
    if not cap.isOpened():
        # Skip loading the models when there is no video source to process.
        print(f"Could not open camera {CAMERA_INDEX}; nothing to display.")
    else:
        # Create one Objectron instance per object type
        for category in OBJECT_CATEGORIES:
            objectron_models[category] = mp_objectron.Objectron(
                model_name=category, **OBJECTRON_OPTIONS)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert BGR image to RGB for MediaPipe processing
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # The RGB copy is only read by the models; marking it read-only lets
        # MediaPipe pass it by reference (all drawing happens on ``frame``).
        rgb_frame.flags.writeable = False

        # Run every model on the same RGB frame and draw its detections
        for obj_type, model in objectron_models.items():
            results = model.process(rgb_frame)
            # ``detected_objects`` is falsy when nothing was found.
            for detected_object in results.detected_objects or ():
                draw_detection(frame, detected_object, obj_type)

        # Display output frame
        cv2.imshow(WINDOW_TITLE, frame)

        # Break loop if 'q' is pressed
        if cv2.waitKey(POLL_DELAY_MS) & 0xFF == QUIT_KEY:
            break

finally:
    # Release Objectron models and video capture. The nested ``finally``
    # guarantees the camera and windows are released even if a model's
    # ``close()`` raises.
    try:
        for model in objectron_models.values():
            model.close()
    finally:
        cap.release()
        cv2.destroyAllWindows()
