# In [None]:
import numpy as np
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import json

# STEP 2: Build the ObjectDetector.
# The model is loaded from 'model.tflite'; score_threshold=0.5 is passed to the
# detector options as the minimum score setting.
base_options = python.BaseOptions(model_asset_path='model.tflite')
options = vision.ObjectDetectorOptions(
    base_options=base_options,
    score_threshold=0.5,
)
detector = vision.ObjectDetector.create_from_options(options)

# Function to perform object detection and draw bounding boxes on the frame
def detect_and_draw(frame):
    """Run object detection on a frame and annotate the frame in place.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read()``.
            Assumed to be an 8-bit, 3-channel image in OpenCV's BGR channel
            order -- TODO confirm if called with frames from another source.

    Returns:
        The same ``frame`` object, with a green rectangle and a
        ``"<category_name>: <score>"`` label drawn for every detection that
        carries at least one category.
    """
    # OpenCV delivers BGR, but the image below is declared as SRGB. Swap the
    # channels first so the model does not see red and blue exchanged.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

    # STEP 4: Detect objects in the input image.
    detection_result = detector.detect(mp_image)

    # Handle every detection rather than only the first one, so additional
    # objects in the frame are not silently dropped.
    for detection in detection_result.detections or []:
        categories = detection.categories
        if not categories:
            # Nothing to label; skip, as a detection without a category
            # was never drawn.
            continue

        # Only the first category entry is used for the label.
        category = categories[0]
        score, category_name = category.score, category.category_name

        bounding_box = detection.bounding_box
        origin_x, origin_y = bounding_box.origin_x, bounding_box.origin_y
        width, height = bounding_box.width, bounding_box.height

        # Draw bounding box (on the original BGR frame that gets displayed).
        cv2.rectangle(
            frame,
            (origin_x, origin_y),
            (origin_x + width, origin_y + height),
            (0, 255, 0),
            2,
        )

        # Put the label just above the box; clamp the baseline so the text
        # stays inside the frame when the box touches the top edge.
        text = f"{category_name}: {score:.2f}"
        label_y = max(origin_y - 10, 15)
        cv2.putText(
            frame,
            text,
            (origin_x, label_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    return frame

# Open the webcam (device index 0).
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the display window are released
# even if detection raises or the run is interrupted; otherwise the capture
# handle would stay open after an error.
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # If the frame was not captured successfully, break the loop
        if not ret:
            break

        # Perform object detection and draw bounding boxes on the frame
        frame_with_detection = detect_and_draw(frame)

        # Display the frame with bounding boxes and information
        cv2.imshow('Detected Objects', frame_with_detection)

        # Exit the loop when 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window
    cap.release()
    cv2.destroyAllWindows()
import numpy as np
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import json

# STEP 2: Build the ObjectDetector.
# The model is loaded from the absolute path below; score_threshold=0.5 is
# passed to the detector options as the minimum score setting.
base_options = python.BaseOptions(model_asset_path='/media/sunbots/NewVolume/Mediapipe/model_data/model3.tflite')
options = vision.ObjectDetectorOptions(
    base_options=base_options,
    score_threshold=0.5,
)
detector = vision.ObjectDetector.create_from_options(options)

# Function to perform object detection and draw bounding boxes on the frame
def detect_and_draw(frame):
    """Run object detection on a frame and annotate the frame in place.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read()``.
            Assumed to be an 8-bit, 3-channel image in OpenCV's BGR channel
            order -- TODO confirm if called with frames from another source.

    Returns:
        The same ``frame`` object, with a green rectangle and a
        ``"<category_name>: <score>"`` label drawn for every detection that
        carries at least one category.
    """
    # OpenCV delivers BGR, but the image below is declared as SRGB. Swap the
    # channels first so the model does not see red and blue exchanged.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

    # STEP 4: Detect objects in the input image.
    detection_result = detector.detect(mp_image)

    # Handle every detection rather than only the first one, so additional
    # objects in the frame are not silently dropped.
    for detection in detection_result.detections or []:
        categories = detection.categories
        if not categories:
            # Nothing to label; skip, as a detection without a category
            # was never drawn.
            continue

        # Only the first category entry is used for the label.
        category = categories[0]
        score, category_name = category.score, category.category_name

        bounding_box = detection.bounding_box
        origin_x, origin_y = bounding_box.origin_x, bounding_box.origin_y
        width, height = bounding_box.width, bounding_box.height

        # Draw bounding box (on the original BGR frame that gets displayed).
        cv2.rectangle(
            frame,
            (origin_x, origin_y),
            (origin_x + width, origin_y + height),
            (0, 255, 0),
            2,
        )

        # Put the label just above the box; clamp the baseline so the text
        # stays inside the frame when the box touches the top edge.
        text = f"{category_name}: {score:.2f}"
        label_y = max(origin_y - 10, 15)
        cv2.putText(
            frame,
            text,
            (origin_x, label_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    return frame

# Open the webcam (device index 0).
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the display window are released
# even if detection raises or the run is interrupted; otherwise the capture
# handle would stay open after an error.
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # If the frame was not captured successfully, break the loop
        if not ret:
            break

        # Perform object detection and draw bounding boxes on the frame
        frame_with_detection = detect_and_draw(frame)

        # Display the frame with bounding boxes and information
        cv2.imshow('Detected Objects', frame_with_detection)

        # Exit the loop when 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window
    cap.release()
    cv2.destroyAllWindows()
