# MediaPipe

In [4]:
import mediapipe as mp 
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import numpy as np


In [5]:
# NOTE: This must be the absolute path to the .tflite model file on your machine.
model_path = (
    'E:/University/Spring 2025/Multimodal User Interfaces/Project/'
    'MultiModel-User-Interface-Project/test/efficientdet_lite0.tflite'
)

# 1. LiveStream Detection using Basic MediaPipe Model (Test 1)

1. Works fine but is not very accurate (still needs to be checked with other objects).
2. Faster than both the EfficientDet-Lite2 model and the SSD MobileNetV2 model.

### OVERALL: Fastest of the three models tested, with the best accuracy among them


In [None]:
# Live-stream object detection with the EfficientDet-Lite0 model.
# Webcam frames are passed to a MediaPipe ObjectDetector in LIVE_STREAM mode;
# the most recent result handed to the callback is drawn on every displayed
# frame. Press 'q' in the preview window to stop.
import time

# Initialize MediaPipe components
BaseOptions = mp.tasks.BaseOptions
DetectionResult = mp.tasks.components.containers.DetectionResult
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

#NOTE: Load the model (Give absolute path of the model)
model_path = 'E:/University/Spring 2025/Multimodal User Interfaces/Project/MultiModel-User-Interface-Project/test/efficientdet_lite0.tflite'

# Latest detection result; reset so results from an earlier run are not drawn
detection_result = None


def print_results(result: DetectionResult, output_image: mp.Image, timestamp_ms: int): # type: ignore
    """Result callback: keep only the most recent detection result.

    Registered as `result_callback` in the detector options below. Despite
    its name it prints nothing; it stores `result` in the module-level
    `detection_result`. `output_image` and `timestamp_ms` are unused.
    """
    global detection_result
    detection_result = result


def _draw_detections(frame, detections):
    """Draw a green box and a "label (score)" caption on `frame` for each detection."""
    for detection in detections:
        bbox = detection.bounding_box
        x, y, w, h = int(bbox.origin_x), int(bbox.origin_y), int(bbox.width), int(bbox.height)

        # Draw rectangle around detected object
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Only the first category of each detection is shown
        category = detection.categories[0]
        text = f"{category.category_name} ({category.score:.2f})"

        # Clamp the caption so it stays visible when the box touches the top edge
        cv2.putText(frame, text, (x, max(y - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)


# Define object detection options
options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    score_threshold=0.3,
    result_callback=print_results
)

# Open webcam
cap = cv2.VideoCapture(0)  # Change index if needed

# try/finally guarantees the camera and the preview window are released even
# if the model fails to load or an error occurs while processing frames.
try:
    if not cap.isOpened():
        print("Failed to open webcam")
    else:
        with ObjectDetector.create_from_options(options) as detector:
            last_timestamp_ms = -1
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("Failed to grab frame")
                    break

                # MediaPipe expects RGB data; OpenCV delivers BGR
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

                # detect_async needs increasing timestamps. CAP_PROP_POS_MSEC is
                # backend-dependent for live cameras and is not guaranteed to
                # advance, so use a monotonic clock and force a strict increase.
                frame_timestamp_ms = max(int(time.monotonic() * 1000), last_timestamp_ms + 1)
                last_timestamp_ms = frame_timestamp_ms

                # Run detection asynchronously
                detector.detect_async(mp_image, frame_timestamp_ms)

                # Read the global once so the callback cannot swap it mid-draw
                latest_result = detection_result
                if latest_result and latest_result.detections:
                    _draw_detections(frame, latest_result.detections)

                # Show the frame
                cv2.imshow("EfficientDet-Lite0 model", frame)

                # Press 'q' to exit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()


# 2. EfficientDet-Lite2 model Advance (Test 2)

1. Very slow: object detection takes too long.
2. The displayed frame rate is not smooth and lags a lot.
### OVERALL: Poorer performance than the previous model

In [None]:
# Live-stream object detection with the EfficientDet-Lite2 model.
# Webcam frames are passed to a MediaPipe ObjectDetector in LIVE_STREAM mode;
# the most recent result handed to the callback is drawn on every displayed
# frame. Press 'q' in the preview window to stop.
import time

# Initialize MediaPipe components
BaseOptions = mp.tasks.BaseOptions
DetectionResult = mp.tasks.components.containers.DetectionResult
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

#NOTE: Load the model (Give absolute path of the model)
model_path = 'E:/University/Spring 2025/Multimodal User Interfaces/Project/MultiModel-User-Interface-Project/test/efficientdet_lite2.tflite'

# Latest detection result; reset so results from an earlier run are not drawn
detection_result = None


def print_results(result: DetectionResult, output_image: mp.Image, timestamp_ms: int): # type: ignore
    """Result callback: keep only the most recent detection result.

    Registered as `result_callback` in the detector options below. Despite
    its name it prints nothing; it stores `result` in the module-level
    `detection_result`. `output_image` and `timestamp_ms` are unused.
    """
    global detection_result
    detection_result = result


def _draw_detections(frame, detections):
    """Draw a green box and a "label (score)" caption on `frame` for each detection."""
    for detection in detections:
        bbox = detection.bounding_box
        x, y, w, h = int(bbox.origin_x), int(bbox.origin_y), int(bbox.width), int(bbox.height)

        # Draw rectangle around detected object
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Only the first category of each detection is shown
        category = detection.categories[0]
        text = f"{category.category_name} ({category.score:.2f})"

        # Clamp the caption so it stays visible when the box touches the top edge
        cv2.putText(frame, text, (x, max(y - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)


# Define object detection options
options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    score_threshold=0.3,
    result_callback=print_results
)

# Open webcam
cap = cv2.VideoCapture(0)  # Change index if needed

# try/finally guarantees the camera and the preview window are released even
# if the model fails to load or an error occurs while processing frames.
try:
    if not cap.isOpened():
        print("Failed to open webcam")
    else:
        with ObjectDetector.create_from_options(options) as detector:
            last_timestamp_ms = -1
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("Failed to grab frame")
                    break

                # MediaPipe expects RGB data; OpenCV delivers BGR
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

                # detect_async needs increasing timestamps. CAP_PROP_POS_MSEC is
                # backend-dependent for live cameras and is not guaranteed to
                # advance, so use a monotonic clock and force a strict increase.
                frame_timestamp_ms = max(int(time.monotonic() * 1000), last_timestamp_ms + 1)
                last_timestamp_ms = frame_timestamp_ms

                # Run detection asynchronously
                detector.detect_async(mp_image, frame_timestamp_ms)

                # Read the global once so the callback cannot swap it mid-draw
                latest_result = detection_result
                if latest_result and latest_result.detections:
                    _draw_detections(frame, latest_result.detections)

                # Show the frame
                cv2.imshow(" EfficientDet-Lite2 model", frame)

                # Press 'q' to exit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()


# 3. SSD MobileNetV2 model (Test 3)

1. Better than the Lite2 model.
2. A little slower than the Lite0 model, and also less accurate.
3. It is supposed to be better than the Lite0 model, but this still has to be checked with other objects as well.

### OVERALL: Better than the Lite2 model but less accurate than the Lite0 model.

In [None]:
# Live-stream object detection with the SSD MobileNetV2 model.
# Webcam frames are passed to a MediaPipe ObjectDetector in LIVE_STREAM mode;
# the most recent result handed to the callback is drawn on every displayed
# frame. Press 'q' in the preview window to stop.
import time

# Initialize MediaPipe components
BaseOptions = mp.tasks.BaseOptions
DetectionResult = mp.tasks.components.containers.DetectionResult
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

#NOTE: Load the model (Give absolute path of the model)
model_path = 'E:/University/Spring 2025/Multimodal User Interfaces/Project/MultiModel-User-Interface-Project/test/ssd_mobilenet_v2.tflite'

# Latest detection result; reset so results from an earlier run are not drawn
detection_result = None


def print_results(result: DetectionResult, output_image: mp.Image, timestamp_ms: int): # type: ignore
    """Result callback: keep only the most recent detection result.

    Registered as `result_callback` in the detector options below. Despite
    its name it prints nothing; it stores `result` in the module-level
    `detection_result`. `output_image` and `timestamp_ms` are unused.
    """
    global detection_result
    detection_result = result


def _draw_detections(frame, detections):
    """Draw a green box and a "label (score)" caption on `frame` for each detection."""
    for detection in detections:
        bbox = detection.bounding_box
        x, y, w, h = int(bbox.origin_x), int(bbox.origin_y), int(bbox.width), int(bbox.height)

        # Draw rectangle around detected object
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Only the first category of each detection is shown
        category = detection.categories[0]
        text = f"{category.category_name} ({category.score:.2f})"

        # Clamp the caption so it stays visible when the box touches the top edge
        cv2.putText(frame, text, (x, max(y - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)


# Define object detection options
options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    max_results=5,
    score_threshold=0.3,
    result_callback=print_results
)

# Open webcam
cap = cv2.VideoCapture(0)  # Change index if needed

# try/finally guarantees the camera and the preview window are released even
# if the model fails to load or an error occurs while processing frames.
try:
    if not cap.isOpened():
        print("Failed to open webcam")
    else:
        with ObjectDetector.create_from_options(options) as detector:
            last_timestamp_ms = -1
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("Failed to grab frame")
                    break

                # MediaPipe expects RGB data; OpenCV delivers BGR
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

                # detect_async needs increasing timestamps. CAP_PROP_POS_MSEC is
                # backend-dependent for live cameras and is not guaranteed to
                # advance, so use a monotonic clock and force a strict increase.
                frame_timestamp_ms = max(int(time.monotonic() * 1000), last_timestamp_ms + 1)
                last_timestamp_ms = frame_timestamp_ms

                # Run detection asynchronously
                detector.detect_async(mp_image, frame_timestamp_ms)

                # Read the global once so the callback cannot swap it mid-draw
                latest_result = detection_result
                if latest_result and latest_result.detections:
                    _draw_detections(frame, latest_result.detections)

                # Show the frame
                cv2.imshow("SSD MobileNetV2 model", frame)

                # Press 'q' to exit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
