In [None]:
# Test: display predicted classes on the camera feed

from typing import Any, List
from ultralytics import YOLO
from inference import InferencePipeline
from inference.core.interfaces.stream.sinks import render_boxes
from inference.core.interfaces.camera.entities import VideoFrame
import numpy as np
import cv2

class VideoFrameWithPredictions:
    """Pair a video frame with the predictions computed for it.

    The wrapped frame's ``image``, ``frame_id`` and ``frame_timestamp`` are
    re-exposed as read-only properties, so the wrapper can be read like the
    frame itself.
    """

    def __init__(self, video_frame: VideoFrame, predictions: dict = None):
        # A missing or empty ``predictions`` argument is stored as a new empty dict.
        self.predictions = predictions if predictions else {}
        self.video_frame = video_frame

    @property
    def frame_timestamp(self):
        """Timestamp of the wrapped frame."""
        return self.video_frame.frame_timestamp

    @property
    def frame_id(self):
        """Identifier of the wrapped frame."""
        return self.video_frame.frame_id

    @property
    def image(self):
        """Image of the wrapped frame."""
        return self.video_frame.image

class MyModel:
    """Adapter that lets a YOLO model act as the pipeline's frame handler."""

    def __init__(self, weights_path: str):
        # Build the YOLO model from the given weights file.
        self._model = YOLO(weights_path)
        print("Modèle chargé avec succès")

    def infer(self, video_frames: List[VideoFrame]) -> List[VideoFrameWithPredictions]:
        """Run the model on a batch of frames and wrap each frame with its detections.

        Args:
            video_frames: Frames to process; each one exposes an ``image``.

        Returns:
            One ``VideoFrameWithPredictions`` per model result. Its
            ``predictions`` dict has the keys ``'boxes'``, ``'scores'`` and
            ``'class_indices'``; all three are empty lists when the result
            carries no boxes.
        """
        print('Video frames:', len(video_frames))

        # One numpy array per frame, handed to the model as a single batch.
        batch = [np.array(frame.image) for frame in video_frames]
        results = self._model(batch)

        enriched_video_frames = []
        for frame_index, result in enumerate(results):
            detections = result.boxes
            if detections:
                payload = {
                    'boxes': detections.xyxy.tolist(),
                    'scores': detections.conf.tolist(),
                    'class_indices': detections.cls.tolist(),
                }
            else:
                payload = {'boxes': [], 'scores': [], 'class_indices': []}

            # Results are matched to their source frame by position.
            enriched_video_frames.append(
                VideoFrameWithPredictions(
                    video_frame=video_frames[frame_index],
                    predictions=payload,
                )
            )

        return enriched_video_frames

def render_boxes_on_frame(
    video_frame_with_predictions: VideoFrameWithPredictions,
    video_frame: VideoFrame = None,
) -> VideoFrame:
    """Draw the predicted boxes and labels onto the frame carried by the wrapper.

    The function is registered as the pipeline's ``on_prediction`` sink. Per
    the InferencePipeline documentation a sink is invoked as
    ``sink(predictions, video_frame)``, so a second positional parameter is
    accepted; the original single-argument call keeps working.

    Args:
        video_frame_with_predictions: Wrapper holding the frame and a dict
            with the keys ``'boxes'``, ``'scores'`` and ``'class_indices'``.
        video_frame: Frame supplied by the pipeline alongside the prediction.
            Not used here, because the wrapper already references its frame.

    Returns:
        The frame whose ``image`` is the annotated picture. When the wrapped
        frame rejects attribute assignment (e.g. a frozen dataclass), a copy
        made with ``dataclasses.replace`` is stored back on the wrapper and
        returned instead.
    """
    import dataclasses  # local import: only needed for read-only frames

    wrapper = video_frame_with_predictions
    # np.array copies the pixels, so drawing never touches the source buffer.
    image = np.array(wrapper.image)
    predictions = wrapper.predictions

    if predictions:
        detections = zip(
            predictions['boxes'],
            predictions['scores'],
            predictions['class_indices'],
        )
        for box, score, class_idx in detections:
            # OpenCV drawing functions need integer pixel coordinates.
            x1, y1, x2, y2 = map(int, box)
            label = f'Classe: {class_idx}, Probabilité: {score:.2f}'

            # Outline the detected object.
            cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)

            # Write the label just above the top-left corner of the box.
            cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    frame = wrapper.video_frame
    try:
        frame.image = image
    except AttributeError:
        # dataclasses.FrozenInstanceError is an AttributeError subclass:
        # build an updated copy instead of mutating the read-only frame.
        frame = dataclasses.replace(frame, image=image)
        wrapper.video_frame = frame
    return frame

# Load the trained weights once, then plug the model and the renderer into the pipeline.
my_model = MyModel(weights_path="../models/shifumi_trained_yolo9c.pt")
pipeline = InferencePipeline.init_with_custom_logic(
    video_reference=0,  # webcam device ID; adjust if this is not the right camera
    on_video_frame=my_model.infer,
    on_prediction=render_boxes_on_frame,  # called after each prediction
)

# Start processing and wait for the pipeline to finish.
pipeline.start()
pipeline.join()


In [None]:
import cv2
from typing import Any, List
from ultralytics import YOLO
from inference import InferencePipeline
from inference.core.interfaces.camera.entities import VideoFrame
import numpy as np

class MyModel:
    """Adapter that lets a YOLO model act as the pipeline's frame handler."""

    def __init__(self, weights_path: str):
        # Build the YOLO model from the given weights file.
        self._model = YOLO(weights_path)
        print("Modèle chargé avec succès")

    def infer(self, video_frames: List[VideoFrame]) -> List[dict]:
        """Run the model on a batch of frames and return one prediction dict per result.

        Args:
            video_frames: Frames to process; each one exposes an ``image``.

        Returns:
            A list of dicts with the keys ``'boxes'``, ``'scores'`` and
            ``'class_indices'``. All three are empty lists when a result
            carries no boxes.
        """
        print('Video frames:', len(video_frames))

        # One numpy array per frame, handed to the model as a single batch.
        batch = [np.array(frame.image) for frame in video_frames]

        predictions = []
        for result in self._model(batch):
            detections = result.boxes
            if not detections:
                predictions.append({'boxes': [], 'scores': [], 'class_indices': []})
                continue
            predictions.append(
                {
                    'boxes': detections.xyxy.tolist(),
                    'scores': detections.conf.tolist(),
                    'class_indices': detections.cls.tolist(),
                }
            )

        return predictions

def render_boxes_on_frame(video_frame: VideoFrame, prediction: dict) -> VideoFrame:
    """Draw the predicted boxes on a frame, display it, and return the annotated frame.

    Args:
        video_frame: Frame whose ``image`` is annotated.
        prediction: Dict with the keys ``'boxes'``, ``'scores'`` and
            ``'class_indices'``. A falsy value means nothing is drawn.

    Returns:
        The frame carrying the annotated image. When ``video_frame`` rejects
        attribute assignment (e.g. a frozen dataclass), an updated copy made
        with ``dataclasses.replace`` is returned instead of raising.
    """
    import dataclasses  # local import: only needed for read-only frames

    # np.array copies the pixels, so drawing never touches the source buffer.
    image = np.array(video_frame.image)

    if prediction:
        detections = zip(
            prediction['boxes'],
            prediction['scores'],
            prediction['class_indices'],
        )
        for box, score, class_idx in detections:
            # OpenCV drawing functions need integer pixel coordinates.
            x1, y1, x2, y2 = map(int, box)
            label = f'Classe: {class_idx}, Probabilité: {score:.2f}'

            # Draw the bounding box.
            cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)

            # Put the label just above the top-left corner of the box.
            cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    # Show the annotated image; the short wait lets the window render it.
    cv2.imshow("Video Frame", image)
    cv2.waitKey(1)

    try:
        video_frame.image = image
    except AttributeError:
        # dataclasses.FrozenInstanceError is an AttributeError subclass:
        # build an updated copy instead of mutating the read-only frame.
        video_frame = dataclasses.replace(video_frame, image=image)
    return video_frame

# Load the trained weights once; the instance is shared by the pipeline below.
my_model = MyModel(weights_path="../models/shifumi_trained_yolo9c.pt")

def on_prediction(video_frames: List[VideoFrame], predictions: List[dict]) -> List[VideoFrame]:
    """Render the predictions for every frame handed over by the pipeline.

    Per the InferencePipeline documentation a sink is invoked positionally as
    ``sink(predictions, video_frames)``, and with single items rather than
    lists when there is only one video source. Both forms are normalized
    here, so the declared ``(video_frames, predictions)`` list call and the
    pipeline's own call behave the same.

    Args:
        video_frames: Frames to annotate (a list or a single frame).
        predictions: Prediction dicts matching the frames (a list or a single dict).

    Returns:
        The annotated frames, in input order. ``None`` frames are skipped.
    """
    # Prediction payloads are dicts; if the first argument carries them, the
    # arguments arrived in the pipeline's (predictions, frames) order.
    first_holds_predictions = isinstance(video_frames, dict) or (
        isinstance(video_frames, (list, tuple))
        and any(isinstance(item, dict) for item in video_frames)
    )
    if first_holds_predictions:
        video_frames, predictions = predictions, video_frames

    # A single frame/prediction pair is treated as a batch of one.
    if not isinstance(video_frames, (list, tuple)):
        video_frames, predictions = [video_frames], [predictions]

    return [
        render_boxes_on_frame(frame, prediction)
        for frame, prediction in zip(video_frames, predictions)
        if frame is not None
    ]

# Plug the model and the rendering sink into the pipeline.
pipeline = InferencePipeline.init_with_custom_logic(
    video_reference=0,  # webcam device ID; adjust if this is not the right camera
    on_video_frame=my_model.infer,
    on_prediction=on_prediction,  # called after each prediction
)

# Start processing and wait for the pipeline to finish.
pipeline.start()
pipeline.join()
