In [2]:
from ultralytics import YOLO
# Load the trained checkpoint (best weights saved under the segment/train8 run directory)
WEIGHTS_PATH = "./runs/segment/train8/weights/best.pt"
model = YOLO(WEIGHTS_PATH)

In [3]:
from collections import defaultdict

def _confident_class_scores(result_obj, conf_threshold):
    """Return {class_id: summed confidence} for one frame's confident detections.

    Only detections whose confidence is strictly greater than
    ``conf_threshold`` contribute.
    """
    scores = defaultdict(float)
    for detection in result_obj[0].boxes.data:
        # Confidence and class id are read from the LAST two columns rather
        # than fixed positions 4 and 5, so rows that carry an extra track-id
        # column (Ultralytics tracking output) are still parsed correctly.
        confidence = float(detection[-2])
        if confidence > conf_threshold:
            scores[int(detection[-1])] += confidence
    return scores


def find_best_result(results, conf_threshold=0.5):
    """Pick the frame that best shows the classes seen in most of the frames.

    The selection runs in three steps:

    1. For every frame, collect the classes detected with confidence above
       ``conf_threshold``.
    2. Keep the "required" classes: those present in strictly more than half
       of the frames.
    3. Among frames that contain *every* required class, return the one whose
       summed confidence over the required classes is highest. Ties go to the
       earliest frame.

    Args:
        results: Sequence of per-frame results. Each item must be indexable,
            with ``item[0].boxes.data`` iterable over detection rows whose
            last two entries are (confidence, class id) -- e.g. the list
            returned by ``model(frame)``.
        conf_threshold: Minimum (exclusive) confidence for a detection to be
            counted. Defaults to 0.5.

    Returns:
        ``(frame_idx, best_result)`` where ``frame_idx`` is the position of
        the winning item inside ``results`` and ``best_result`` is that item.
        Returns ``(None, None)`` when ``results`` is empty, when no class
        appears in more than half of the frames, or when no frame contains
        all required classes.
    """
    # Scan each frame once; both the counting and the ranking step reuse this.
    per_frame_scores = [
        _confident_class_scores(result_obj, conf_threshold) for result_obj in results
    ]

    # Step 1: count in how many frames each class was confidently detected
    class_counters = defaultdict(int)
    for scores in per_frame_scores:
        for cls in scores:
            class_counters[cls] += 1

    # Step 2: classes detected in more than half of the frames
    majority = len(results) / 2
    required_classes = {cls for cls, count in class_counters.items() if count > majority}
    if not required_classes:
        # Without a required class every frame would score 0 and none could
        # beat the initial best of 0, so there is no winner.
        return None, None

    # Step 3: best frame among those containing every required class
    frame_idx = None
    best_result = None
    highest_total_confidence = 0.0
    for i, (result_obj, scores) in enumerate(zip(results, per_frame_scores)):
        if not required_classes.issubset(scores.keys()):
            continue
        total_confidence = sum(scores[cls] for cls in required_classes)
        # Strict '>' keeps the earliest frame on ties.
        if total_confidence > highest_total_confidence:
            highest_total_confidence = total_confidence
            best_result = result_obj
            frame_idx = i

    return frame_idx, best_result

In [11]:
import time
import cv2

# Capture settings
CAMERA_INDEX = 1          # device index passed to cv2.VideoCapture
FRAME_SIZE = (640, 480)   # (width, height) every frame is resized to before inference
WINDOW_SECONDS = 5        # length of each selection window

# Initialize video capture
cap = cv2.VideoCapture(CAMERA_INDEX)
results = []        # per-frame model outputs of the current window
best_results = []   # (frame_idx, best_result) for every window that produced a winner
start_time = time.time()

# The try/finally guarantees the camera and the preview window are released
# even when the loop is stopped by an exception or a kernel interrupt.
try:
    # Start the video capturing loop
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        resized_frame = cv2.resize(frame, FRAME_SIZE)

        # Get detection result
        result = model(resized_frame)
        results.append(result)

        # Annotate the frame and show the live preview
        annotated_frame = result[0].plot()
        cv2.imshow("frame", annotated_frame)

        # Once a full window has elapsed, keep its best frame and start over
        current_time = time.time()
        if current_time - start_time >= WINDOW_SECONDS:
            # frame_idx is the position of the winner inside this window's results
            frame_idx, best_result = find_best_result(results)

            # Save the best result to the list
            if best_result is not None:
                best_results.append((frame_idx, best_result))

            # Reset the time and clear the results for the next window
            start_time = current_time
            results = []

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and destroy windows
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 2 forcepss, 1 gauze, 1 scissors, 186.0ms
Speed: 5.1ms preprocess, 186.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 gauze, 1 scissors, 159.7ms
Speed: 2.0ms preprocess, 159.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 gauze, 1 scissors, 133.3ms
Speed: 3.0ms preprocess, 133.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 gauze, 1 scissors, 138.2ms
Speed: 1.3ms preprocess, 138.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 gauze, 1 scissors, 133.6ms
Speed: 2.0ms preprocess, 133.6ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 forceps, 1 gauze, 1 scissors, 128.0ms
Speed: 3.0ms preprocess, 128.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 gauze, 1 scissors, 139.1ms
Speed: 2.0ms preprocess, 139.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 

In [6]:
# Number of entries collected in best_results (the capture cell appends one
# per window for which find_best_result returned a frame)
len(best_results)

10

In [13]:
from matplotlib import pyplot as plt

# Render the annotated best frame of every stored window, one figure each
for _frame_idx, window_result in best_results:
    # plot() returns the annotated image in OpenCV's BGR channel order
    bgr_image = window_result[0].plot()

    # Matplotlib expects RGB, so swap the channel order before drawing
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    plt.imshow(rgb_image)
    plt.axis('off')  # hide axis ticks and labels
    plt.show()





In [17]:
import time
import cv2
from matplotlib import pyplot as plt
# Capture settings
CAMERA_INDEX = 1          # device index passed to cv2.VideoCapture
FRAME_SIZE = (640, 480)   # (width, height) every frame is resized to before inference
WINDOW_SECONDS = 5        # length of each selection window

# Initialize video capture
cap = cv2.VideoCapture(CAMERA_INDEX)
results = []        # per-frame model outputs of the current window
best_results = []   # reset here; this cell displays winners instead of storing them
start_time = time.time()

# The try/finally guarantees the camera and the preview window are released
# even when the loop is stopped by an exception or a kernel interrupt.
try:
    # Start the video capturing loop
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        resized_frame = cv2.resize(frame, FRAME_SIZE)

        # Get detection result (verbose=False silences the per-frame log lines)
        result = model(resized_frame, verbose=False)
        results.append(result)

        # Annotate the frame and show the live preview
        annotated_frame = result[0].plot()
        cv2.imshow("frame", annotated_frame)

        # Once a full window has elapsed, display its best frame and start over
        current_time = time.time()
        if current_time - start_time >= WINDOW_SECONDS:
            frame_idx, best_result = find_best_result(results)

            if best_result is not None:
                # Kept under its own name so the live-preview frame is not overwritten
                best_annotated = best_result[0].plot()

                # Convert from BGR (OpenCV format) to RGB (Matplotlib format)
                best_annotated_rgb = cv2.cvtColor(best_annotated, cv2.COLOR_BGR2RGB)

                # Print the detected class ids, then show the frame inline
                print(best_result[0].boxes.cls)
                plt.close('all')  # drop figures left over from earlier windows
                plt.imshow(best_annotated_rgb)
                plt.axis('off')  # Turn off the axis labels
                plt.show()

            # Reset the time and clear the results for the next window
            start_time = current_time
            results = []

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and destroy windows
    cap.release()
    cv2.destroyAllWindows()

NameError: name 'a' is not defined