In [1]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical

In [2]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]


In [3]:
model = YOLO("yolov8n.pt")

In [8]:
# Open webcam for real-time object detection
cap = cv2.VideoCapture(1)  # 0 for default webcam

In [9]:
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    # Convert frame from BGR to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Run YOLOv8 object detection
    results = model(frame_rgb)
    
    # Draw detections on frame
    for result in results:
        boxes = result.boxes.xyxy  # Bounding box coordinates
        classes = result.boxes.cls  # Class labels
        confidences = result.boxes.conf  # Confidence scores
        
        for i in range(len(boxes)):
            x1, y1, x2, y2 = map(int, boxes[i])
            label = model.names[int(classes[i])]
            confidence = confidences[i]
            
            # Draw bounding box and label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{label}: {confidence:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    
    # Show frame
    cv2.imshow("YOLOv8 Real-Time Detection", frame)
    
    # Exit on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


0: 384x640 (no detections), 59.0ms
Speed: 2.8ms preprocess, 59.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 43.5ms
Speed: 1.5ms preprocess, 43.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 39.5ms
Speed: 1.3ms preprocess, 39.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 40.3ms
Speed: 1.3ms preprocess, 40.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 38.1ms
Speed: 1.4ms preprocess, 38.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 33.3ms
Speed: 1.2ms preprocess, 33.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 35.1ms
Speed: 1.4ms preprocess, 35.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 32.5ms
Speed: 1.3ms preprocess, 32.5ms inference, 0.4ms postprocess per image at s

In [10]:
cap.release()
cv2.destroyAllWindows()

In [11]:
import cv2

for i in range(10):  # Check the first 10 indexes
    cap = cv2.VideoCapture(i)
    if cap.isOpened():
        print(f"Camera found at index {i}")
        cap.release()


Camera found at index 0


OpenCV: out device of bound (0-0): 1
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 2
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 3
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 4
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 5
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 6
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 7
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 8
OpenCV: camera failed to properly initialize!
OpenCV: out device of bound (0-0): 9
OpenCV: camera failed to properly initialize!
