In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.55-py3-none-any.whl.metadata (35 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Using cached torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl.metadata (6.2 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Collecting sympy==1.13.1 (from torch>=1.8.0->ultralytics)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading ultralytics-8.3.55-py3-none-any.whl (904 kB)
   ---------------------------------------- 0.0/904.3 kB ? eta -:--:--
   ---------------------------------------- 10.2/904.3 kB ? eta -:--:--
   - ------------------------------------- 30.7/904.3 kB 220.2 kB/s eta 0:00:04
   --- -----

In [None]:
import torch
from ultralytics import YOLO
import cv2
import time
from tkinter import Tk, Button, Label, filedialog, StringVar, OptionMenu, messagebox
from PIL import Image, ImageTk
import threading

In [15]:


# Shared state read and written by the GUI callbacks and the detection thread
model = YOLO("yolov8n.pt")        # detector in use until another weights file is chosen
output_path = "output_video.avi"  # file the annotated video is saved to
cap = None                        # webcam capture handle; None until a run opens it

# Function to start webcam object detection
def start_webcam():
    """Run YOLO detection on the default webcam until asked to stop.

    Launched in a worker thread by the "Start Webcam" button. Every captured
    frame is passed through the global ``model``, annotated with boxes, an
    object count and an FPS read-out, shown in ``video_label`` and appended
    to the video file at ``output_path``.

    The loop ends when ``stop_flag`` is set, the capture is closed, or a
    frame cannot be read. The capture and the video writer are released in
    all cases, including when an exception escapes the loop.

    NOTE(review): widgets are still updated directly from this worker
    thread, as in the original code; confirm this is acceptable for the
    Tk build in use or marshal the updates through ``root.after``.
    """
    global cap

    # A previous "Stop" leaves the event set; clear it so that a restarted
    # run does not terminate after its very first frame.
    stop_flag.clear()

    # Open the default webcam (device index 0)
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        messagebox.showerror("Error", "Could not open webcam.")
        return

    # Size the writer from the real capture resolution: VideoWriter silently
    # drops frames whose size differs from the one it was created with.
    # Fall back to 640x480 if the backend does not report a size.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480

    # output_path can be empty if a save dialog was cancelled earlier
    save_path = output_path or "output_video.avi"
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(save_path, fourcc, 20.0, (frame_width, frame_height))

    # Start the FPS clock now so the first frame gets a meaningful value
    prev_time = time.time()

    status_label.config(text="Status: Running")

    try:
        while cap.isOpened() and not stop_flag.is_set():
            ret, frame = cap.read()
            if not ret:
                # A failed read right after "Stop" released the capture is
                # expected and must not be reported as an error.
                if not stop_flag.is_set():
                    messagebox.showerror("Error", "Failed to capture image.")
                break

            # Detect objects and draw boxes/labels on a copy of the frame
            results = model(frame)
            annotated_frame = results[0].plot()

            # Overlay the number of detections
            num_objects = len(results[0].boxes)
            cv2.putText(annotated_frame, f'Objects: {num_objects}', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Overlay the frame rate, guarding against a zero time delta
            curr_time = time.time()
            elapsed = curr_time - prev_time
            fps = 1 / elapsed if elapsed > 0 else 0
            prev_time = curr_time
            cv2.putText(annotated_frame, f'FPS: {int(fps)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Save the frame while it is still in OpenCV's BGR channel order;
            # writing the RGB copy would swap red and blue in the video.
            out.write(annotated_frame)

            # Convert to RGB only for display through PIL/Tkinter
            display_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
            img = ImageTk.PhotoImage(image=Image.fromarray(display_frame))
            video_label.config(image=img)
            video_label.image = img  # keep a reference so the image is not garbage-collected
    finally:
        # Always free the camera and finalise the video file. No OpenCV
        # windows are ever created here, so there is nothing else to close.
        cap.release()
        out.release()
        status_label.config(text="Status: Stopped")

# Function to stop webcam object detection
def stop_webcam():
    """Ask the detection loop to stop and release the webcam if one is open.

    Sets ``stop_flag`` (checked by the capture loop) and then releases the
    module-level ``cap`` handle when it is not ``None``.
    """
    stop_flag.set()

    capture = cap
    if capture is None:
        return
    capture.release()

# Function to select output video path
def select_output_path():
    """Let the user pick where the annotated video is saved.

    Opens a "save as" dialog restricted to ``.avi`` files. When a path is
    chosen, the global ``output_path`` and the path label are updated.
    When the dialog is cancelled, the previously selected path is kept
    instead of being overwritten with an empty value.
    """
    global output_path
    chosen_path = filedialog.asksaveasfilename(defaultextension=".avi", filetypes=[("AVI files", "*.avi")])
    if not chosen_path:
        # Dialog cancelled: leave the current save path untouched
        return
    output_path = chosen_path
    path_label.config(text=f"Save Path: {output_path}")

# Function to change the model
def change_model(*args):
    """Replace the global detector with the weights picked in the dropdown.

    Any positional arguments supplied by the caller are ignored; the
    selection is read from ``model_var``.
    """
    global model
    selected_weights = model_var.get()
    model = YOLO(selected_weights)

# Create the main GUI window
root = Tk()
root.title("YOLOv8 Webcam Detection")
root.geometry("800x600")  # Larger window size

# Event used to ask the detection thread to leave its capture loop
stop_flag = threading.Event()

# Handle on the most recently started detection thread (None before the first run)
webcam_thread = None


def launch_webcam_thread():
    """Start detection in a background thread unless one is already running.

    Ignoring the click while a run is active prevents two threads from
    competing for the same webcam and output file.
    """
    global webcam_thread
    if webcam_thread is not None and webcam_thread.is_alive():
        return
    # A previous "Stop" leaves the event set; reset it for the new run
    stop_flag.clear()
    # daemon=True so a forgotten run cannot keep the interpreter alive
    webcam_thread = threading.Thread(target=start_webcam, daemon=True)
    webcam_thread.start()


def close_window():
    """Stop detection, wait for the worker to finish, then close the window.

    The worker still touches widgets while it winds down, so the window is
    only destroyed once the thread has ended; until then this handler
    re-schedules itself instead of blocking the Tk event loop.
    """
    stop_flag.set()
    if webcam_thread is not None and webcam_thread.is_alive():
        root.after(100, close_window)
    else:
        root.destroy()


# Model selection dropdown
model_var = StringVar(value="yolov8n.pt")
model_options = ["yolov8n.pt", "yolov8s.pt", "yolov8m.pt", "yolov8l.pt", "yolov8x.pt"]
model_menu = OptionMenu(root, model_var, *model_options, command=change_model)
model_menu.pack(pady=10)

# Button to select output video path
path_label = Label(root, text="Save Path: output_video.avi")
path_label.pack(pady=5)
Button(root, text="Select Save Path", command=select_output_path).pack(pady=5)

# Label to display webcam feed
video_label = Label(root)
video_label.pack(pady=10)

# Buttons to start and stop the webcam
Button(root, text="Start Webcam", command=launch_webcam_thread).pack(pady=10)
Button(root, text="Stop Webcam", command=stop_webcam).pack(pady=10)

# Status label
status_label = Label(root, text="Status: Stopped", font=("Arial", 12))
status_label.pack(pady=10)

# Closing the window must stop the worker so the webcam and video file are released
root.protocol("WM_DELETE_WINDOW", close_window)

# Run the GUI main loop
root.mainloop()


0: 480x640 1 person, 137.4ms
Speed: 0.0ms preprocess, 137.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 101.5ms
Speed: 0.0ms preprocess, 101.5ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 99.7ms
Speed: 0.0ms preprocess, 99.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 99.9ms
Speed: 0.0ms preprocess, 99.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 101.1ms
Speed: 3.0ms preprocess, 101.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 apple, 101.2ms
Speed: 1.5ms preprocess, 101.2ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 101.4ms
Speed: 0.0ms preprocess, 101.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 100.4ms
Speed: 2.9ms preprocess, 100.4ms inference, 0.0ms postprocess per ima