# 1. Import Libraries and Initialize Model


In [1]:
import cv2
import torch
import time
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load YOLOv5 small model (yolov5s)
model.eval()

# Ensure model works on GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)


Using cache found in C:\Users\aliem/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-12-25 Python-3.11.7 torch-2.5.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
  

# 2. Preprocessing and Frame Handling


In [2]:
# Function to convert the frame from BGR to RGB as YOLO requires RGB images
def preprocess_frame(frame):
    """Convert frame from BGR to RGB as YOLO requires RGB images"""
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return rgb_frame

# 3. Start Video Function:


In [3]:
# Function to process the video or camera feed
def start_video(video_path=None, is_camera=False, resolution=(640, 480)):
    """Start processing video or camera feed"""
    cap = None
    # If there is a video path, load the video
    if video_path:
        cap = cv2.VideoCapture(video_path)  # Load video from path
    # If using the camera, open the default camera
    elif is_camera:
        cap = cv2.VideoCapture(0)  # Use default camera

    # Ensure the video or camera opened successfully
    if not cap.isOpened():
        messagebox.showerror("Error", "Could not open video source.")
        return

    # Ask for the path to save the processed video
    output_path = simpledialog.askstring("Save Video", "Enter path to save the video (e.g. output_video.avi):")
    if not output_path:
        output_path = "output_video.avi"
    
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), 20.0, resolution)

    # Set the confidence threshold for the detection results
    confidence_threshold = float(confidence_var.get())  # Get confidence value from Tkinter

    prev_time = 0  # For calculating FPS
    # Start the frame processing loop
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            messagebox.showerror("Error", "Failed to capture frame.")
            break

        # Process the frame
        preprocessed_frame = preprocess_frame(frame)

        # Resize the frame to 640x640
        resized_frame = cv2.resize(preprocessed_frame, (640, 640))

        # Perform object detection with YOLOv5
        results = model(resized_frame)

        # Draw bounding boxes and labels on the frame
        for result in results.xyxy[0]:  # xyxy format: (x1, y1, x2, y2, confidence, class)
            x1, y1, x2, y2, conf, cls = result
            if conf >= confidence_threshold:  # Filter detections based on confidence
                label = f"{model.names[int(cls)]}: {conf:.2f}"
                # Draw rectangle around the object
                cv2.rectangle(frame, (int(x1 * frame.shape[1] / 640), int(y1 * frame.shape[0] / 640)),
                              (int(x2 * frame.shape[1] / 640), int(y2 * frame.shape[0] / 640)), (0, 255, 0), 2)
                # Display label on the image
                cv2.putText(frame, label, (int(x1 * frame.shape[1] / 640), int(y1 * frame.shape[0] / 640) - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Calculate and display FPS
        curr_time = time.time()
        fps = 1 / (curr_time - prev_time)
        prev_time = curr_time
        cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display the frame with results
        cv2.putText(frame, "Press 'q' to exit", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.imshow('YOLO Object Detection', frame)

        # Write the frame to the saved video
        out.write(frame)

        # Exit the loop if 'q' is pressed
        if cv2.waitKey(1) == ord('q'):
            break

    # Release the video capture and writer, and close all windows
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# 4. Load Video Function


In [4]:
# Function to open a file dialog to choose a video file
def load_video():
    """Open file dialog to load a video file"""
    file_path = filedialog.askopenfilename(filetypes=[("Video Files", "*.mp4 *.avi *.mov")])
    if file_path:
        start_video(video_path=file_path)

# 5. Use Camera Function


In [5]:
# Function to start using the webcam
def use_camera():
    """Start using the webcam"""
    start_video(is_camera=True)

# 6. Set Up Tkinter User Interface


In [6]:
# Set up the Tkinter window
root = tk.Tk()
root.title("YOLO Object Detection")

# Set up the confidence threshold input
confidence_var = tk.StringVar(value="0.5")
confidence_label = tk.Label(root, text="Confidence Threshold (0-1):")
confidence_label.pack()

confidence_entry = tk.Entry(root, textvariable=confidence_var)
confidence_entry.pack()

# Buttons to load the video or use the camera
load_video_button = tk.Button(root, text="Load Video", command=load_video)
load_video_button.pack(pady=10)

use_camera_button = tk.Button(root, text="Use Camera", command=use_camera)
use_camera_button.pack(pady=10)

# Start the Tkinter main loop
root.mainloop()

  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with a

In [None]:
import cv2
import torch
import time
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load YOLOv5 small model (yolov5s)
model.eval()

# Ensure model works on GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

# -----------------------------
# Frame Processing Function
def preprocess_frame(frame):
    """Convert frame from BGR to RGB as YOLO requires RGB images"""
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return rgb_frame

# -----------------------------
# Start Video Function
def start_video(video_path=None, is_camera=False, resolution=(640, 480)):
    """Start processing video or camera feed"""
    cap = None
    if video_path:
        cap = cv2.VideoCapture(video_path)  # Load video from path
    elif is_camera:
        cap = cv2.VideoCapture(0)  # Use default camera

    if not cap.isOpened():
        messagebox.showerror("Error", "Could not open video source.")
        return

    # Set up video output
    output_path = simpledialog.askstring("Save Video", "Enter path to save the video (e.g. output_video.avi):")
    if not output_path:
        output_path = "output_video.avi"
    
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), 20.0, resolution)

    # Set confidence threshold for results
    confidence_threshold = float(confidence_var.get())  # Get confidence value from Tkinter

    prev_time = 0  # For FPS calculation
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            messagebox.showerror("Error", "Failed to capture frame.")
            break

        # Process the frame
        preprocessed_frame = preprocess_frame(frame)

        # Resize frame to 640x640
        resized_frame = cv2.resize(preprocessed_frame, (640, 640))

        # Perform object detection using YOLOv5
        results = model(resized_frame)

        # Draw bounding boxes and labels on the frame
        for result in results.xyxy[0]:  # xyxy format: (x1, y1, x2, y2, confidence, class)
            x1, y1, x2, y2, conf, cls = result
            if conf >= confidence_threshold:  # Filter detections based on confidence
                label = f"{model.names[int(cls)]}: {conf:.2f}"
                # Draw bounding box around the object
                cv2.rectangle(frame, (int(x1 * frame.shape[1] / 640), int(y1 * frame.shape[0] / 640)),
                              (int(x2 * frame.shape[1] / 640), int(y2 * frame.shape[0] / 640)), (0, 255, 0), 2)
                # Display the label on the image
                cv2.putText(frame, label, (int(x1 * frame.shape[1] / 640), int(y1 * frame.shape[0] / 640) - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Calculate and display FPS
        curr_time = time.time()
        fps = 1 / (curr_time - prev_time)
        prev_time = curr_time
        cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display the frame with results
        cv2.putText(frame, "Press 'q' to exit", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.imshow('YOLO Object Detection', frame)

        # Write the frame to the video file
        out.write(frame)

        # Exit the loop when 'q' is pressed
        if cv2.waitKey(1) == ord('q'):
            
            break

    # Release camera and close windows
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# -----------------------------
# Load Video Function
def load_video():
    """Open file dialog to load a video file"""
    file_path = filedialog.askopenfilename(filetypes=[("Video Files", "*.mp4 *.avi *.mov")])
    if file_path:
        start_video(video_path=file_path)

# -----------------------------
# Use Camera Function
def use_camera():
    """Start using the webcam"""
    start_video(is_camera=True)

# -----------------------------
# Setup GUI using Tkinter
root = tk.Tk()
root.title("YOLO Object Detection")

# Confidence Threshold
confidence_var = tk.StringVar(value="0.5")
confidence_label = tk.Label(root, text="Confidence Threshold (0-1):")
confidence_label.pack()

confidence_entry = tk.Entry(root, textvariable=confidence_var)
confidence_entry.pack()

# Buttons to load video or use camera
load_video_button = tk.Button(root, text="Load Video", command=load_video)
load_video_button.pack(pady=10)

use_camera_button = tk.Button(root, text="Use Camera", command=use_camera)
use_camera_button.pack(pady=10)

# Run the Tkinter main loop
root.mainloop()


Using cache found in C:\Users\aliem/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-12-25 Python-3.11.7 torch-2.5.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp

# Project Summary:
The project is a Python application that utilizes the YOLOv5 (You Only Look Once version 5) model for real-time object detection in video streams or camera feeds. It employs several libraries, including OpenCV for video processing, PyTorch for deep learning, and Tkinter for creating a graphical user interface (GUI).

### Key Components of the Project:

1. **Model Initialization**: 
   - The YOLOv5 small model (`yolov5s`) is loaded using PyTorch's `torch.hub`. The model is set to evaluation mode and configured to run on a GPU if available.

2. **Frame Preprocessing**: 
   - A function converts video frames from BGR (OpenCV's default) to RGB format, which is required by the YOLO model.

3. **Video Processing**:
   - The `start_video` function handles video input, either from a file or a webcam. It captures frames, processes them for object detection, and displays the results in real-time.
   - Detected objects are highlighted with bounding boxes and labels, and the application calculates and displays the frames per second (FPS).

4. **User Interface**:
   - A simple GUI is created using Tkinter, allowing users to set a confidence threshold for detections, load video files, or use the webcam.
   - Users can specify the output path for saving processed video.

5. **Error Handling**:
   - The application includes error handling to manage issues such as failure to open video sources or capture frames.

6. **Future Warnings**:
   - The code generates warnings regarding deprecated functions in the PyTorch library, suggesting updates to the usage of automatic mixed precision.

Overall, the project provides a functional tool for real-time object detection, demonstrating the integration of deep learning models with video processing and user-friendly interfaces.