In [11]:
import streamlit as st
import cv2
from ultralytics import YOLO
import tempfile
import time
import os

# Page title shown at the top of the app
st.title("Real-Time Instance Segmentation with YOLOv8")

# Sidebar: choose between live webcam input and an uploaded file
st.sidebar.header("Options")
task = st.sidebar.radio("Choose Task", ["Real-Time Webcam", "Upload Video/Image"])

# Sidebar: which YOLOv8 segmentation weights to use
model_name = st.sidebar.selectbox(
    "Select YOLOv8 Model",
    ["yolov8n-seg.pt", "yolov8s-seg.pt", "yolov8m-seg.pt", "yolov8l-seg.pt", "yolov8x-seg.pt"],
    index=2,  # Default to YOLOv8m-seg
)

# Sidebar: minimum confidence passed to the model on every inference call
confidence_threshold = st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.25, 0.01)


@st.cache_resource
def _load_model(weights_name):
    """Load the YOLO weights called *weights_name* once and reuse them afterwards.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the weights would be loaded again on each
    of those reruns.

    NOTE(review): the cached object is shared by everything that requests the
    same weights name -- confirm that is acceptable if several users run the
    app at the same time.
    """
    return YOLO(weights_name)


# Load the selected YOLOv8 model (cached per weights name)
model = _load_model(model_name)

# Function to perform instance segmentation on a frame and calculate FPS/object counts
def segment_frame(frame, prev_time):
    """Segment one frame and overlay the FPS and per-class object counts on it.

    Uses the module-level ``model`` and ``confidence_threshold``.

    Args:
        frame: Image handed unchanged to the model (callers pass frames read
            with OpenCV).
        prev_time: ``time.time()`` timestamp of the previous call; the FPS
            value is derived from the time elapsed since then.

    Returns:
        A tuple ``(annotated_frame, curr_time, class_counts)`` where
        ``annotated_frame`` is the plotted result with the text overlay,
        ``curr_time`` is the timestamp to pass as ``prev_time`` next time, and
        ``class_counts`` maps class name to the number of detections.
    """
    from collections import Counter

    # Perform instance segmentation with confidence threshold
    result = model(frame, conf=confidence_threshold)[0]
    annotated_frame = result.plot()

    # Calculate FPS; a clock that did not advance (or stepped backwards) must
    # not raise ZeroDivisionError or yield a negative rate.
    curr_time = time.time()
    elapsed = curr_time - prev_time
    fps = 1 / elapsed if elapsed > 0 else 0.0

    # The last value of each detection row is used as the class id.
    class_counts = dict(
        Counter(model.names[int(detection[-1])] for detection in result.boxes.data)
    )

    # Draw FPS on the first text row and one row per class below it
    overlay_lines = [f"FPS: {int(fps)}"]
    overlay_lines.extend(f"{class_name}: {count}" for class_name, count in class_counts.items())
    for row, text in enumerate(overlay_lines):
        cv2.putText(annotated_frame, text, (10, 30 + 30 * row), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return annotated_frame, curr_time, class_counts

def _show_counts(placeholder, title, class_counts):
    """Render *title* and the per-class counts into *placeholder*.

    Writing through a single placeholder replaces the previous content, so the
    sidebar shows only the latest counts instead of growing with every frame.
    """
    with placeholder.container():
        st.header(title)
        for class_name, count in class_counts.items():
            st.write(f"{class_name}: {count}")


def _remove_quietly(path):
    """Delete the temporary file at *path* on a best-effort basis."""
    try:
        os.remove(path)
    except OSError:
        # Deliberate: failing to clean up a temp file must not break the page.
        pass


def _run_webcam():
    """Stream webcam frames through ``segment_frame`` until capture ends."""
    st.write("Webcam started. Press 'Stop Webcam' to stop.")
    # The value is read once here and does not change while the loop runs.
    # NOTE(review): stopping presumably relies on Streamlit rerunning the
    # script when the button is clicked -- confirm.
    stop_button = st.button("Stop Webcam")

    # Open the webcam
    cap = cv2.VideoCapture(0)
    frame_placeholder = st.empty()
    counts_placeholder = st.sidebar.empty()
    prev_time = time.time()

    try:
        if not cap.isOpened():
            st.error("Could not open the webcam.")

        while cap.isOpened() and not stop_button:
            ret, frame = cap.read()
            if not ret:
                st.error("Failed to capture video.")
                break

            # Perform instance segmentation and get FPS/object counts
            annotated_frame, prev_time, class_counts = segment_frame(frame, prev_time)

            # Display the annotated frame and the latest counts
            frame_placeholder.image(annotated_frame, channels="BGR", use_column_width=True)
            _show_counts(counts_placeholder, "Object Counts (Real-Time)", class_counts)
    finally:
        # Release the webcam even when the loop is interrupted.
        cap.release()

    st.write("Webcam stopped.")


def _process_image(file_path):
    """Segment the image stored at *file_path* and offer the result for download."""
    st.write("Processing image...")
    frame = cv2.imread(file_path)
    if frame is None:
        # cv2.imread signals an unreadable file by returning None.
        st.error("Could not read the uploaded image.")
        return

    annotated_frame, _, class_counts = segment_frame(frame, time.time())

    # Display the segmented image
    st.image(annotated_frame, channels="BGR", caption="Segmented Image", use_column_width=True)

    # Display object counts in the sidebar
    _show_counts(st.sidebar.empty(), "Object Counts (Image)", class_counts)

    # Encode in memory and expose the download directly: no file is left on
    # disk, and no extra button click (with a full re-run of the
    # segmentation) is needed before the download appears.
    encoded_ok, encoded = cv2.imencode(".jpg", annotated_frame)
    if not encoded_ok:
        st.error("Could not encode the segmented image for download.")
        return
    st.download_button(
        label="Download Image",
        data=encoded.tobytes(),
        file_name="segmented_image.jpg",
        mime="image/jpeg",
    )


def _process_video(file_path):
    """Segment every frame of the video at *file_path* and offer the result for download."""
    st.write("Processing video...")
    cap = cv2.VideoCapture(file_path)
    if not cap.isOpened():
        cap.release()
        st.error("Could not open the uploaded video.")
        return

    frame_placeholder = st.empty()
    counts_placeholder = st.sidebar.empty()

    # Keep the source frame rate when the container reports one; fall back to
    # the previous fixed value of 20 fps otherwise.
    source_fps = cap.get(cv2.CAP_PROP_FPS)
    output_fps = source_fps if source_fps > 0 else 20.0
    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )

    # Reserve a unique temporary path for the segmented video instead of a
    # fixed file name in the working directory.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as output_file:
        output_video_path = output_file.name

    out = None
    try:
        try:
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_video_path, fourcc, output_fps, frame_size)
            prev_time = time.time()

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Perform instance segmentation and get FPS/object counts
                annotated_frame, prev_time, class_counts = segment_frame(frame, prev_time)

                # Write the annotated frame to the output video
                out.write(annotated_frame)

                # Display the annotated frame and the latest counts
                frame_placeholder.image(annotated_frame, channels="BGR", use_column_width=True)
                _show_counts(counts_placeholder, "Object Counts (Video)", class_counts)

                # Add a small delay to simulate real-time playback
                time.sleep(0.03)
        finally:
            # Release the video capture and writer even if processing failed.
            cap.release()
            if out is not None:
                out.release()

        st.write("Video processing complete.")

        # Read the result into memory so the temporary file can be removed
        # right away, and expose the download without a gating button.
        with open(output_video_path, "rb") as video_file:
            video_bytes = video_file.read()
        st.download_button(
            label="Download Video",
            data=video_bytes,
            file_name="segmented_video.mp4",
            mime="video/mp4",
        )
    finally:
        _remove_quietly(output_video_path)


# Real-Time Webcam Task
if task == "Real-Time Webcam":
    st.header("Real-Time Webcam Segmentation")
    if st.button("Start Webcam"):
        _run_webcam()

# Upload Video/Image Task
elif task == "Upload Video/Image":
    st.header("Upload Video or Image for Segmentation")
    uploaded_file = st.file_uploader("Upload a video or image", type=["jpg", "jpeg", "png", "mp4"])

    if uploaded_file is not None:
        # Save the upload to a temporary file that keeps its real extension
        # (falling back to the previous ".mp4" when the name has none).
        suffix = os.path.splitext(uploaded_file.name)[1] or ".mp4"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(uploaded_file.getvalue())
            file_path = temp_file.name

        try:
            # Dispatch on the MIME type reported for the upload
            if uploaded_file.type.startswith("image"):
                _process_image(file_path)
            elif uploaded_file.type.startswith("video"):
                _process_video(file_path)
        finally:
            # The temporary copy of the upload is no longer needed.
            _remove_quietly(file_path)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m-seg.pt to 'yolov8m-seg.pt'...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 52.4M/52.4M [02:08<00:00, 429kB/s]



0: 480x640 1 person, 394.7ms
Speed: 3.0ms preprocess, 394.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 227.2ms
Speed: 2.1ms preprocess, 227.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 220.9ms
Speed: 1.0ms preprocess, 220.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 240.3ms
Speed: 1.0ms preprocess, 240.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 223.0ms
Speed: 1.4ms preprocess, 223.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 221.8ms
Speed: 1.0ms preprocess, 221.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 217.1ms
Speed: 1.0ms preprocess, 217.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 217.1ms
Speed: 1.0ms preprocess, 217.1ms inference, 1.0ms postprocess per image at

In [9]:
# Shell escape: run nvidia-smi to report the GPU, driver version and CUDA version.
!nvidia-smi

Fri Jan 17 17:45:43 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 556.13                 Driver Version: 556.13         CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   40C    P8              2W /   35W |       0MiB /   6141MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [1]:
import torch
# Report whether PyTorch can use a CUDA device and, if so, which one.
cuda_available = torch.cuda.is_available()
print(cuda_available)  # Should return True
if cuda_available:
    print(torch.cuda.get_device_name(0))  # e.g. "NVIDIA GeForce RTX 4050 Laptop GPU"
else:
    # Asking for a device name without a usable CUDA device raises, so report
    # the situation instead of aborting the cell.
    print("No CUDA device available")

True
NVIDIA GeForce RTX 4050 Laptop GPU


In [6]:
import torch
import torchvision
# Print the installed builds so the torch / torchvision pairing can be checked.
print(torch.__version__)
print(torchvision.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)

# A CUDA build of torch combined with a "+cpu" build of torchvision leaves
# torchvision's compiled operators (such as torchvision.ops.nms) without a
# CUDA implementation, so calling them on CUDA tensors fails.
if torch.version.cuda is not None and torchvision.__version__.endswith("+cpu"):
    print(
        "WARNING: torch is a CUDA build but torchvision is a CPU-only build; "
        "install the torchvision wheel that matches torch's CUDA version."
    )

2.5.1+cu124
0.20.1+cpu
True
12.4


In [7]:
import torch
import torchvision

# Two partially overlapping boxes and their scores, created directly on the GPU
device = torch.device("cuda")
boxes = torch.tensor(
    [[0, 0, 100, 100], [50, 50, 150, 150]],
    dtype=torch.float32,
    device=device,
)
scores = torch.tensor([0.9, 0.8], dtype=torch.float32, device=device)

# Run non-maximum suppression; the boxes overlap with IoU of about 0.14,
# which is below the threshold, so both indices are expected to survive.
iou_threshold = 0.5
keep_indices = torchvision.ops.nms(boxes, scores, iou_threshold=iou_threshold)
print(keep_indices)  # Should return tensor([0, 1], device='cuda:0')

NotImplementedError: Could not run 'torchvision::nms' with arguments from the 'CUDA' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'torchvision::nms' is only available for these backends: [CPU, Meta, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

CPU: registered at C:\actions-runner\_work\vision\vision\pytorch\vision\torchvision\csrc\ops\cpu\nms_kernel.cpp:112 [kernel]
Meta: registered at /dev/null:184 [kernel]
QuantizedCPU: registered at C:\actions-runner\_work\vision\vision\pytorch\vision\torchvision\csrc\ops\quantized\cpu\qnms_kernel.cpp:124 [kernel]
BackendSelect: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\functorch\DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMPS: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:87 [backend fallback]
AutogradXPU: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:100 [backend fallback]
AutogradLazy: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMeta: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\VariableFallbackKernel.cpp:91 [backend fallback]
Tracer: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\autograd\TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: registered at C:\actions-runner\_work\vision\vision\pytorch\vision\torchvision\csrc\ops\autocast\nms_kernel.cpp:34 [kernel]
AutocastXPU: registered at C:\actions-runner\_work\vision\vision\pytorch\vision\torchvision\csrc\ops\autocast\nms_kernel.cpp:41 [kernel]
AutocastMPS: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: registered at C:\actions-runner\_work\vision\vision\pytorch\vision\torchvision\csrc\ops\autocast\nms_kernel.cpp:27 [kernel]
FuncTorchBatched: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\functorch\LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\functorch\LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\functorch\VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\functorch\TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\functorch\DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\core\PythonFallbackKernel.cpp:157 [backend fallback]
