In [1]:
from ultralytics import YOLO

# Load YOLOv8 pre-trained model
model = YOLO('yolov8n.pt')  # Use 'yolov8n.pt' for a smaller model, or a custom model trained for human detection


In [2]:
!git clone https://github.com/nwojke/deep_sort.git


Cloning into 'deep_sort'...


In [7]:
import cv2
from ultralytics import YOLO
from deep_sort.deep_sort_app import DeepSort
import torch

# Load YOLOv8 model
yolo_model = YOLO('yolov8n.pt')  # YOLOv8 model

# Initialize DeepSORT
deepsort = DeepSort(model_path='ckpt.t7')  # Path to DeepSORT model weights

# Load video
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # YOLOv8 detection
    results = yolo_model(frame)
    detections = results.xywh[0]  # Bounding boxes with xywh format

    # Filter detections for humans (class 0 in COCO dataset is for 'person')
    humans = [det for det in detections if det[5] == 0]

    # Convert detections to the format required by DeepSORT
    bbox_xywh = [[x1, y1, w, h] for x1, y1, w, h, conf, cls in humans]
    confs = [conf for x1, y1, w, h, conf, cls in humans]

    # Perform tracking with DeepSORT
    outputs = deepsort.update(bbox_xywh, confs, frame)

    # Draw results
    for output in outputs:
        x1, y1, x2, y2, track_id = output
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {track_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Show frame
    cv2.imshow('Frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


ImportError: cannot import name 'nn_matching' from 'deep_sort' (unknown location)

In [8]:
import cv2
import numpy as np
from ultralytics import YOLO

# Function to compute the Intersection over Union (IoU) between two boxes
def compute_iou(box1, box2):
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    # Compute the coordinates of the intersection box
    x_min = max(x1_min, x2_min)
    y_min = max(y1_min, y2_min)
    x_max = min(x1_max, x2_max)
    y_max = min(y1_max, y2_max)

    # Compute the area of intersection
    inter_area = max(0, x_max - x_min) * max(0, y_max - y_min)

    # Compute the area of both boxes
    box1_area = (x1_max - x1_min) * (y1_max - y1_min)
    box2_area = (x2_max - x2_min) * (y2_max - y2_min)

    # Compute the Intersection over Union (IoU)
    iou = inter_area / (box1_area + box2_area - inter_area)
    return iou

# Function to assign track IDs to new detections based on previous detections
def assign_track_ids(detections, previous_detections, threshold=0.5):
    new_tracks = []
    for detection in detections:
        x1, y1, x2, y2, conf, cls = detection
        best_iou = 0
        best_track_id = None

        # Check for previous detection with the highest IoU
        for prev_det in previous_detections:
            prev_box = prev_det[:4]
            prev_track_id = prev_det[4]
            iou = compute_iou([x1, y1, x2, y2], prev_box)

            if iou > best_iou and iou > threshold:
                best_iou = iou
                best_track_id = prev_track_id

        if best_track_id is not None:
            new_tracks.append([x1, y1, x2, y2, best_track_id])
        else:
            # If no match, assign a new track ID
            new_track_id = len(previous_detections) + 1
            new_tracks.append([x1, y1, x2, y2, new_track_id])

    return new_tracks

# Load YOLOv8 model
model = YOLO('yolov8n.pt')  # YOLOv8 model (nano version for speed)

# Initialize video capture
cap = cv2.VideoCapture(0)

# To store previous detections for tracking
previous_detections = []

# To store the unique track IDs for counting
unique_ids = set()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # YOLOv8 detection
    results = model(frame)
    detections = results.xyxy[0].cpu().numpy()  # YOLOv8 bounding boxes in xyxy format

    # Filter detections for humans (class 0 in COCO dataset is 'person')
    humans = [det for det in detections if int(det[5]) == 0]

    # Assign track IDs based on previous frame detections
    tracked_humans = assign_track_ids(humans, previous_detections)

    # Update previous detections for next frame
    previous_detections = tracked_humans

    # Track new IDs and update total count
    for tracked_human in tracked_humans:
        x1, y1, x2, y2, track_id = tracked_human
        unique_ids.add(track_id)

        # Draw bounding boxes and track IDs
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {track_id}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Show the total count of unique humans
    total_count = len(unique_ids)
    cv2.putText(frame, f"Total Count: {total_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Show the frame with tracked humans and total count
    cv2.imshow('Frame', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 (no detections), 114.7ms
Speed: 3.0ms preprocess, 114.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


AttributeError: 'list' object has no attribute 'xyxy'

In [1]:
import cv2
import numpy as np
from ultralytics import YOLO

# Function to compute the Intersection over Union (IoU) between two boxes
def compute_iou(box1, box2):
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    # Compute the coordinates of the intersection box
    x_min = max(x1_min, x2_min)
    y_min = max(y1_min, y2_min)
    x_max = min(x1_max, x2_max)
    y_max = min(y1_max, y2_max)

    # Compute the area of intersection
    inter_area = max(0, x_max - x_min) * max(0, y_max - y_min)

    # Compute the area of both boxes
    box1_area = (x1_max - x1_min) * (y1_max - y1_min)
    box2_area = (x2_max - x2_min) * (y2_max - y2_min)

    # Compute the Intersection over Union (IoU)
    iou = inter_area / (box1_area + box2_area - inter_area)
    return iou

# Function to assign track IDs to new detections based on previous detections
def assign_track_ids(detections, previous_detections, threshold=0.5):
    new_tracks = []
    for detection in detections:
        x1, y1, x2, y2, conf, cls = detection
        best_iou = 0
        best_track_id = None

        # Check for previous detection with the highest IoU
        for prev_det in previous_detections:
            prev_box = prev_det[:4]
            prev_track_id = prev_det[4]
            iou = compute_iou([x1, y1, x2, y2], prev_box)

            if iou > best_iou and iou > threshold:
                best_iou = iou
                best_track_id = prev_track_id

        if best_track_id is not None:
            new_tracks.append([x1, y1, x2, y2, best_track_id])
        else:
            # If no match, assign a new track ID
            new_track_id = len(previous_detections) + 1
            new_tracks.append([x1, y1, x2, y2, new_track_id])

    return new_tracks

# Load YOLOv8 model
model = YOLO('yolov8n.pt')  # YOLOv8 model (nano version for speed)

# Initialize webcam capture (use 0 for the default webcam, or change it for another camera)
cap = cv2.VideoCapture(0)

# To store previous detections for tracking
previous_detections = []

# To store the unique track IDs for counting
unique_ids = set()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # YOLOv8 detection
    results = model(frame)

    # Accessing the list of detections directly (this is a list of dictionaries)
    detections = results[0].boxes.boxes.cpu().numpy()  # YOLOv8 bounding boxes in xyxy format

    # Filter detections for humans (class 0 in COCO dataset is 'person')
    humans = [det for det in detections if int(det[5]) == 0]

    # Assign track IDs based on previous frame detections
    tracked_humans = assign_track_ids(humans, previous_detections)

    # Update previous detections for next frame
    previous_detections = tracked_humans

    # Track new IDs and update total count
    for tracked_human in tracked_humans:
        x1, y1, x2, y2, track_id = tracked_human
        unique_ids.add(track_id)

        # Draw bounding boxes and track IDs
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {track_id}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Show the total count of unique humans
    total_count = len(unique_ids)
    cv2.putText(frame, f"Total Count: {total_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Show the frame with tracked humans and total count
    cv2.imshow('Webcam', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 1 person, 109.1ms
Speed: 2.0ms preprocess, 109.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


AttributeError: 'Boxes' object has no attribute 'boxes'. See valid attributes below.

    A class for managing and manipulating detection boxes.

    This class provides functionality for handling detection boxes, including their coordinates, confidence scores,
    class labels, and optional tracking IDs. It supports various box formats and offers methods for easy manipulation
    and conversion between different coordinate systems.

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw tensor containing detection boxes and associated data.
        orig_shape (Tuple[int, int]): The original image dimensions (height, width).
        is_track (bool): Indicates whether tracking IDs are included in the box data.
        xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
        conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
        cls (torch.Tensor | numpy.ndarray): Class labels for each box.
        id (torch.Tensor | numpy.ndarray): Tracking IDs for each box (if available).
        xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format.
        xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes relative to orig_shape.
        xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes relative to orig_shape.

    Methods:
        cpu(): Returns a copy of the object with all tensors on CPU memory.
        numpy(): Returns a copy of the object with all tensors as numpy arrays.
        cuda(): Returns a copy of the object with all tensors on GPU memory.
        to(*args, **kwargs): Returns a copy of the object with tensors on specified device and dtype.

    Examples:
        >>> import torch
        >>> boxes_data = torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]])
        >>> orig_shape = (480, 640)  # height, width
        >>> boxes = Boxes(boxes_data, orig_shape)
        >>> print(boxes.xyxy)
        >>> print(boxes.conf)
        >>> print(boxes.cls)
        >>> print(boxes.xywhn)
    

In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
import cvzone
import numpy as np

def RGB(event, x, y, flags, param):
    if event == cv2.EVENT_MOUSEMOVE:
        point = [x, y]
        print(point)

cv2.namedWindow('RGB')
cv2.setMouseCallback('RGB', RGB)

# Load the YOLO11 model
model = YOLO("yolo11s.pt")
names=model.model.names
# Open the video file (use video file or webcam, here using webcam)
cap = cv2.VideoCapture(0)
count=0
cy1=261
cy2=286
offset=8
inp={}
enter=[]
exp={}
exitp=[]
while True:
    ret,frame = cap.read()
    if not ret:
        break
    count += 1
    if count % 3 != 0:
        continue

    frame = cv2.resize(frame, (1020, 600))
    
    # Run YOLO11 tracking on the frame, persisting tracks between frames
    results = model.track(frame, persist=True,classes=0)

    # Check if there are any boxes in the results
    if results[0].boxes is not None and results[0].boxes.id is not None:
        # Get the boxes (x, y, w, h), class IDs, track IDs, and confidences
        boxes = results[0].boxes.xyxy.int().cpu().tolist()  # Bounding boxes
        class_ids = results[0].boxes.cls.int().cpu().tolist()  # Class IDs
        track_ids = results[0].boxes.id.int().cpu().tolist()  # Track IDs
        confidences = results[0].boxes.conf.cpu().tolist()  # Confidence score
       
        for box, class_id, track_id, conf in zip(boxes, class_ids, track_ids, confidences):
            c = names[class_id]
            x1, y1, x2, y2 = box
            cv2.rectangle(frame,(x1,y1),(x2,y2),(0,255,0),2)
            cvzone.putTextRect(frame,f'{track_id}',(x1,y2),1,1)
            cvzone.putTextRect(frame,f'{c}',(x1,y1),1,1)
                  

#    cv2.line(frame,(440,286),(1018,286),(0,0,255),2)
#    cv2.line(frame,(438,261),(1018,261),(255,0,255),2)

    cv2.imshow("RGB", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
       break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()



0: 384x640 1 person, 179.8ms
Speed: 4.0ms preprocess, 179.8ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)
[428, 19]
[428, 19]

0: 384x640 1 person, 149.2ms
Speed: 2.9ms preprocess, 149.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
[428, 19]

0: 384x640 1 person, 157.2ms
Speed: 2.0ms preprocess, 157.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 147.0ms
Speed: 2.4ms preprocess, 147.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 148.0ms
Speed: 2.1ms preprocess, 148.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 145.0ms
Speed: 3.0ms preprocess, 145.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 141.4ms
Speed: 2.0ms preprocess, 141.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 143.4ms
Speed: 2.4ms preprocess, 143.4ms inference, 