In [None]:
# Just for object detection using YOLOv8

import numpy as np
from ultralytics import YOLO
import cv2
import cvzone
import math

# YOLOv8 detection demo.
# Every detection is outlined with a plain magenta rectangle; detections whose class is
# "person" and whose confidence exceeds PERSON_MIN_CONF additionally get a text label and
# a corner-style box. If the clip contains several people, all of them are annotated.
VIDEO_SOURCE = "videoplayback.mp4"
PERSON_MIN_CONF = 0.3  # label a person only above this (rounded-up) confidence

model = YOLO("../Yolo-Weights/yolov8l.pt")  # Load the model

# Class-index -> label lookup used to recognise "person" detections.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ] # List of class names

cap = cv2.VideoCapture(VIDEO_SOURCE)

try:
    # Fail loudly instead of silently skipping the loop when the clip cannot be read.
    if not cap.isOpened():
        raise OSError(f"Could not open video source: {VIDEO_SOURCE!r}")

    while True:
        success, img = cap.read()  # Read the next frame
        if not success:
            break  # End of video (or read error)

        # stream=True yields results lazily, so they must be consumed in this loop.
        for r in model(img, stream=True):
            for box in r.boxes:
                # Bounding box corners as integer pixel coordinates
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # Confidence, rounded up to two decimals
                conf = math.ceil((box.conf[0] * 100)) / 100
                currentClass = classNames[int(box.cls[0])]

                if currentClass == "person" and conf > PERSON_MIN_CONF:
                    # Clamp the label position so it stays inside the frame.
                    cvzone.putTextRect(img, f'{currentClass} {conf}', (max(0, x1), max(35, y1)),
                                       scale=0.6, thickness=1, offset=3)
                    # cornerRect takes (x, y, width, height) rather than two corners.
                    cvzone.cornerRect(img, (x1, y1, x2 - x1, y2 - y1), l=9, rt=5)

        cv2.imshow("Image", img)  # Display the annotated frame
        key = cv2.waitKey(1) & 0xFF

        if key in (ord('c'), ord('C')):  # Exit if 'c' or 'C' is pressed
            break
finally:
    # Always free the capture handle and close the preview window, even if
    # inference or drawing raised part-way through the video.
    cap.release()
    cv2.destroyAllWindows()

In [1]:
from deep_sort.utils.parser import get_config
from deep_sort.deep_sort import DeepSort

# DeepSort tracker setup. The checkpoint path is relative to the notebook's working
# directory, so the `deep_sort` package folder must sit next to this notebook.
deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7' # Path to the DeepSort model
# NOTE(review): max_age is presumably the number of frames a lost track is kept before
# it is dropped; confirm against the deep_sort implementation.
tracker = DeepSort(model_path=deep_sort_weights, max_age=70) # Initialize the DeepSort tracker

In [2]:
import torch
import cv2

video_path = 'videoplayback.mp4' # Path to the video

cap = cv2.VideoCapture(video_path) # Open the video
# Fail fast: with an unopened capture the size properties read as 0 and every later
# cell would silently do nothing.
if not cap.isOpened():
    raise OSError(f"Could not open video: {video_path!r}")

# Get the video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # Some containers/backends report 0 (or NaN) for the frame rate; the writer needs a
    # positive value, so fall back to a conventional default.
    fps = 30.0

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_path = 'output.mp4'
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    # An unopened writer would drop every frame passed to write() without complaint.
    cap.release()
    raise OSError(f"Could not open video writer for: {output_path!r}")

# Run inference on the first GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
device

device(type='cuda', index=0)

In [4]:
# Empty accumulators created up front: a list named for frames and a set named for
# track IDs (a set keeps each value at most once).
frames, unique_track_ids = [], set()

In [None]:
import time
from ultralytics import YOLO

# Single-person follow loop.
# Reads frames from `cap`, detects people with YOLOv8, boxes one person per frame,
# overlays a measured FPS counter, writes every frame to `out` and previews it.
# Press 'q' in the preview window to stop early.
#
# NOTE(review): the "ID" used below is the detection's position within the current
# frame's result list, not a persistent identity, and the DeepSort `tracker` created in
# an earlier cell is not used here. The box therefore follows whichever detection has
# that index in each frame. Wire in the tracker if a stable identity is required.

PERSON_CLASS_ID = 0   # the only class requested from the model, labelled 'person'
MIN_CONFIDENCE = 0.8  # detections below this confidence are discarded by the model

# FPS bookkeeping and follow state.
# NOTE: rebinding `fps` here replaces the source-video frame rate read in the setup
# cell; from this point on `fps` holds the measured processing rate.
counter, fps, elapsed = 0, 0, 0
start_time = time.perf_counter()
person_tracked = False
tracked_person_id = None

# Load the YOLO model
model = YOLO("yolov8n.pt")  # Load a pretrained model

try:
    while cap.isOpened():
        ret, frame = cap.read()  # Read the frame
        if not ret:
            # End of stream (or read failure). isOpened() stays true after the last
            # frame, so without this break the loop would spin forever.
            break

        # Frames from OpenCV are BGR, which is what Ultralytics expects for numpy input,
        # so the frame is passed through unconverted. `device` comes from the setup cell
        # and falls back to the CPU when CUDA is unavailable.
        results = model(frame, device=device, classes=PERSON_CLASS_ID, conf=MIN_CONFIDENCE)

        bboxes_xywh = []  # centre-x, centre-y, width, height per person
        confs = []        # confidence score per person
        ids = []          # per-frame detection index per person

        for result in results:
            boxes = result.boxes  # Boxes object for bbox outputs
            cls = boxes.cls.tolist()
            xywh = boxes.xywh.cpu().numpy()
            conf = boxes.conf.cpu().numpy()

            for det_idx, class_index in enumerate(cls):
                if int(class_index) == PERSON_CLASS_ID:
                    bboxes_xywh.append(xywh[det_idx])
                    confs.append(conf[det_idx])
                    ids.append(det_idx)

        tracker_bbox = None
        if person_tracked:
            # Keep following the remembered index if it is present in this frame;
            # otherwise draw nothing for this frame.
            if tracked_person_id in ids:
                tracker_bbox = bboxes_xywh[ids.index(tracked_person_id)]
        elif bboxes_xywh:
            # Nobody selected yet: latch onto the first detected person.
            tracker_bbox = bboxes_xywh[0]
            tracked_person_id = ids[0]
            person_tracked = True

        if tracker_bbox is not None:
            x, y, w, h = tracker_bbox  # box centre and size
            cv2.rectangle(frame, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (0, 255, 0), 2) # Draw the bounding box
            cv2.putText(frame, f"Person-{tracked_person_id}", (int(x) + 10, int(y) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA) # Draw the object ID

        # Refresh the FPS estimate roughly once per second.
        current_time = time.perf_counter()
        elapsed = current_time - start_time
        counter += 1
        if elapsed > 1:
            fps = counter / elapsed
            counter = 0
            start_time = current_time

        # Draw FPS on frame
        cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # The frame is still BGR, which is what both the writer and imshow expect.
        out.write(frame)
        cv2.imshow("Video", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and finalize the output file even if the loop was interrupted
    # or raised; an unreleased writer can leave output.mp4 unplayable.
    cap.release()
    out.release()
    cv2.destroyAllWindows()