In [8]:
import cv2
import time
from ultralytics import YOLO
import numpy as np
import streamlink

# Load the YOLOv8 model
# model = YOLO("../yolov8n.pt")
# model = YOLO("../yolov8l.pt")
model = YOLO("../yolov8x.pt")
# model = YOLO("../yolov8n-obb.pt")
# model = YOLO("../yolov8n-seg.pt")
# model = YOLO("../yolov8l-seg.pt")
# model = YOLO("../mybest.pt")
# model = YOLO("../hatbest.pt")
# model = YOLO("../my-best-segment.pt")
# model = YOLO("../yolov10n.pt")
# model = YOLO("../yolov8n.pt")
# model = YOLO("../custom_yolov8s.pt")


# Open Live stream
# url = 'https://www.youtube.com/watch?v=DjdUEyjx8GM'
url = 'https://www.youtube.com/watch?v=gFRtAAmiFbE'
# url = 'https://www.youtube.com/watch?v=KY4Yd5QR570'

# Live streaming
streams = streamlink.streams(url)
# cap = cv2.VideoCapture(streams["360p"].url)
cap = cv2.VideoCapture(streams["1080p"].url)
# cap = cv2.VideoCapture(streams["best"].url)

# Open  video file
# video_path = "short_video.mp4"
# cap = cv2.VideoCapture(video_path)

# Variables for FPS calculation
prev_frame_time = 0
new_frame_time = 0

# Get the video frame dimensions
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the counting lines (adjust these values as needed)
left_line = int(frame_width * 0.2)  # 20% from the left
right_line = int(frame_width * 0.8)  # 20% from the right

# Counters for people exiting
left_exit_count = 0
right_exit_count = 0

# Dictionary to store track histories
track_history = {}

# Sets to store IDs of people who have left
left_exited_ids = set()
right_exited_ids = set()

# Function to check if a person has crossed a line
def has_crossed_line(prev_pos, curr_pos, line_pos):
    return (prev_pos < line_pos and curr_pos >= line_pos) or (prev_pos > line_pos and curr_pos <= line_pos)

def smooth_polygon(polygon, smoothing=5):
    """Apply smoothing to the polygon points."""
    smooth_polygon = np.array(polygon, dtype=np.float32)
    for _ in range(smoothing):
        smooth_polygon = np.array([np.mean(np.roll(smooth_polygon, shift, axis=0), axis=0)
                                   for shift in range(-1, 2)])
    return np.array(smooth_polygon, dtype=np.int32)


# Frame skip variable
frame_skip = 0 # 2 is good
frame_counter = 0

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    # Skip frames to catch up with the stream
    # frame_counter += 1
    # if frame_skip > 0:
    #     if frame_counter % frame_skip != 0:
    #         continue
    
    if success:
        
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        # results = model.track(frame, persist=True, classes=0)  # class 0 is person
        results = model.track(frame, persist=True, classes=0, conf=0.1,tracker="bytetrack.yaml")  # class 0 is person, conf=0.25 sets the confidence threshold to 25%
        # results = model.track(frame, persist=True, classes=0, conf=0.1)  # class 0 is person, conf=0.25 sets the confidence threshold to 25%
        # results = model.track(frame, persist=True, conf=0.1)
        # annotation
        # frame = results[0].plot() # info from yolo v8, optional, can comment off
        
        # # Create a blank overlay for the semi-transparent masks
        overlay = np.zeros_like(frame, dtype=np.uint8)
        
                                    # Process each detected person
        if results[0].masks is not None:
            for mask in results[0].masks.xy:
                # Convert the polygon to a format suitable for cv2.fillPoly
                polygon = np.array(mask, dtype=np.int32)
                
                # Fill the polygon on the overlay
                cv2.fillPoly(overlay, [polygon], color=(255, 0, 255))  # Red color
        
        # Blend the overlay with the original frame
        alpha = 0.3  # Adjust this value to change the transparency (0.0 - 1.0)
        annotated_frame = cv2.addWeighted(frame, 1, overlay, alpha, 0)

        # Draw the counting lines
        cv2.line(annotated_frame, (left_line, 0), (left_line, frame_height), (255, 255, 0), 2)
        cv2.line(annotated_frame, (right_line, 0), (right_line, frame_height), (255, 255, 0), 2)
        
        # Process each detected person
        if results[0].boxes is not None and results[0].boxes.id is not None:
            for box, track_id in zip(results[0].boxes.xywh, results[0].boxes.id):
                x, y, w, h = box
                track_id = int(track_id)
                center_x, center_y = int(x), int(y)
                
                # Store the center points of each track
                if track_id not in track_history:
                    track_history[track_id] = []
                track_history[track_id].append((center_x, center_y))
                
                # Keep only the last 30 positions
                track_history[track_id] = track_history[track_id][-30:]
                
                # Check if the person has crossed a line, algo sucks....
                if len(track_history[track_id]) > 1:
                    prev_x = np.mean([pos[0] for pos in track_history[track_id][:-10]])
                    curr_x = np.mean([pos[0] for pos in track_history[track_id][-10:]])
                    
                    if has_crossed_line(prev_x, curr_x, left_line) and track_id not in left_exited_ids:
                        left_exit_count += 1
                        left_exited_ids.add(track_id)
                    elif has_crossed_line(prev_x, curr_x, right_line) and track_id not in right_exited_ids:
                        right_exit_count += 1
                        right_exited_ids.add(track_id)
                
                # Draw the track
                if len(track_history[track_id]) > 1:
                    cv2.polylines(annotated_frame, [np.array(track_history[track_id], dtype=np.int32)], False, (0, 255, 0), 2)
                
                # Draw ID near the person
                cv2.putText(annotated_frame, f"ID: {track_id}", (int(x), int(y) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        
        # Calculate FPS
        new_frame_time = time.time()
        fps = 1.0 / (new_frame_time - prev_frame_time)
        prev_frame_time = new_frame_time
        
        # Count the number of detected objects
        num_objects = len(results[0].boxes) if results[0].boxes is not None else 0
        
        # Put the FPS, object count, and exit counts on the frame
        cv2.putText(annotated_frame, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(annotated_frame, f"Detected: {num_objects}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(annotated_frame, f"Left exits: {left_exit_count}", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(annotated_frame, f"Right exits: {right_exit_count}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        
        # Put the "NUS-ISS Demo" text in the top-right corner
        text = "NUS-ISS Demo"
        text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
        text_x = annotated_frame.shape[1] - text_size[0] - 10
        text_y = 30
        cv2.putText(annotated_frame, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        
        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)
        
        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()


0: 384x640 7 persons, 18.0ms
Speed: 2.9ms preprocess, 18.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 17.1ms
Speed: 1.1ms preprocess, 17.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 19.2ms
Speed: 0.0ms preprocess, 19.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 14.6ms
Speed: 2.1ms preprocess, 14.6ms inference, 4.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 16.4ms
Speed: 1.7ms preprocess, 16.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 16.5ms
Speed: 1.5ms preprocess, 16.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 16.4ms
Speed: 1.0ms preprocess, 16.4ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


0: 384x640 8 persons, 15.2ms
Speed: 0.0ms preprocess, 15.2ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 16.9ms
Speed: 1.0ms preprocess, 16.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 16.7ms
Speed: 4.2ms preprocess, 16.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 12.4ms
Speed: 4.2ms preprocess, 12.4ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 12.9ms
Speed: 0.7ms preprocess, 12.9ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 12.3ms
Speed: 0.0ms preprocess, 12.3ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 12.4ms
Speed: 0.0ms preprocess, 12.4ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 15.4ms
Speed: 0.0ms preprocess, 15.4ms inference, 1.4ms postprocess per image at shape (1