<a href="https://colab.research.google.com/github/VanshJain4/bytetracker/blob/main/bytetracker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =======================
# 1. Install YOLOv8 (Ultralytics)
# =======================
!pip install ultralytics --upgrade -q
from IPython.display import clear_output
clear_output()

# =======================
# 2. Import libraries
# =======================
import cv2
import os
from ultralytics import YOLO
from IPython.display import Video
from tqdm import tqdm

# =======================
# 3. Upload your input video
# =======================
from google.colab import files

# Opens the Colab upload widget; the first key of the returned mapping is used
# below as the path of the input video.
uploaded = files.upload()

# Fail with a readable message instead of a bare StopIteration when the upload
# is cancelled or comes back empty.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a video.")

# Get uploaded video path (only the first uploaded entry is used)
video_path = next(iter(uploaded))

# =======================
# 4. Load YOLOv8 model (pretrained on COCO)
# =======================
# Other checkpoints (yolov8s.pt, yolov8m.pt, etc.) can be substituted here.
model = YOLO(model="yolov8n.pt")

# =======================
# 5. Open input video
# =======================
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise on a bad path or unsupported file; check explicitly
# so the failure is reported here rather than as an empty output video.
if not cap.isOpened():
    raise IOError(f"Could not open input video: {video_path}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
# Some containers report 0 (or NaN) for the frame rate, which would give the
# writer an unusable rate. "not fps > 0" is true for 0, negatives and NaN.
if not fps > 0:
    fps = 30.0

# Output video writer (same frame size and frame rate as the input)
out_path = "output_people_detected.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
if not out.isOpened():
    # Do not leave the input handle open when the writer cannot be created.
    cap.release()
    raise IOError(f"Could not open output video for writing: {out_path}")

# =======================
# 6. Process video frame by frame
# =======================
# Reads every frame from `cap`, runs the detector on it, draws a green box and
# a "Person <confidence>" label for each detection of class 0, and writes the
# annotated frame to `out`.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a frame that could not be decoded).
            break

        # Run YOLOv8 inference; a single frame is passed, so take element 0.
        results = model(frame, verbose=False)[0]

        # Draw bounding boxes for 'person' class (class_id = 0 in COCO)
        for box in results.boxes:
            if int(box.cls[0]) != 0:
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0])
            label = f"Person {float(box.conf[0]):.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Keep the text baseline inside the image: for a box touching the
            # top edge, y1 - 10 would be negative and the label would not show.
            label_y = max(y1 - 10, 15)
            cv2.putText(frame, label, (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        out.write(frame)
finally:
    # Release both handles even if inference or drawing fails part-way, so the
    # output file is closed and the input is not left open.
    cap.release()
    out.release()

# =======================
# 7. Display the output video
# =======================
# As the last expression of the cell, this Video object becomes the cell's
# displayed result; embed=True is passed so the video is embedded in the output.
# NOTE(review): the writer above uses the 'mp4v' fourcc; if the inline player
# stays blank, the browser may not support that codec. Confirm playback and
# re-encode to H.264 if needed.
Video(out_path, embed=True)


In [None]:
import os

# Export per-frame person detections to yolo_dets/results.txt, one line per
# detection in the form: frame_id,x,y,w,h,conf (x, y = top-left corner).
#
# This cell makes its own pass over the input video so that every frame is
# covered and frame_id really advances per frame. Note that this repeats
# inference on the whole video.
output_txt_dir = "yolo_dets"
os.makedirs(output_txt_dir, exist_ok=True)
results_path = os.path.join(output_txt_dir, "results.txt")

det_cap = cv2.VideoCapture(video_path)
if not det_cap.isOpened():
    raise IOError(f"Could not open input video: {video_path}")

frame_id = 0  # Index of the frame currently being processed

try:
    # Opened with "w" so that re-running the cell replaces the file instead of
    # appending a second copy of the same frame ids.
    with open(results_path, "w") as f:
        while True:
            ret, frame = det_cap.read()
            if not ret:
                break

            results = model(frame, verbose=False)[0]

            detections = []
            for box in results.boxes:
                cls_id = int(box.cls[0])
                if cls_id == 0:  # Person class only
                    x1, y1, x2, y2 = map(float, box.xyxy[0])
                    w = x2 - x1
                    h = y2 - y1
                    conf = float(box.conf[0])
                    detections.append(
                        f"{frame_id},{x1:.2f},{y1:.2f},{w:.2f},{h:.2f},{conf:.4f}"
                    )

            # Write this frame's detections (nothing is written for frames
            # without a person).
            f.writelines(line + "\n" for line in detections)

            frame_id += 1
finally:
    det_cap.release()
