In [None]:
import cv2
import os
from ultralytics import YOLO

In [None]:
def split_video_to_frames(video_path, frames_folder):
    """
    Split a video into frames and save them into the specified folder.

    Frames are written as ``frame_000000.jpg``, ``frame_000001.jpg``, ... in
    the order they are read from the video.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        frames_folder: Directory that receives the frames; created if missing.

    Returns:
        The number of frames read from the video.

    Raises:
        OSError: If the video cannot be opened.
    """
    os.makedirs(frames_folder, exist_ok=True)
    captured_vid = cv2.VideoCapture(video_path)
    frame_count = 0

    try:
        # Fail loudly here: otherwise an unreadable path silently yields zero
        # frames and the problem only surfaces later, in a different step.
        if not captured_vid.isOpened():
            raise OSError(f"Could not open the video: {video_path}")

        while True:
            # .read() returns a tuple (ret, frame):
            #   ret:   boolean indicating whether a frame was read
            #   frame: the frame image
            ret, frame = captured_vid.read()
            if not ret:
                break

            # Save the current frame within frames_folder
            cv2.imwrite(os.path.join(frames_folder, f'frame_{frame_count:06d}.jpg'), frame)
            frame_count += 1
    finally:
        # Release the cv2.VideoCapture object even if reading or writing
        # a frame raised.
        captured_vid.release()

    return frame_count


def detect_and_annotate(frames_folder, processed_folder, model, total_frames,
                        target_label="car"):
    """
    Run object detection on each frame and save annotated copies.

    For every ``frame_{i:06d}.jpg`` in ``frames_folder`` (``i`` from 0 to
    ``total_frames - 1``) the model is called on the image; each detection
    whose class label equals ``target_label`` gets a green bounding box, a
    "label confidence" caption and a filled centroid dot. The annotated frame
    is saved under the same file name in ``processed_folder``. Frames that
    cannot be read are skipped and produce no output file.

    Args:
        frames_folder: Directory containing the extracted frames.
        processed_folder: Directory for annotated frames; created if missing.
        model: Callable invoked as ``model(frame)``; each result must expose
            ``.boxes`` and ``.names`` (an ultralytics YOLO model in this file).
        total_frames: Number of frame indices to process.
        target_label: Class label to annotate. Defaults to ``"car"``.
    """
    os.makedirs(processed_folder, exist_ok=True)

    green = (0, 255, 0)

    for i in range(total_frames):
        frame_name = f'frame_{i:06d}.jpg'
        frame = cv2.imread(os.path.join(frames_folder, frame_name))
        if frame is None:
            continue

        # Run object detection
        results = model(frame)

        # Draw bounding boxes and centroids
        for r in results:
            for box in r.boxes:
                class_idx = int(box.cls[0])   # Integer index of the predicted class
                label = r.names[class_idx]    # Class label (e.g., "car", "person", etc.)
                if label != target_label:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coords
                conf = box.conf[0].item()               # Confidence score

                # Draw bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), green, 2)

                # Class label + confidence. The caption normally sits just
                # above the box; clamp it so boxes touching the top edge
                # still get a visible caption instead of one drawn off-image.
                text = f"{label} {conf:.2f}"
                text_y = max(y1 - 10, 15)
                cv2.putText(frame, text, (x1, text_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

                # Draw centroid
                cx = (x1 + x2) // 2
                cy = (y1 + y2) // 2
                cv2.circle(frame, (cx, cy), 5, green, -1)

        # Save the annotated frame
        cv2.imwrite(os.path.join(processed_folder, frame_name), frame)

In [None]:
def rebuild_video_from_frames(processed_folder, output_video, total_frames, fps=30):
    """
    Rebuild a video from processed frames.

    The output size is taken from ``frame_000000.jpg``; frames
    ``frame_{i:06d}.jpg`` for ``i`` from 0 to ``total_frames - 1`` are then
    appended in order. Frames that cannot be read are skipped.

    Args:
        processed_folder: Directory containing the processed frames.
        output_video: Path of the video file to create.
        total_frames: Number of frame indices to write.
        fps: Frame rate of the output video. Defaults to 30.

    Raises:
        FileNotFoundError: If the first processed frame cannot be read.
        OSError: If the video writer cannot be opened for ``output_video``.
    """
    # Read the first frame to get size info
    first_frame_path = os.path.join(processed_folder, 'frame_000000.jpg')
    first_frame = cv2.imread(first_frame_path)

    # Raise an exception if the frame was not read (may indicate the frame
    # was not extracted/processed correctly)
    if first_frame is None:
        raise FileNotFoundError(f"Could not read the file: {first_frame_path}")

    # Only the first two dimensions are needed, whatever the channel layout.
    height, width = first_frame.shape[:2]

    # Create VideoWriter
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or 'XVID', etc.
    out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))

    try:
        # Without this check an unusable codec or path would silently
        # produce no video at all.
        if not out.isOpened():
            raise OSError(f"Could not open the video writer for: {output_video}")

        # Write each processed frame to the new video
        for i in range(total_frames):
            processed_frame_path = os.path.join(processed_folder, f'frame_{i:06d}.jpg')
            processed_frame = cv2.imread(processed_frame_path)
            if processed_frame is not None:
                out.write(processed_frame)
    finally:
        # Release the writer even if a frame failed.
        out.release()

In [None]:
# 1) Split the video into frames
video_path = "Assignment_3_video_ActiveTrack"
frames_folder = "extracted_frames"
total_frames = split_video_to_frames(video_path, frames_folder)

# Read the source frame rate so the rebuilt video plays at the original speed.
# Fall back to 30 fps when the property is unavailable (reported as a
# non-positive or non-numeric value).
source_capture = cv2.VideoCapture(video_path)
source_fps = source_capture.get(cv2.CAP_PROP_FPS)
source_capture.release()
fps = source_fps if source_fps > 0 else 30

# 2) Load a YOLO model (YOLOv8n pretrained on COCO)
model = YOLO("yolov8n.pt")

# 3) Detect and annotate frames with bounding box + centroid for "car" objects
processed_folder = "processed_frames"
detect_and_annotate(frames_folder, processed_folder, model, total_frames)

# 4) Rebuild the annotated frames into a new video
output_video = "output_video.mp4"
rebuild_video_from_frames(processed_folder, output_video, total_frames, fps=fps)
print(f"Object Detection with bounding boxes now complete. Annotated video has been savet to: {output_video}")