In [1]:
import pandas as pd
import numpy as np

from ultralytics import YOLO
import cv2
import os
from pathlib import Path

Creating new Ultralytics Settings v0.0.6 file ✅
View Ultralytics Settings with 'yolo settings' or at '/home/isim/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# Load YOLOv8 segmentation model
model = YOLO('yolov8n-seg.pt')  # or 'yolov8s-seg.pt', 'yolov8m-seg.pt', etc.

# Input folder with the demonstration videos and output folder for the
# annotated copies (created on demand, including missing parent folders).
demo_folder = Path('demonstrations')
output_folder = Path('output_videos')
output_folder.mkdir(parents=True, exist_ok=True)

# Collect all video files from the demonstrations folder.
# The suffix is compared case-insensitively so 'clip.MP4' is picked up as well,
# directories are skipped, and the list is sorted so the processing order is
# the same on every run.
VIDEO_SUFFIXES = {'.mp4', '.avi', '.mov'}
video_files = sorted(
    path
    for path in demo_folder.glob('*')
    if path.is_file() and path.suffix.lower() in VIDEO_SUFFIXES
)

print(f"Found {len(video_files)} video(s) in demonstrations folder:")
for video in video_files:
    print(f"  - {video.name}")

# Process each video: run YOLO segmentation on every frame, write an annotated
# copy into `output_folder`, and print a per-frame and per-video summary.

# Frame rate used when the container does not report a usable one.
DEFAULT_FPS = 30.0

for video_path in video_files:
    print(f"\nProcessing: {video_path}")

    # Get video properties
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        # Without this check the properties below would describe an invalid
        # video and the writer would produce an unusable file.
        cap.release()
        print(f"  Could not open video, skipping: {video_path}")
        continue
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # Keep the reported FPS as a float: truncating with int() turns e.g. 29.97
    # into 29, which makes the annotated video play at the wrong speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()

    # `not fps > 0` is also true for NaN, so every unusable value is replaced.
    if not fps > 0:
        fps = DEFAULT_FPS

    print(f"  Resolution: {width}x{height}, FPS: {fps:g}, Frames: {frame_count}")

    # Create output video writer
    output_path = output_folder / f"annotated_{video_path.name}"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
    if not out.isOpened():
        out.release()
        print(f"  Could not create output video, skipping: {output_path}")
        continue

    frame_idx = 0
    class_totals = {}  # class name -> detections summed over all frames

    # The writer is released in `finally` so the output file is closed even
    # when inference fails part-way through the video.
    try:
        # Run inference on the video; stream=True yields one result per frame
        results = model(str(video_path), stream=True)

        # Process each frame
        for result in results:
            # Get annotated frame with overlays and write it to the output video
            annotated_frame = result.plot()
            out.write(annotated_frame)

            # Print detection summary for this frame
            if result.masks is not None:
                masks = result.masks.data  # segmentation masks
                classes = result.boxes.cls.cpu().numpy()  # class IDs
                names = [model.names[int(cls)] for cls in classes]
                for name in names:
                    class_totals[name] = class_totals.get(name, 0) + 1

                print(f"  Frame {frame_idx}: {len(masks)} objects - {names}")
            else:
                print(f"  Frame {frame_idx}: No objects detected")

            frame_idx += 1
    finally:
        # Release video writer
        out.release()

    print(f"  Saved annotated video to: {output_path}")

    # Summary of detections across video
    print(f"\nCompleted processing {video_path.name}")
    print(f"Total frames processed: {frame_idx}")
    print(f"Detections per class (summed over frames): {class_totals}")


Found 1 video(s) in demonstrations folder:
  - demonstration1.mp4

Processing: demonstrations/demonstration1.mp4
  Resolution: 1472x830, FPS: 30, Frames: 1167

video 1/1 (frame 1/1167) /home/isim/SCU/ECEN524/VLA_basis/demonstrations/demonstration1.mp4: 384x640 1 person, 1 dining table, 17.1ms
  Frame 0: 2 objects - ['person', 'dining table']
video 1/1 (frame 2/1167) /home/isim/SCU/ECEN524/VLA_basis/demonstrations/demonstration1.mp4: 384x640 1 person, 1 dining table, 6.9ms
  Frame 1: 2 objects - ['person', 'dining table']
video 1/1 (frame 3/1167) /home/isim/SCU/ECEN524/VLA_basis/demonstrations/demonstration1.mp4: 384x640 1 person, 5.9ms
  Frame 2: 1 objects - ['person']
video 1/1 (frame 4/1167) /home/isim/SCU/ECEN524/VLA_basis/demonstrations/demonstration1.mp4: 384x640 1 person, 5.8ms
  Frame 3: 1 objects - ['person']
video 1/1 (frame 5/1167) /home/isim/SCU/ECEN524/VLA_basis/demonstrations/demonstration1.mp4: 384x640 1 person, 5.8ms
  Frame 4: 1 objects - ['person']
video 1/1 (frame 6/1