## Initial Setup

First, we load the torch library (along with platform) and verify that the installation works. You should see the Python version, the PyTorch version, and whether CUDA is available.

In [None]:
import torch
import platform

# Report the interpreter version, the PyTorch version, and whether PyTorch
# reports CUDA as available.
print(f"Python Version: {platform.python_version()}")
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")


Next, we display the CUDA capabilities of the system. If you have a GPU, you should see the number of CUDA devices, the current device index, the name of the GPU, and its compute capability.

In [None]:
# Summarize the CUDA devices visible to PyTorch, or say what to check when there are none.
if torch.cuda.is_available():
    current_device = torch.cuda.current_device()
    print(f"CUDA device count: {torch.cuda.device_count()}")
    print(f"Current CUDA device: {current_device}")
    # Describe the device that is actually current instead of assuming it is index 0.
    print(f"CUDA device name: {torch.cuda.get_device_name(current_device)}")
    print(f"CUDA capability: {torch.cuda.get_device_capability(current_device)}")
else:
    print(
        "CUDA is NOT available. Please check your CUDA Toolkit and driver installation."
    )

# Describe the build from torch.version. The previous expression printed the
# __name__ of the cudnn.version function, which is always the text "version".
if torch.version.cuda:
    pytorch_build = f"CUDA {torch.version.cuda} build"
elif getattr(torch.version, "hip", None):
    pytorch_build = f"ROCm {torch.version.hip} build"
else:
    pytorch_build = "CPU only"
print(f"PyTorch installed from: {pytorch_build}")

In [None]:
from ultralytics import YOLO
import matplotlib.pyplot as plt
import cv2
import numpy as np
from IPython.display import Image, display

# Build the detector from the 'yolo11n.pt' weights; the photo detection cell
# below calls this `model` object directly.
model = YOLO('yolo11n.pt')

## Photo Object Detection Example

In [None]:

import time
from collections import Counter

# Run the detector on one sample image, display the annotated result, and
# report timing plus per-class detection counts.
image_path = 'https://ultralytics.com/images/bus.jpg'
print(f"Running inference on: {image_path}")

# Time the inference
start_time = time.perf_counter()
results = model(image_path, verbose=False)
inference_time = time.perf_counter() - start_time

# Time the visualization and count objects
viz_start_time = time.perf_counter()
object_counts = Counter()

for r in results:
    # Resolve the box count once so the None check guards every use of r.boxes,
    # not only the counting loop.
    num_boxes = len(r.boxes) if r.boxes is not None else 0
    print(f"Detected {num_boxes} objects")

    if num_boxes:
        # r.boxes.cls holds one class id per detection; map each id to its name.
        object_counts.update(
            model.names[int(class_id)] for class_id in r.boxes.cls.tolist()
        )

    # r.plot() returns a BGR array; matplotlib expects RGB.
    annotated_image_array = r.plot()
    annotated_image_rgb = cv2.cvtColor(annotated_image_array, cv2.COLOR_BGR2RGB)
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(annotated_image_rgb)
    ax.axis('off')
    ax.set_title(f"Detected Objects: {num_boxes}")
    plt.show()
    # Close the figure so repeated runs do not accumulate open figures.
    plt.close(fig)
viz_time = time.perf_counter() - viz_start_time

print("\n=== PHOTO PROCESSING METRICS ===")
print(f"Inference time: {inference_time:.4f} seconds")
print(f"Visualization time: {viz_time:.4f} seconds")
print(f"Total time: {inference_time + viz_time:.4f} seconds")

print("\n=== OBJECT DETECTION COUNTS ===")
if object_counts:
    print("Objects detected by type:")
    for obj_type, count in sorted(object_counts.items()):
        print(f"  {obj_type}: {count}")
    print(f"Total objects: {sum(object_counts.values())}")
else:
    print("No objects detected")

print("Basic YOLOv11 test completed.")


## Video Example

In [None]:
from ultralytics import YOLO
import cv2
import time
import os
from collections import Counter

# Annotate up to the first 100 frames of a local video with YOLO detections,
# write them to a new MP4 file, and report timing metrics and per-class counts.
model = YOLO('yolo11n.pt')
video_path = os.path.join(os.getcwd(), 'traffic.mp4')
output_path = 'annotated_traffic_output.mp4'
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"Error: Could not open video source at {video_path}. Please check the path or webcam availability.")
else:
    # Keep the frame rate as a float: truncating a fractional rate such as 29.97
    # to 29 would make the written video play at the wrong speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # No usable rate was reported; the writer still needs a positive one.
        fps = 30.0
        print("Warning: source did not report a frame rate; assuming 30 FPS for the output.")
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Set up video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f"Processing video from: {video_path}")
    print(f"Output video: {output_path}")
    print(f"Video properties: {width}x{height} @ {fps:.2f} FPS, {total_frames} frames")

    # Process the first 100 frames, or the entire video if it is shorter. If the
    # reported frame count is not positive, keep the 100-frame cap and let
    # cap.read() signal the end of the video.
    frames_to_process = min(100, total_frames) if total_frames > 0 else 100
    processed_count = 0

    # Initialize timing variables and object counter
    total_inference_time = 0
    total_write_time = 0
    frame_times = []
    overall_start_time = time.perf_counter()
    video_object_counts = Counter()

    try:
        while processed_count < frames_to_process:
            frame_start_time = time.perf_counter()

            ret, frame = cap.read()
            if not ret:
                print("End of video or failed to read frame.")
                break

            # Time the inference for this frame
            inference_start = time.perf_counter()
            results = model(frame, verbose=False)
            inference_time = time.perf_counter() - inference_start
            total_inference_time += inference_time

            # Count objects in this frame
            if results[0].boxes is not None:
                for box in results[0].boxes:
                    class_id = int(box.cls.item())
                    class_name = model.names[class_id]
                    video_object_counts[class_name] += 1

            # Time the video writing
            write_start = time.perf_counter()
            annotated_frame = results[0].plot()  # Get annotated frame as numpy array
            out.write(annotated_frame)  # Write frame to output video
            write_time = time.perf_counter() - write_start
            total_write_time += write_time

            frame_total_time = time.perf_counter() - frame_start_time
            frame_times.append(frame_total_time)

            processed_count += 1

            # Progress update every 10 frames
            if processed_count % 10 == 0:
                progress = (processed_count / frames_to_process) * 100
                print(f"Progress: {processed_count}/{frames_to_process} frames ({progress:.1f}%)")

        overall_time = time.perf_counter() - overall_start_time
    finally:
        # Release the capture and the writer even if a frame fails mid-run, so
        # neither handle is left open.
        cap.release()
        out.release()

    # Check if output file was created successfully
    if os.path.exists(output_path):
        file_size = os.path.getsize(output_path) / (1024 * 1024)  # Size in MB
        print("\n=== OUTPUT VIDEO CREATED ===")
        print(f"File: {output_path}")
        print(f"Size: {file_size:.2f} MB")
    else:
        print(f"\nWarning: Output file {output_path} was not created successfully.")

    if processed_count == 0:
        # Every average below divides by the frame count, so there is nothing to report.
        print("\nNo frames were processed; skipping metrics.")
    else:
        # Print comprehensive timing metrics
        print("\n=== VIDEO PROCESSING METRICS ===")
        print(f"Frames processed: {processed_count}")
        print(f"Total inference time: {total_inference_time:.4f} seconds")
        print(f"Total video writing time: {total_write_time:.4f} seconds")
        print(f"Average inference per frame: {total_inference_time/processed_count:.4f} seconds")
        print(f"Average writing per frame: {total_write_time/processed_count:.4f} seconds")
        print(f"Average processing per frame: {sum(frame_times)/len(frame_times):.4f} seconds")
        print(f"Frames per second (processing only): {processed_count/sum(frame_times):.2f} FPS")
        print(f"Overall time: {overall_time:.4f} seconds")

        # Print object detection counts
        print("\n=== VIDEO OBJECT DETECTION COUNTS ===")
        if video_object_counts:
            total_objects = sum(video_object_counts.values())
            print("Objects detected by type across all processed frames:")
            for obj_type, count in sorted(video_object_counts.items()):
                print(f"  {obj_type}: {count}")
            print(f"Total objects detected: {total_objects}")
            print(f"Average objects per frame: {total_objects/processed_count:.1f}")
        else:
            print("No objects detected in any frames")

        print(f"Output video created: {output_path}")
        print("Video processing and export completed.")


In [None]:
from ultralytics import YOLO
import cv2
import time
import os
from collections import Counter

# Process the entire video: annotate every frame with YOLO detections, write the
# result to a new MP4 file, and report timing metrics and per-class counts.
model = YOLO('yolo11n.pt')
# Look for the video in the current working directory, like the 100-frame cell
# above, instead of a user-specific absolute path. Change this to your video
# file location if it lives elsewhere.
video_path = os.path.join(os.getcwd(), 'traffic.mp4')
output_path = 'complete_annotated_traffic.mp4'  # Output video file
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"Error: Could not open video source at {video_path}. Please check the path or webcam availability.")
else:
    # Get video properties. Keep the frame rate as a float: truncating 29.97 to
    # 29 skews the output speed, and truncating a rate below 1 to 0 would make
    # the duration calculation divide by zero.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # No usable rate was reported; the writer still needs a positive one.
        fps = 30.0
        print("Warning: source did not report a frame rate; assuming 30 FPS for the output.")
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = total_frames / fps if total_frames > 0 else 0.0

    # Set up video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f"Processing COMPLETE video from: {video_path}")
    print(f"Output video: {output_path}")
    print(f"Video properties: {width}x{height} @ {fps:.2f} FPS")
    print(f"Total frames: {total_frames} ({duration:.1f} seconds)")
    print("This may take several minutes depending on video length and GPU performance...")

    processed_count = 0

    # Initialize timing variables and object counter
    total_inference_time = 0
    total_write_time = 0
    frame_times = []
    overall_start_time = time.perf_counter()
    complete_video_object_counts = Counter()

    try:
        # Process all frames
        while True:
            frame_start_time = time.perf_counter()

            ret, frame = cap.read()
            if not ret:
                print("Reached end of video.")
                break

            # Time the inference for this frame
            inference_start = time.perf_counter()
            results = model(frame, verbose=False)
            inference_time = time.perf_counter() - inference_start
            total_inference_time += inference_time

            # Count objects in this frame
            if results[0].boxes is not None:
                for box in results[0].boxes:
                    class_id = int(box.cls.item())
                    class_name = model.names[class_id]
                    complete_video_object_counts[class_name] += 1

            # Time the video writing
            write_start = time.perf_counter()
            annotated_frame = results[0].plot()  # Get annotated frame as numpy array
            out.write(annotated_frame)  # Write frame to output video
            write_time = time.perf_counter() - write_start
            total_write_time += write_time

            frame_total_time = time.perf_counter() - frame_start_time
            frame_times.append(frame_total_time)

            processed_count += 1

            # Progress update every 50 frames to avoid spam
            if processed_count % 50 == 0:
                elapsed_time = time.perf_counter() - overall_start_time
                if total_frames > 0:
                    progress = (processed_count / total_frames) * 100
                    estimated_total = elapsed_time * (total_frames / processed_count)
                    remaining_time = max(estimated_total - elapsed_time, 0.0)
                    print(
                        f"Progress: {processed_count}/{total_frames} frames "
                        f"({progress:.1f}%), about {remaining_time:.0f} s remaining"
                    )
                else:
                    # Without a frame count there is no percentage or estimate to show.
                    print(f"Progress: {processed_count} frames processed ({elapsed_time:.0f} s elapsed)")

        overall_time = time.perf_counter() - overall_start_time
    finally:
        # Release the capture and the writer even if a frame fails mid-run, so
        # neither handle is left open.
        cap.release()
        out.release()

    # Check if output file was created successfully
    if os.path.exists(output_path):
        file_size = os.path.getsize(output_path) / (1024 * 1024)  # Size in MB
        print("\n=== COMPLETE OUTPUT VIDEO CREATED ===")
        print(f"File: {output_path}")
        print(f"Size: {file_size:.2f} MB")
    else:
        print(f"\nWarning: Output file {output_path} was not created successfully.")

    if processed_count == 0:
        # Every average below divides by the frame count, so there is nothing to report.
        print("\nNo frames were processed; skipping metrics.")
    else:
        if duration <= 0:
            # The frame count was not reported; derive the duration from the
            # frames that were actually read.
            duration = processed_count / fps

        # Print comprehensive timing metrics
        print("\n=== COMPLETE VIDEO PROCESSING METRICS ===")
        print(f"Frames processed: {processed_count}")
        print(f"Original video duration: {duration:.1f} seconds")
        print(f"Processing time: {overall_time/60:.1f} minutes")
        # Speed relative to real time is video seconds handled per wall-clock
        # second, so values above 1.0 mean faster than playback.
        print(f"Processing speed: {duration/overall_time:.1f}x real-time")
        print(f"Total inference time: {total_inference_time:.4f} seconds")
        print(f"Total video writing time: {total_write_time:.4f} seconds")
        print(f"Average inference per frame: {total_inference_time/processed_count:.4f} seconds")
        print(f"Average writing per frame: {total_write_time/processed_count:.4f} seconds")
        print(f"Average processing per frame: {sum(frame_times)/len(frame_times):.4f} seconds")
        print(f"Theoretical max FPS: {processed_count/total_inference_time:.1f} FPS")
        print(f"Actual processing FPS: {processed_count/sum(frame_times):.2f} FPS")

        # Print complete video object detection counts
        print("\n=== COMPLETE VIDEO OBJECT DETECTION COUNTS ===")
        if complete_video_object_counts:
            total_objects = sum(complete_video_object_counts.values())
            print("Objects detected by type across entire video:")
            for obj_type, count in sorted(complete_video_object_counts.items()):
                print(f"  {obj_type}: {count}")
            print(f"Total objects detected: {total_objects}")
            print(f"Average objects per frame: {total_objects/processed_count:.1f}")
            print(f"Objects per second: {total_objects/duration:.1f}")
        else:
            print("No objects detected in any frames")

        print(f"Complete annotated video saved as: {output_path}")
        print("Complete video processing finished!")
