In [None]:
# Install required packages
!pip install torch torchvision ultralytics
!pip install opencv-python matplotlib
!pip install ipywidgets # for interactive displays

In [None]:
# Import required libraries
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
from ultralytics import YOLO
import time

# For display
from IPython.display import display, Image as IPImage
import ipywidgets as widgets

# Report the PyTorch build and whether a CUDA device is visible to it
cuda_available = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")
if cuda_available:
    # Index 0 is the first CUDA device
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

In [None]:
# Load a pretrained YOLO model (YOLOv8n by default)
# You can choose different model sizes: n (nano), s (small), m (medium), l (large), x (extra large)
# Change MODEL_WEIGHTS to switch models; the message below follows it, so the
# printed name can never disagree with the weights that were actually loaded.
MODEL_WEIGHTS = 'yolov8n.pt'
model = YOLO(MODEL_WEIGHTS)  # this downloads the model if not already present

print(f"Model loaded: {MODEL_WEIGHTS}")
print(f"Model supports the following classes: {model.names}")

In [None]:
# Utility functions for object detection

def detect_objects_in_image(model, image_path, conf_threshold=0.25):
    """
    Detect objects in an image using the YOLO model

    Args:
        model: YOLO model; it is called as ``model(image_path, conf=...)``
        image_path: Path to the image
        conf_threshold: Confidence threshold for detections

    Returns:
        result: First entry of the results returned by the model
        img: Annotated image produced by ``plot()`` on that same entry
    """
    # Run inference on the image
    results = model(image_path, conf=conf_threshold)

    # Take the result object and its annotated image from the same entry, so
    # the returned pair always matches even if the model returned several
    # results (previously the image came from the last entry instead).
    first_result = results[0]
    return first_result, first_result.plot()

def display_detection_results(img, results):
    """
    Show an annotated detection image and print a per-object summary

    Args:
        img: Image with bounding boxes (converted from BGR to RGB before display)
        results: YOLO results object; its ``boxes`` and ``names`` are read
    """
    # matplotlib expects RGB channel order, so convert before plotting
    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(12, 8))
    plt.imshow(rgb_image)
    plt.axis('off')
    plt.title(f"Detected {len(results.boxes)} objects")
    plt.show()

    detections = results.boxes
    if len(detections) == 0:
        print("No objects detected.")
        return

    # One line per detection: class name, confidence, integer box corners
    print(f"\nDetection Details:")
    print("-" * 50)
    for number, detection in enumerate(detections, start=1):
        label = results.names[int(detection.cls.item())]
        score = detection.conf.item()
        left, top, right, bottom = map(int, detection.xyxy[0].tolist())
        print(f"Object {number}: Class = {label}, Confidence = {score:.2f}, Bounding Box = [{left}, {top}, {right}, {bottom}]")

In [None]:
# Test detection on a sample image from URL

# Download a test image
!curl -o test_image.jpg https://ultralytics.com/images/zidane.jpg

# Run detection
sample_path = "test_image.jpg"
results, annotated_img = detect_objects_in_image(model, sample_path)

# Display results
display_detection_results(annotated_img, results)

In [None]:
# For uploading your own images (run this in Google Colab)
# If you're using a local Jupyter notebook, you can use other methods to select images

def upload_and_detect():
    """
    Function to upload an image and run object detection
    Note: This works best in Google Colab

    Each uploaded file is passed to detect_objects_in_image together with the
    notebook-level ``model`` and then shown with display_detection_results.
    When ``google.colab`` cannot be imported, a hint is printed and the
    function returns without doing anything else.
    """
    # Guard only the import: an ImportError raised later, during the upload or
    # the detection itself, is a real failure and must not be reported as
    # "not running in Colab".
    try:
        from google.colab import files
    except ImportError:
        print("This function works best in Google Colab.")
        print("If you're running locally, please use the next cell to specify a path to your image.")
        return

    uploaded = files.upload()
    for filename in uploaded.keys():
        print(f"Uploaded file: {filename}")
        results, annotated_img = detect_objects_in_image(model, filename)
        display_detection_results(annotated_img, results)

# Uncomment to use in Colab
# upload_and_detect()

In [None]:
# For detecting objects in local images

def detect_local_image(image_path, conf_threshold=0.25):
    """
    Detect objects in an image stored on the local filesystem

    Args:
        image_path: Path to the local image
        conf_threshold: Confidence threshold

    Returns:
        The (results, annotated_img) pair from detect_objects_in_image, or
        None (after printing an error) when image_path does not exist
    """
    if os.path.exists(image_path):
        print(f"Running detection on {image_path}...")
        detection, rendered = detect_objects_in_image(model, image_path, conf_threshold)
        display_detection_results(rendered, detection)
        return detection, rendered

    # Nothing to analyse: report the missing file and return None
    print(f"Error: File {image_path} not found.")
    return None

# Example usage (uncomment and change path to your image)
# image_path = "path/to/your/image.jpg"  # Change this to your image path
# detect_local_image(image_path)

In [None]:
# For processing video

def process_video(video_path, output_path='output_video.mp4', conf_threshold=0.25, save_video=True, display_frames=False):
    """
    Process a video file for object detection

    Every frame is run through the notebook-level ``model``; the annotated
    frames are optionally written to ``output_path`` and/or shown inline.

    Args:
        video_path: Path to input video
        output_path: Path to save output video
        conf_threshold: Confidence threshold for detection
        save_video: Whether to save the processed video
        display_frames: Whether to display frames during processing (slows down execution)

    Returns:
        None. Errors (missing file, unreadable video, unusable output writer)
        are reported with a printed message.
    """
    if not os.path.exists(video_path):
        print(f"Error: Video file {video_path} not found.")
        return

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}")
        cap.release()
        return

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    print(f"Video properties: {width}x{height} at {fps} fps, {total_frames} frames")

    # The reported frame count may be 0 or negative; percentage and ETA are
    # only computed when it is usable, which avoids a division by zero.
    total_known = total_frames > 0

    # Initialize video writer if saving
    out = None
    if save_video:
        # `not fps > 0` also catches NaN, which a plain `fps <= 0` would miss
        if not fps > 0:
            print(f"Warning: invalid frame rate reported ({fps}); writing output at 30 fps")
            fps = 30.0
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"Error: Could not open video writer for {output_path}")
            out.release()
            cap.release()
            return

    # Process the video
    frame_count = 0
    start_time = time.time()

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Process frame
            results = model(frame, conf=conf_threshold)
            annotated_frame = results[0].plot()

            # Save frame
            if out is not None:
                out.write(annotated_frame)

            # Display frame
            if display_frames and frame_count % 5 == 0:  # Show every 5th frame
                img_rgb = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
                plt.figure(figsize=(10, 6))
                plt.imshow(img_rgb)
                plt.axis('off')
                plt.title(f"Frame {frame_count}/{total_frames}")
                plt.show()

            # Update progress
            frame_count += 1
            if frame_count % 50 == 0:
                # Floor the elapsed time so the rate never divides by zero
                elapsed = max(time.time() - start_time, 1e-9)
                fps_processing = frame_count / elapsed
                if total_known:
                    estimated_total = elapsed * (total_frames / frame_count)
                    # Clamp: the reported total can be lower than the frames actually read
                    remaining = max(estimated_total - elapsed, 0.0)
                    print(f"Processed {frame_count}/{total_frames} frames ({frame_count/total_frames*100:.1f}%) - {fps_processing:.1f} fps - ETA: {remaining:.1f}s")
                else:
                    print(f"Processed {frame_count} frames - {fps_processing:.1f} fps")

    except KeyboardInterrupt:
        print("Processing interrupted")

    finally:
        # Release resources
        cap.release()
        if out is not None:
            out.release()

        print(f"Processing complete: {frame_count}/{total_frames} frames processed")
        # Only claim an output file when at least one frame was written to it
        if out is not None and frame_count > 0 and os.path.exists(output_path):
            print(f"Output saved to {output_path}")

# Example usage (uncomment and change path to your video)
# video_path = "path/to/your/video.mp4"  # Change this to your video path
# process_video(video_path, display_frames=False)

In [None]:
# Real-time object detection with webcam

def detect_webcam(camera_id=0, conf_threshold=0.25, save_video=False, output_path='webcam_output.mp4'):
    """
    Run real-time object detection using webcam

    Frames are read from the camera, run through the notebook-level ``model``
    and shown with ``cv2.imshow`` until 'q' is pressed, a frame cannot be
    read, or the kernel is interrupted.
    NOTE(review): ``cv2.imshow`` needs an environment where OpenCV can open a
    window; confirm before using this in a hosted notebook.

    Args:
        camera_id: Webcam ID (usually 0 for built-in webcam)
        conf_threshold: Confidence threshold
        save_video: Whether to save the output video
        output_path: Path to save output video if save_video is True
    """
    # Initialize webcam
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f"Error: Could not open webcam with ID {camera_id}")
        cap.release()
        return

    # Get webcam properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = 30  # Target FPS written into the output file; not measured from the camera

    # Initialize video writer if saving
    out = None
    if save_video:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            # Fail up front instead of silently recording nothing
            print(f"Error: Could not open video writer for {output_path}")
            out.release()
            cap.release()
            return

    print("Webcam object detection started. Press 'q' to quit.")

    frames_written = 0
    try:
        while True:
            # Read frame
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame from webcam")
                break

            # Run detection
            results = model(frame, conf=conf_threshold)
            annotated_frame = results[0].plot()

            # Save frame if requested
            if out is not None:
                out.write(annotated_frame)
                frames_written += 1

            # Display the frame
            cv2.imshow("YOLO Object Detection", annotated_frame)

            # Break the loop on 'q' press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        print("Webcam detection interrupted")

    finally:
        # Release resources
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        print("Webcam detection stopped")
        # Only claim an output file when at least one frame was written
        if out is not None and frames_written > 0:
            print(f"Output saved to {output_path}")

# Example usage (uncomment to run)
# detect_webcam(camera_id=0)  # Use default webcam

## Advanced: Custom Training

You can train YOLO on your own custom dataset to detect specific objects. Here are the basic steps:

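1. Prepare a dataset in YOLO format (one `.txt` label file per image, each line `class x_center y_center width height` with coordinates normalized to 0–1)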
2. Create a dataset YAML configuration file
3. Train the model
4. Evaluate and export the model

The code cell below shows how to train a model on a custom dataset.

In [None]:
# Custom training on your own dataset

# The triple-quoted block below is a bare string literal used as a block
# comment: it is never parsed as YAML here. It only shows what a separate
# 'dataset.yaml' file could contain; the path given to train_custom_model
# must point to a real file on disk.
'''
# Example dataset.yaml file structure
# Save this as 'dataset.yaml'

path: /path/to/dataset  # dataset root directory
train: images/train  # train images (relative to 'path')
val: images/val  # val images (relative to 'path')

nc: 3  # number of classes
names: ['person', 'car', 'bicycle']  # class names
'''

# Training function
def train_custom_model(yaml_path, epochs=100, batch_size=16, img_size=640,
                       base_weights='yolov8n.pt', patience=50):
    """
    Train a custom YOLO model

    Args:
        yaml_path: Path to dataset YAML file
        epochs: Number of training epochs
        batch_size: Batch size
        img_size: Input image size
        base_weights: Pretrained weights to start from (default 'yolov8n.pt')
        patience: Early stopping patience (default 50)

    Returns:
        The YOLO model object on which ``train`` was called
    """
    # Start from a pretrained model; the local name is kept distinct from the
    # notebook-level `model` used for inference to avoid confusing the two.
    custom_model = YOLO(base_weights)

    # Train the model (the value returned by train() is not used here)
    custom_model.train(
        data=yaml_path,
        epochs=epochs,
        batch=batch_size,
        imgsz=img_size,
        patience=patience,  # Early stopping patience
        save=True,  # Save checkpoints
        device='0' if torch.cuda.is_available() else 'cpu'
    )

    print("Training complete!")
    return custom_model

# Usage example
# yaml_path = "path/to/your/dataset.yaml"  # Change to your dataset YAML path
# trained_model = train_custom_model(yaml_path, epochs=50)

## Conclusion and Next Steps

Congratulations! You've now set up a complete YOLO object detection workflow. You can:

1. Detect objects in images
2. Process videos for object detection
3. Use your webcam for real-time detection
4. Train custom models for specific use cases

### Next Steps

- Try different YOLO models (yolov8s.pt, yolov8m.pt, etc.) for different speed/accuracy tradeoffs
- Experiment with confidence thresholds
- Create a custom dataset for your specific needs
- Deploy your model to edge devices or cloud services

For more information, visit the [Ultralytics YOLOv8 documentation](https://docs.ultralytics.com/).