# SAM 2 Auto-Segmentation Pipeline

Uses YOLO for detection + SAM 2 for precise segmentation masks.

**Note:** SAM 2 requires GPU. Run this on Colab/Kaggle with GPU enabled.

In [None]:
# Step 1: Install dependencies
!pip install ultralytics
!pip install git+https://github.com/facebookresearch/sam2.git

In [None]:
# Step 2: Set paths
import os

# Root of the project checkout. The default is the original local Windows
# location; set the PPE_PROJECT_ROOT environment variable to point somewhere
# else (e.g. a Colab/Kaggle working directory) without editing this cell.
PROJECT_ROOT = os.environ.get(
    "PPE_PROJECT_ROOT",
    "c:/Users/Tesla Laptops/Videos/Construction-Site-Safety-PPE-Detection",
)

# Your trained YOLO model for detection
YOLO_MODEL = f"{PROJECT_ROOT}/models/best.pt"

# Input images folder
INPUT_IMAGES = f"{PROJECT_ROOT}/new_images"

# Output folder for masks
OUTPUT_FOLDER = f"{PROJECT_ROOT}/sam_outputs"

In [None]:
# Step 3: Load models
import torch
from ultralytics import YOLO
from sam2.build_sam import build_sam2, build_sam2_hf
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Check GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load YOLO
yolo_model = YOLO(YOLO_MODEL)
print(f"YOLO classes: {yolo_model.names}")

# Load SAM 2 from the Hugging Face Hub (downloads the checkpoint on first use).
# build_sam2() expects a config file as its first argument, not a Hub model
# id, so the Hub-aware loader build_sam2_hf() is used instead. The device is
# passed explicitly so the CPU fallback chosen above is actually honoured.
# NOTE(review): build_sam2_hf needs the `huggingface_hub` package; it is
# presumably preinstalled on Colab/Kaggle - confirm for other environments.
sam2_checkpoint = "facebook/sam2-hiera-large"
sam2_model = build_sam2_hf(sam2_checkpoint, device=device)
predictor = SAM2ImagePredictor(sam2_model)
print("SAM 2 loaded!")

In [None]:
# Step 4: Run YOLO + SAM pipeline
#
# For every image in INPUT_IMAGES:
#   1. detect objects with YOLO,
#   2. prompt SAM 2 with each detected box to get one mask per object,
#   3. write a label mask (pixel value i = i-th object, 0 = background),
#   4. write a visualization with a random colour blended over each mask.
import os
import cv2
import numpy as np
from PIL import Image

os.makedirs(OUTPUT_FOLDER, exist_ok=True)
os.makedirs(f"{OUTPUT_FOLDER}/masks", exist_ok=True)
os.makedirs(f"{OUTPUT_FOLDER}/visualizations", exist_ok=True)

# Get all images. The extensions include the leading dot so that a name which
# merely ends in the letters "jpg"/"png" is not mistaken for an image file.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
image_files = sorted(
    f for f in os.listdir(INPUT_IMAGES)
    if f.lower().endswith(IMAGE_EXTENSIONS)
)

print(f"Found {len(image_files)} images")

for img_file in image_files:
    img_path = os.path.join(INPUT_IMAGES, img_file)
    image = cv2.imread(img_path)  # BGR; None when the file cannot be decoded
    if image is None:
        print(f"{img_file}: Could not read image, skipping")
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Run YOLO detection. Ultralytics treats numpy inputs as BGR (OpenCV
    # convention), so it gets the original array, not the RGB copy.
    results = yolo_model.predict(image, verbose=False)[0]
    boxes = results.boxes.xyxy.cpu().numpy()

    if len(boxes) == 0:
        print(f"{img_file}: No detections")
        continue

    # Run SAM 2 with YOLO boxes as prompts (SAM 2 takes the RGB image)
    predictor.set_image(image_rgb)

    all_masks = []
    for box in boxes:
        masks, _scores, _ = predictor.predict(
            box=box,
            multimask_output=False
        )
        # SAM 2 returns the mask as a float array; NumPy boolean indexing
        # below requires a bool array, so convert once here.
        all_masks.append(masks[0].astype(bool))

    # Save combined label mask. uint8 can only hold 255 object ids, so widen
    # to uint16 for very crowded images instead of silently wrapping around.
    label_dtype = np.uint8 if len(all_masks) <= 255 else np.uint16
    combined_mask = np.zeros(image.shape[:2], dtype=label_dtype)
    for object_id, mask in enumerate(all_masks, start=1):
        # Later objects overwrite earlier ones where masks overlap.
        combined_mask[mask] = object_id

    mask_path = f"{OUTPUT_FOLDER}/masks/{os.path.splitext(img_file)[0]}_mask.png"
    cv2.imwrite(mask_path, combined_mask)

    # Save visualization: 50/50 blend of the image and a random colour
    vis_image = image.copy()
    for mask in all_masks:
        color = np.random.randint(0, 255, 3)
        blended = vis_image[mask] * 0.5 + color * 0.5
        vis_image[mask] = blended.astype(np.uint8)

    vis_path = f"{OUTPUT_FOLDER}/visualizations/{img_file}"
    cv2.imwrite(vis_path, vis_image)

    print(f"{img_file}: {len(boxes)} objects segmented")

print(f"\nDone! Results saved to {OUTPUT_FOLDER}")

## Output

- `masks/` - One label-mask PNG per input image (`<name>_mask.png`): pixel value 0 is background and pixel value `i` marks the i-th detected object
- `visualizations/` - Original images with colored mask overlays