# Click-to-Label: SAM-Powered Manual Annotation

**Purpose**: Label ANY object (even new classes like Hammer) by clicking on it.

**How it works**:
1. Upload your images.
2. For each image, type the pixel coordinates (`x y`) of a point on the object you want to label — this stands in for a mouse click.
3. SAM automatically segments the clicked object.
4. Type the class name.
5. Download YOLO-format labels.

In [None]:
# Step 1: Install Dependencies
# segment-anything supplies the `segment_anything` module imported in Step 3;
# opencv-python-headless supplies `cv2`, imported in Step 4.
!pip install segment-anything opencv-python-headless
# NOTE(review): no cell visible in this notebook imports ipywidgets — confirm
# it is still required before keeping this install.
!pip install ipywidgets

In [None]:
# Step 2: Upload Images
import os
import zipfile
from google.colab import files

# Uploaded images are collected in IMAGES_DIR; exported labels go to LABELS_DIR.
IMAGES_DIR = "input_images"
LABELS_DIR = "output_labels"
# File extensions (lower-case) that are treated as labelable images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

os.makedirs(IMAGES_DIR, exist_ok=True)
os.makedirs(LABELS_DIR, exist_ok=True)

print("--- UPLOAD IMAGES ---")
print("Upload a ZIP of images OR individual image files.")

# The upload result is iterated by uploaded filename; the files themselves
# are expected in the current working directory.
uploaded = files.upload()

for filename in uploaded:
    # Compare case-insensitively so an archive named "PHOTOS.ZIP" is unpacked
    # instead of being moved into the image folder as if it were a picture.
    if filename.lower().endswith('.zip'):
        with zipfile.ZipFile(filename, 'r') as z:
            z.extractall(IMAGES_DIR)
        print(f"Extracted zip to {IMAGES_DIR}")
    else:
        os.rename(filename, os.path.join(IMAGES_DIR, filename))
        print(f"Moved {filename} to {IMAGES_DIR}")

# List images
image_files = []
for root, dirs, fnames in os.walk(IMAGES_DIR):
    # Archives created on macOS contain a "__MACOSX" folder full of "._name"
    # metadata files that keep the image extension but are not decodable
    # images; prune the folder in place so os.walk never descends into it.
    dirs[:] = [d for d in dirs if d != "__MACOSX"]
    for f in fnames:
        if f.startswith("._"):
            continue
        if f.lower().endswith(IMAGE_EXTENSIONS):
            image_files.append(os.path.join(root, f))

# os.walk yields entries in arbitrary order; sort so the labeling sequence is
# the same on every run.
image_files.sort()

print(f"Found {len(image_files)} images.")

In [None]:
# Step 3: Load SAM Model
import torch
from segment_anything import sam_model_registry, SamPredictor

SAM_CHECKPOINT = "sam_vit_h_4b8939.pth"
if not os.path.exists(SAM_CHECKPOINT):
    print("Downloading SAM checkpoint...")
    !wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
    print("Downloaded!")

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

sam = sam_model_registry["vit_h"](checkpoint=SAM_CHECKPOINT)
sam.to(device=DEVICE)
predictor = SamPredictor(sam)
print("SAM Model Loaded!")

In [None]:
# Step 4: Interactive Click-to-Label
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from google.colab import output

# Annotation state shared by the labeling and export cells.
# Maps image path -> list of (class_name, x_center, y_center, width, height).
all_labels = dict()
# Every distinct class name entered so far.
class_names = set()

def get_bounding_box_from_mask(mask):
    """Return the tight pixel bounding box of a 2-D binary mask.

    The result is in pixel coordinates, not normalized YOLO format; the
    caller is responsible for any normalization.

    Args:
        mask: 2-D array (rows x columns) where truthy entries mark the object.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` as plain Python ints, where every
        value is an inclusive pixel index, or ``None`` if the mask has no
        truthy entry.
    """
    # Indices of the rows / columns that contain at least one mask pixel.
    row_idx = np.flatnonzero(np.any(mask, axis=1))
    col_idx = np.flatnonzero(np.any(mask, axis=0))

    if row_idx.size == 0 or col_idx.size == 0:
        return None

    # Convert NumPy scalars to built-in ints so the values can be handed to
    # drawing or serialization code without type surprises.
    return int(col_idx[0]), int(row_idx[0]), int(col_idx[-1]), int(row_idx[-1])

def _parse_click(user_input):
    """Parse ``"x y"`` text into an ``(x, y)`` int pair, or ``None`` if malformed."""
    parts = user_input.split()
    try:
        return int(parts[0]), int(parts[1])
    except (ValueError, IndexError):
        # Fewer than two tokens, or tokens that are not integers.
        return None


def label_image(img_path):
    """Label a single image interactively.

    The user repeatedly types a point as ``x y``; SAM segments the object at
    that point, the resulting box is shown, and the user names its class.
    Typing ``done`` ends the session for this image.

    Args:
        img_path: Path of the image file to label.

    Returns:
        A list of ``(class_name, x_center, y_center, width, height)`` tuples
        with the four box values normalized by the image width/height. The
        list is empty if the image cannot be read or nothing was labeled.

    Side effects:
        Adds every entered class name to the module-level ``class_names`` set
        and writes prompts/images to the notebook output.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable or corrupt file by returning None.
        print(f"Could not read image, skipping: {img_path}")
        return []

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    h, w = image.shape[:2]

    predictor.set_image(image_rgb)

    labels_for_image = []
    display_image = image.copy()

    print(f"\n--- Labeling: {os.path.basename(img_path)} ---")
    print("Image size:", w, "x", h)
    cv2_imshow(display_image)

    while True:
        print("\nEnter click coordinates (x y) or 'done' to finish this image:")
        user_input = input().strip()

        if user_input.lower() == 'done':
            break

        click = _parse_click(user_input)
        if click is None:
            print("Invalid input. Use format: x y (e.g., 150 200)")
            continue

        click_x, click_y = click
        if not (0 <= click_x < w and 0 <= click_y < h):
            print(f"Click ({click_x}, {click_y}) is outside the {w} x {h} image.")
            continue

        # Run SAM prediction with a single foreground point (label 1).
        masks, scores, _ = predictor.predict(
            point_coords=np.array([[click_x, click_y]]),
            point_labels=np.array([1]),
            multimask_output=True,
        )

        # Keep the highest-scoring of the candidate masks.
        mask = masks[np.argmax(scores)]

        bbox = get_bounding_box_from_mask(mask)
        if bbox is None:
            print("No object found at that location.")
            continue

        # Built-in ints keep the drawing calls independent of the scalar type
        # returned by the bounding-box helper.
        x_min, y_min, x_max, y_max = (int(v) for v in bbox)

        # Draw on a scratch copy so a box the user rejects (empty class name)
        # does not stay on the image shown for later clicks.
        color = (0, 255, 0)
        preview = display_image.copy()
        cv2.rectangle(preview, (x_min, y_min), (x_max, y_max), color, 2)
        cv2.circle(preview, (click_x, click_y), 5, (255, 0, 0), -1)

        # Show updated image
        output.clear()
        print(f"--- Labeling: {os.path.basename(img_path)} ---")
        cv2_imshow(preview)
        print(f"\nDetected box: ({x_min}, {y_min}) to ({x_max}, {y_max})")

        # Ask for class name
        print("Enter class name for this object:")
        class_name = input().strip()

        if not class_name:
            print("No class name entered; box discarded.")
            continue

        # Convert to YOLO format (normalized). x_max / y_max are inclusive
        # pixel indices, so the box spans (max - min + 1) pixels; without the
        # +1 a one-pixel-wide mask would produce a zero-size box.
        x_center = ((x_min + x_max + 1) / 2) / w
        y_center = ((y_min + y_max + 1) / 2) / h
        box_w = (x_max - x_min + 1) / w
        box_h = (y_max - y_min + 1) / h

        labels_for_image.append((class_name, x_center, y_center, box_w, box_h))
        class_names.add(class_name)

        # Draw label text; clamp the baseline so the text stays inside the
        # image when the box touches the top edge.
        cv2.putText(preview, class_name, (x_min, max(y_min - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        # The box is accepted: make the annotated copy the new base image.
        display_image = preview

        print(f"Added: {class_name}")

    return labels_for_image

# Confirm the labeling helpers are defined and report the size of the queue.
print("Ready to start labeling!")
print("You have {} images to label.".format(len(image_files)))

In [None]:
# Step 5: Label All Images
# Run this cell to start the labeling process

# Run the interactive session for every discovered image, keeping the
# resulting annotations keyed by image path.
for img_path in image_files:
    all_labels[img_path] = labels = label_image(img_path)
    print("\nLabeled {} objects in {}".format(len(labels), os.path.basename(img_path)))

# Final summary once every image has been visited.
print("\n=== LABELING COMPLETE ===")
print("Total images labeled: {}".format(len(all_labels)))
print("Classes found: {}".format(class_names))

In [None]:
# Step 6: Export YOLO Labels
import shutil

# Create class mapping: ids follow the alphabetical order of the class names,
# so the numbering matches the line order of classes.txt.
class_list = sorted(class_names)
class_to_id = {name: idx for idx, name in enumerate(class_list)}

# Write classes.txt (one class per line). UTF-8 is set explicitly so
# non-ASCII class names do not depend on the platform default encoding.
with open(os.path.join(LABELS_DIR, "classes.txt"), "w", encoding="utf-8") as f:
    f.writelines(c + "\n" for c in class_list)

# Create images folder in output
out_images = os.path.join(LABELS_DIR, "images")
out_labels = os.path.join(LABELS_DIR, "labels")
os.makedirs(out_images, exist_ok=True)
os.makedirs(out_labels, exist_ok=True)

# Write label files
used_names = set()   # output stems already taken in this export
exported_count = 0
for img_path, labels in all_labels.items():
    # Images without any labeled object are not exported.
    if not labels:
        continue

    filename = os.path.basename(img_path)
    base_name, ext = os.path.splitext(filename)

    # Images from different sub-folders (or with different extensions) can
    # share a stem. Suffix later ones so neither the copied image nor its
    # label file overwrites an earlier export.
    unique_name = base_name
    duplicate_idx = 1
    while unique_name in used_names:
        unique_name = f"{base_name}_{duplicate_idx}"
        duplicate_idx += 1
    used_names.add(unique_name)

    # Copy image (under the same stem as its label file)
    shutil.copy(img_path, os.path.join(out_images, unique_name + ext))

    # Write labels: "<class_id> <x_center> <y_center> <width> <height>"
    label_path = os.path.join(out_labels, unique_name + ".txt")
    with open(label_path, "w", encoding="utf-8") as f:
        for class_name, x_c, y_c, box_w, box_h in labels:
            class_id = class_to_id[class_name]
            f.write(f"{class_id} {x_c:.6f} {y_c:.6f} {box_w:.6f} {box_h:.6f}\n")

    exported_count += 1

print(f"Exported {exported_count} labeled images.")
print(f"Classes: {class_list}")

In [None]:
# Step 7: Download Results
import shutil

from google.colab import files

# Build click_labels.zip from scratch on every run. Re-running `zip -r` on an
# existing archive updates it in place, so files deleted or renamed since an
# earlier export would linger inside the download.
# base_dir keeps the top-level folder name inside the archive.
shutil.make_archive("click_labels", "zip", root_dir=".", base_dir=LABELS_DIR)

files.download('click_labels.zip')