# Annotate

## SAM2 bbox annotations

In [None]:
from typing import Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image


def generate_grid_2d_num_points(
    x_min: float, x_max: float, y_min: float, y_max: float, nx: int, ny: int
) -> np.ndarray:
    """
    Generate the points of a regular 2D grid as a flat list of (x, y) pairs.

    Args:
        x_min (float): Minimum X value.
        x_max (float): Maximum X value.
        y_min (float): Minimum Y value.
        y_max (float): Maximum Y value.
        nx (int): Number of points in X direction.
        ny (int): Number of points in Y direction.

    Returns:
        np.ndarray: Array of shape (nx * ny, 2). Column 0 holds the X
        coordinates and column 1 the Y coordinates. Points are ordered row by
        row: X varies fastest, Y changes every ``nx`` entries.
    """
    x_coords = np.linspace(x_min, x_max, nx)
    y_coords = np.linspace(y_min, y_max, ny)
    # Default "xy" indexing gives grids of shape (ny, nx).
    grid_x, grid_y = np.meshgrid(x_coords, y_coords)
    # Flatten both grids and pair them up so each row is one (x, y) point.
    return np.column_stack([grid_x.ravel(), grid_y.ravel()])


def show_mask(mask, ax, random_color=False, borders=True):
    """
    Draw a mask as a semi-transparent RGBA overlay on ``ax``.

    Args:
        mask: Array whose last two dimensions are (H, W); it is cast to uint8
            and reshaped to (H, W, 1) before colouring.
        ax: Object exposing ``imshow`` (e.g. a matplotlib Axes).
        random_color: Use a random RGB colour instead of the fixed blue.
            Alpha is 0.6 in both cases.
        borders: When true, outline the external contours of the mask using
            OpenCV (imported lazily, so cv2 is only needed on this path).
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
        if random_color
        else np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    )
    height, width = mask.shape[-2:]
    binary = mask.astype(np.uint8)
    # Broadcast the (H, W, 1) mask against the (1, 1, 4) colour -> (H, W, 4).
    overlay = binary.reshape(height, width, 1) * rgba.reshape(1, 1, -1)

    if borders:
        import cv2

        outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # Try to smooth contours
        outlines = [cv2.approxPolyDP(outline, epsilon=0.01, closed=True) for outline in outlines]
        overlay = cv2.drawContours(overlay, outlines, -1, (1, 1, 1, 0.5), thickness=2)

    ax.imshow(overlay)


def show_points(coords, labels, ax, marker_size=375):
    """
    Scatter prompt points on ``ax`` as stars, coloured by label.

    Rows of ``coords`` where ``labels == 1`` are drawn in green, rows where
    ``labels == 0`` in red. Column 0 of ``coords`` is used as x and column 1
    as y.

    Args:
        coords: 2D array of point coordinates, one point per row.
        labels: Array of per-point labels, used to build boolean row masks.
        ax: Object exposing ``scatter`` (e.g. a matplotlib Axes).
        marker_size: Marker size forwarded as ``s``.
    """
    positives = coords[labels == 1]
    negatives = coords[labels == 0]
    # Styling shared by both groups; only the fill colour differs.
    star_style = dict(marker="*", s=marker_size, edgecolor="white", linewidth=1.25)
    for group, colour in ((positives, "green"), (negatives, "red")):
        ax.scatter(group[:, 0], group[:, 1], color=colour, **star_style)


def show_box(box, ax):
    """
    Draw a green, unfilled rectangle on ``ax``.

    Args:
        box: Indexable with four entries read as (x0, y0, x1, y1), i.e. two
            opposite corners; width and height are derived as differences.
        ax: Object exposing ``add_patch`` (e.g. a matplotlib Axes).
    """
    left, top = box[0], box[1]
    width = box[2] - left
    height = box[3] - top
    outline = plt.Rectangle(
        (left, top),
        width,
        height,
        edgecolor="green",
        facecolor=(0, 0, 0, 0),  # fully transparent fill
        lw=2,
    )
    ax.add_patch(outline)


def show_masks(
    image,
    masks,
    scores,
    point_coords=None,
    box_coords=None,
    input_labels=None,
    borders=True,
):
    """
    Show every mask in its own 10x10 figure, overlaid on ``image``.

    Args:
        image: Image passed to ``plt.imshow`` as the background.
        masks: Iterable of masks, paired element-wise with ``scores``.
        scores: Sized collection of per-mask scores; the score is shown in the
            title only when more than one score is present.
        point_coords: Optional prompt points to draw (requires
            ``input_labels``).
        box_coords: Optional prompt box to draw.
        input_labels: Labels for ``point_coords``.
        borders: Forwarded to ``show_mask``.
    """
    for number, (mask, score) in enumerate(zip(masks, scores), start=1):
        plt.figure(figsize=(10, 10))
        plt.imshow(image)
        axes = plt.gca()

        show_mask(mask, axes, borders=borders)
        if point_coords is not None:
            assert input_labels is not None
            show_points(point_coords, input_labels, axes)
        if box_coords is not None:
            show_box(box_coords, axes)

        if len(scores) > 1:
            plt.title(f"Mask {number}, Score: {score:.3f}", fontsize=18)
        plt.axis("off")
        plt.show()


def show_anns(anns, borders=True):
    """
    Paint a list of mask annotations onto the current matplotlib axes.

    Each annotation is a mapping with an ``"area"`` entry (used for ordering)
    and a ``"segmentation"`` entry (used as a boolean index into the canvas).
    Annotations are painted in descending ``"area"`` order, each in a random
    colour with alpha 0.5, so later (smaller-area) masks overwrite earlier
    ones where they overlap.

    Args:
        anns: Sized collection of annotations; nothing is drawn when empty.
        borders: When true, outline each mask's external contours using
            OpenCV (imported lazily).
    """
    if not len(anns):
        return

    by_area = sorted(anns, key=lambda ann: ann["area"], reverse=True)
    axes = plt.gca()
    axes.set_autoscale_on(False)

    # RGBA canvas sized from the first mask: white, fully transparent.
    first_seg = by_area[0]["segmentation"]
    canvas = np.ones((first_seg.shape[0], first_seg.shape[1], 4))
    canvas[:, :, 3] = 0

    for ann in by_area:
        seg = ann["segmentation"]
        canvas[seg] = np.concatenate([np.random.random(3), [0.5]])
        if not borders:
            continue

        import cv2

        outlines, _ = cv2.findContours(seg.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # Try to smooth contours
        outlines = [cv2.approxPolyDP(outline, epsilon=0.01, closed=True) for outline in outlines]
        cv2.drawContours(canvas, outlines, -1, (0, 0, 1, 0.4), thickness=1)

    axes.imshow(canvas)


def mask_to_bbox(
    mask: np.ndarray, margin: float = 0.0, min_size: int = 56
) -> Optional[Tuple[int, int, int, int]]:
    """
    Convert a 2D mask (H, W) to a corner-format bounding box (x1, y1, x2, y2).

    The box spans the minimum and maximum indices of the non-zero mask
    pixels. It is then scaled about its centre by ``1 + margin`` and clipped
    to the mask extent ``[0, W] x [0, H]``.

    Args:
        mask (np.ndarray): 2D array; non-zero / True entries are foreground.
        margin (float): Relative enlargement of the box width and height
            (0.0 keeps the tight box, 0.1 enlarges each side length by 10%).
        min_size (int): Minimum accepted box width and height after clipping.

    Returns:
        Optional[Tuple[int, int, int, int]]: ``(x1, y1, x2, y2)`` truncated to
        ints, or ``None`` when the mask has no foreground pixel or the clipped
        box is narrower or shorter than ``min_size``.
    """
    height, width = mask.shape
    ys, xs = np.nonzero(mask)
    if ys.size == 0:
        # No foreground: report "no box" the same way as for undersized boxes,
        # so callers filtering on None do not record a degenerate (0, 0, 0, 0).
        return None

    x_lo, x_hi = int(xs.min()), int(xs.max())
    y_lo, y_hi = int(ys.min()), int(ys.max())

    # Scale the box about its centre.
    box_w = (x_hi - x_lo) * (1 + margin)
    box_h = (y_hi - y_lo) * (1 + margin)
    x_center = (x_lo + x_hi) / 2
    y_center = (y_lo + y_hi) / 2

    # Clip the scaled box to the mask extent.
    x1 = max(x_center - box_w / 2, 0)
    y1 = max(y_center - box_h / 2, 0)
    x2 = min(x_center + box_w / 2, width)
    y2 = min(y_center + box_h / 2, height)

    if x2 - x1 < min_size or y2 - y1 < min_size:
        return None
    return int(x1), int(y1), int(x2), int(y2)

### SAM2 example with a centered box prompt

In [None]:
import torch
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Load the pretrained SAM2 image predictor and a sample image.
predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
image = Image.open("/home/akobylin/datasets/lct_2025/6_pliers/DSCN0615.JPG").convert("RGB")
w, h = image.size
# Centre-point prompt; kept as an alternative but not passed to the predictor below.
input_point = np.array([[w // 2, h // 2]])
input_label = np.array([1])
# Box prompt covering the central region: from 1/4 to 3/4 of the width and height.
input_box = np.array([w // 4, h // 4, w // 4 + w // 2, h // 4 + h // 2])

with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
    predictor.set_image(np.array(image))
    # NOTE(review): point_labels is passed without point_coords; it presumably
    # has no effect unless the point prompt is re-enabled — confirm.
    masks, scores, logits = predictor.predict(
        point_coords=None,  # input_point,
        point_labels=input_label,
        box=input_box,
        multimask_output=False,
    )
best_idx = int(np.argmax(scores)) if len(scores) > 0 else 0
chosen_mask = masks[best_idx].astype(np.uint8)
bbox = mask_to_bbox(chosen_mask, margin=0.0)
info = {
    "mask": chosen_mask,
    "score": float(scores[best_idx]) if len(scores) > 0 else None,
    "chosen_index": best_idx,
}

plt.figure(figsize=(10, 10))
plt.imshow(image)
show_mask(chosen_mask, plt.gca(), borders=False)
# mask_to_bbox returns None when it rejects the box (e.g. too small);
# indexing None inside show_box would raise, so only draw a real box.
if bbox is not None:
    show_box(bbox, plt.gca())
plt.axis("off")
plt.show()

### SAM2 example with automatic mask generation (segment everything)

In [None]:
import torch
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator

# Pretrained automatic mask generator: it needs no prompts and returns a list
# of mask annotations for the whole image.
predictor = SAM2AutomaticMaskGenerator.from_pretrained("facebook/sam2-hiera-base-plus")

# Load the sample image as RGB and resize it to 512x512 before segmentation.
image = (
    Image.open("/home/akobylin/datasets/lct_2025/7_shernitsa/DSCN0314.JPG")
    .convert("RGB")
    .resize((512, 512))
)

with torch.inference_mode():
    with torch.autocast("cuda", dtype=torch.bfloat16):
        masks = predictor.generate(np.array(image))

# Overlay all generated masks on the resized image.
plt.figure(figsize=(20, 20))
plt.imshow(image)
show_anns(masks)
plt.axis("off")
plt.show()

### Annotate

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from sam2.sam2_image_predictor import SAM2ImagePredictor
from tqdm import tqdm

# Load the pretrained SAM2 image predictor used by the annotation loop below.
predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")

In [None]:
root = Path("/home/akobylin/datasets/lct_2025")
# Class folders are the sub-directories of the dataset root.
# NOTE(review): the last two directories in sorted-name order are skipped —
# confirm which folders this is meant to exclude.
labels = sorted([p.name for p in root.iterdir() if p.is_dir()])[:-2]
print(labels)
# Explicit column list so the CSV always has a header, even when no box is kept.
annotation_columns = ["path", "label", "target", "x0", "y0", "x1", "y1"]
res = []
for label in labels:
    # Folder names start with a 1-based class number ("<number>_<name>");
    # the target is the corresponding 0-based index.
    target = int(label.split("_")[0]) - 1
    # glob order depends on the filesystem; sort for a reproducible CSV.
    paths = sorted((root / label).glob("*.JPG"))
    for path in tqdm(paths, desc=label):
        image = Image.open(path).convert("RGB")
        w, h = image.size
        # Centre-point prompt; kept as an alternative but not passed below.
        input_point = np.array([[w // 2, h // 2]])
        input_label = np.array([1])
        # Box prompt covering the central region: 1/4 to 3/4 of each side.
        input_box = np.array([w // 4, h // 4, w // 4 + w // 2, h // 4 + h // 2])

        with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
            predictor.set_image(np.array(image))
            masks, scores, logits = predictor.predict(
                point_coords=None,  # input_point,
                point_labels=input_label,
                box=input_box,
                multimask_output=False,
            )
        best_idx = int(np.argmax(scores)) if len(scores) > 0 else 0
        chosen_mask = masks[best_idx].astype(np.uint8)
        bbox = mask_to_bbox(chosen_mask, margin=0.0)
        # Images for which mask_to_bbox rejects the box (returns None) are skipped.
        if bbox is not None:
            res.append(
                {
                    "path": path.name,
                    "label": label,
                    "target": target,
                    "x0": bbox[0],
                    "y0": bbox[1],
                    "x1": bbox[2],
                    "y1": bbox[3],
                }
            )
res_df = pd.DataFrame(res, columns=annotation_columns)
print(res_df.shape)
res_df.to_csv(root / "annotations.csv", index=False)
res_df.head()

## Show annotated example

In [None]:
# Reload the saved annotations from the dataset root and preview them.
root = Path("/home/akobylin/datasets/lct_2025")
df = pd.read_csv(root.joinpath("annotations.csv"))
print(df.shape)
df.head()

In [None]:
# Preview 10 randomly chosen annotations: full image with its box, and the crop.
for _ in range(10):
    # randint's upper bound is exclusive, so use the row count itself;
    # "count - 1" would never pick the last row and fails on a 1-row table.
    idx = np.random.randint(0, df.shape[0])
    row = df.iloc[idx]
    image = Image.open(root / row["label"] / row["path"]).convert("RGB")
    box = np.array([row["x0"], row["y0"], row["x1"], row["y1"]])
    crop = image.crop(tuple(box))
    plt.figure(figsize=(30, 10))
    # Left panel: the whole image with the annotated box drawn on top.
    plt.subplot(1, 3, 1)
    plt.imshow(image)
    show_box(box, plt.gca())
    plt.axis("off")
    # Middle panel: the region cut out by the box.
    plt.subplot(1, 3, 2)
    plt.imshow(crop)
    plt.axis("off")
    plt.show()