# BIIGLE points to polygon with SAM

The goal of this notebook is to transform point annotations exported from BIIGLE into polygons by running SAM2 (Segment Anything Model 2) inference, using each point as a prompt.

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch

from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

from utils import (
    download_model,
    get_image,
    get_point_coord,
    get_polygon,
    show_points,
    show_res_multi,
    save_image_masks,
)

First, we check that the BIIGLE images and the annotation report have been downloaded and are in the expected folders. Each check below should print `True`.

In [None]:
# Folder holding the BIIGLE images and the CSV annotation report exported from BIIGLE.
image_dir = Path("../results/BIIGLE_results")
annotation_file = Path("../results/BIIGLE_results/2520_csv_image_annotation_report/14717-training-images-1.csv")

# One line of output per path, in the order above: True when the path exists on disk.
for required_path in (image_dir, annotation_file):
    print(required_path.exists())

Let's load the annotation file and get the points.

In [None]:
# Read the BIIGLE annotation report CSV into a DataFrame.
df_annotations = pd.read_csv(annotation_file)
# Show the loaded table as the cell output.
df_annotations

In [None]:
# Keep only the annotations whose shape is a point; the original row labels of
# df_annotations are preserved in the result.
is_point = df_annotations["shape_name"].eq("Point")
df_points = df_annotations.loc[is_point]
df_points

In [None]:
# For every image file, count the non-null entries in each column of the Point rows.
df_points.groupby("filename").count()

The following function downloads the weights of the large SAM2 model from the URL below, but only if the model folder does not already contain a downloaded model:

https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt

For all available models see here: https://github.com/facebookresearch/sam2?tab=readme-ov-file#download-checkpoints

In [None]:
# Fetch the SAM2 checkpoint. Per the note above, the download is skipped when a
# model is already present (see utils.download_model for the exact behaviour).
download_model()

In [None]:
from hydra import initialize, core

# --- Model selection -------------------------------------------------------
# large sam2: works on gpu > 8g
sam2_checkpoint = "../models/sam2_hiera_large.pt"
model_cfg = "sam2_hiera_l.yaml"
config_dir = "../models/"
# NOTE(review): the text above links the sam2.1_hiera_large.pt checkpoint while
# this path names sam2_hiera_large.pt. Confirm which file download_model()
# actually saves.

# base sam2: smaller version
#sam2_checkpoint = "../../SAM2_models/checkpoints/sam2_hiera_base_plus.pt"
#model_cfg = "sam2_hiera_b+.yaml"

# --- Device ----------------------------------------------------------------
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# --- Build the predictor ---------------------------------------------------
# Clear Hydra's global instance first, then initialise Hydra with config_dir
# as the config path for the duration of the model build.
hydra_state = core.global_hydra.GlobalHydra.instance()
hydra_state.clear()
with initialize(version_base=None, config_path=config_dir):
    sam2_model = build_sam2(model_cfg, sam2_checkpoint, device=device)

predictor = SAM2ImagePredictor(sam2_model)

In [None]:
# Annotator identity written into the user_id / firstname / lastname columns of
# every generated polygon row.
user_id = 2813
first_name = "Nils"
last_name = "Jacobsen"

# Largest ids currently present in the report. New rows continue counting from
# these values.
last_ann_label_id, last_label_id, last_annotation_id = (
    int(df_annotations[id_column].max())
    for id_column in ("annotation_label_id", "label_id", "annotation_id")
)

# Value stored in "shape_id" for new rows, alongside shape_name "Polygon".
polygon_id = 3

# Working copy of the report that the generated polygon rows are appended to.
df_new_annotations = df_annotations.copy()

Finally, we can loop through every image and use the points to prompt SAM2 and generate masks.

In [None]:
# Prompt SAM2 with every BIIGLE point of every image, save the resulting masks,
# and build `df_new_annotations` = original report + one Polygon row per point.
#
# The cell is idempotent: the id counters are local copies of the values from
# the configuration cell and `df_new_annotations` is rebuilt from
# `df_annotations`, so running the cell twice does not duplicate rows or keep
# increasing the ids.
results_dir = Path("../results/BIIGLE_results")
# parents=True: do not fail when an intermediate folder is missing.
results_dir.mkdir(parents=True, exist_ok=True)

# Running id counters (local, so the configured starting values stay untouched).
running_ann_label_id = last_ann_label_id
running_label_id = last_label_id
running_annotation_id = last_annotation_id

# Polygon rows are collected as plain dicts and appended in a single concat
# after the loop. Adding rows one by one with `df.loc[new_index, col] = value`
# fills the other columns with NaN on each enlargement, which turns the integer
# id columns into floats (e.g. "123.0" in the exported CSV) and is slow.
polygon_records = []

for image_name, row_locations in df_points.groupby("filename").groups.items():
    print(f"\nProcessing {image_name}")
    if len(row_locations) < 3:
        # the image has only the Laser Points
        print("the image has only the laser points")
        continue
    # NOTE(review): for images that pass this check, every Point row is used as
    # a prompt, presumably including the laser points. Confirm this is intended.

    # load the image
    test_image = get_image(image_name, image_dir)
    # get point coordinates
    point_prompts = np.array(
        df_points.loc[row_locations, "points"].apply(get_point_coord).to_list(),
        dtype=np.float32,
    )
    prompt_labels = np.ones(len(point_prompts))  # positive prompt
    # plot image + points
    fig, ax = plt.subplots(1, 1, figsize=(9, 8))
    ax.imshow(test_image)
    show_points(point_prompts, prompt_labels, ax, marker_size=15)
    plt.show()

    # get the SAM predictions for each point (one single-point prompt per call)
    print(f"getting predictions for {len(point_prompts)} point prompts...")
    all_masks = []
    all_scores = []
    predictor.set_image(test_image)
    for i in range(len(point_prompts)):
        # the third return value (logits) is not needed here
        masks, scores, _ = predictor.predict(
            point_coords=point_prompts[i: i + 1],
            point_labels=prompt_labels[i: i + 1],
            multimask_output=False,
        )
        all_masks.append(masks)
        all_scores.append(scores)
    # show the results
    show_res_multi(all_masks, all_scores, image=test_image, input_box=None)

    # save all masks
    image_id = int(df_points.loc[row_locations[0], "image_id"])
    # the groupby key is the value of the "filename" column for this group
    filename = image_name
    image_file = image_dir.joinpath(filename)
    save_image_masks(all_masks, image_file.stem, results_dir)

    # mask to polygons: all_masks[i] was predicted from the point in row_locations[i]
    print("getting polygons from masks...")
    for point_row, masks in zip(row_locations, all_masks):
        mask = masks[0]
        polygon = get_polygon(mask)
        running_ann_label_id += 1
        # NOTE(review): label_id gets a fresh value per polygon while label_name
        # and label_hierarchy are copied from the source point. Confirm that a
        # new label_id (rather than the point's own label_id) is intended.
        running_label_id += 1
        running_annotation_id += 1
        polygon_records.append(
            {
                "image_id": image_id,
                "filename": filename,
                "user_id": user_id,
                "firstname": first_name,
                "lastname": last_name,
                "annotation_label_id": running_ann_label_id,
                "label_id": running_label_id,
                "annotation_id": running_annotation_id,
                "label_hierarchy": df_points.loc[point_row, "label_hierarchy"],
                "label_name": df_points.loc[point_row, "label_name"],
                "shape_id": polygon_id,
                "shape_name": "Polygon",
                "points": str(polygon.ravel().tolist()),
            }
        )

# Append all polygon rows at once. Columns absent from the records are left as
# missing values in the new rows.
if polygon_records:
    df_new_annotations = pd.concat(
        [df_annotations, pd.DataFrame(polygon_records)],
        ignore_index=True,
    )
else:
    # nothing was generated: keep an untouched copy of the original report
    df_new_annotations = df_annotations.copy()

print("\n\nDone!")

In [None]:
# Display the combined table: the original rows plus any polygon rows added by the loop above.
df_new_annotations

In [None]:
# Export the combined annotations into the results folder; the DataFrame index
# is not written as a column.
output_csv = results_dir / "new_annotation.csv"
df_new_annotations.to_csv(output_csv, index=False)