In [1]:
import os

# Expose only GPU index 6 to CUDA for this process. This runs in the first
# cell, ahead of the torch import further down the notebook.
os.environ.update({"CUDA_VISIBLE_DEVICES": "6"})

Create a `HOME` constant.

In [2]:
import os

# Capture the directory the kernel was started in and echo it for the reader.
HOME = os.getcwd()
print(f"HOME: {HOME}")

HOME: /home/sastrong/repos/segment-anything/notebooks


## Load Model

In [3]:
import torch

# Key used to look up the model builder in sam_model_registry.
MODEL_TYPE = "vit_b"

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

In [4]:
import sys
!{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git'

Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-rzc5tym4
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-rzc5tym4
  Resolved https://github.com/facebookresearch/segment-anything.git to commit 6fdee8f2727f4506cfbbe553e23b895e27956588
  Preparing metadata (setup.py) ... [?25ldone
[?25h

In [5]:
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# Location of the SAM checkpoint weights on disk.
chkpt_path = '/home/sastrong/repos/segment-anything/weights/sam_vit_b_01ec64.pth'

# Look up the builder registered for MODEL_TYPE, construct the model from the
# checkpoint, then move it onto DEVICE.
sam_builder = sam_model_registry[MODEL_TYPE]
sam = sam_builder(checkpoint=chkpt_path)
sam = sam.to(device=DEVICE)

# Prompt-based predictor wrapping the same model instance.
predictor = SamPredictor(sam)

## Upload Image(s)

In [6]:
import cv2
import os
import glob

# Path to the directory with images
imgs_path = '/home/sastrong/repos/segment-anything/rocks_dataset'

# Collect every .jpg file in the directory. glob.glob() makes no promise about
# ordering, so the paths are sorted: the position of an image in `imgs` is what
# later becomes its exported file number, and it must be the same on every run.
imgs_files = sorted(glob.glob(os.path.join(imgs_path, '*.jpg')))

# Successfully decoded images, in the order of imgs_files (unreadable files are
# skipped)
imgs = []

for img_file in imgs_files:
    img = cv2.imread(img_file)

    # cv2.imread reports a failed read by returning None rather than raising
    if img is None:
        print(f'Could not open or find the image: {img_file}')
        continue

    imgs.append(img)

print(f"Successfully loaded {len(imgs)} images from {imgs_path}")


Successfully loaded 103 images from /home/sastrong/repos/segment-anything/rocks_dataset


## Automated Mask Generation

To run automatic mask generation, provide a SAM model to the `SamAutomaticMaskGenerator` class. The cell below reuses the `sam` model that was loaded from its checkpoint in the Load Model section, so no path needs to be set here. Running on CUDA and with the default model is recommended.

In [7]:
# Generator settings, kept separate from the model so they are easy to tune.
amg_settings = {
    "points_per_side": 32,
    "pred_iou_thresh": 0.86,
    "stability_score_thresh": 0.8,
    "crop_n_layers": 1,
    "crop_n_points_downscale_factor": 2,
    "min_mask_region_area": 50,  # Requires open-cv to run post-processing
}

# Automatic mask generator driving the SAM model loaded earlier.
mask_generator = SamAutomaticMaskGenerator(model=sam, **amg_settings)

### Generate masks with SAM

In [8]:
import cv2

# One entry per image in `imgs`; each entry is the list of mask records that
# the generator returned for that image.
results = []
for image in imgs:
    # The images were read with cv2.imread, which yields BGR channel order,
    # while SAM treats its input as RGB. Convert a copy for inference and leave
    # the BGR original in `imgs` untouched so cv2.imwrite can still save it.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    sam_result = mask_generator.generate(image_rgb)
    results.append(sam_result)

### Output format

`SamAutomaticMaskGenerator` returns a `list` of masks, where each mask is a `dict` containing various information about the mask:

* `segmentation` - `[np.ndarray]` - the mask with `(H, W)` shape, and `bool` type
* `area` - `[int]` - the area of the mask in pixels
* `bbox` - `[List[int]]` - the bounding box of the mask in `xywh` format
* `predicted_iou` - `[float]` - the model's own prediction for the quality of the mask
* `point_coords` - `[List[List[float]]]` - the sampled input point that generated this mask
* `stability_score` - `[float]` - an additional measure of mask quality
* `crop_box` - `[List[int]]` - the crop of the image used to generate this mask in `xywh` format

### Results visualisation with Supervision

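As of version `0.5.0` Supervision has native support for SAM. The cell below does not draw the masks; it filters them by area, converts each remaining mask to polygons, and saves every image together with a matching JSON label file.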

In [15]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import json

save_path = '/home/sastrong/repos/m_rocks_dataset'

# Masks covering more than this many pixels are left out of the export.
MAX_MASK_AREA = 10000

# Neither cv2.imwrite nor open() creates missing parent directories:
# cv2.imwrite just returns False and open() raises. Create both up front.
images_dir = os.path.join(save_path, 'images')
labels_dir = os.path.join(save_path, 'labels')
os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)

for i, (img, image_masks) in enumerate(zip(imgs, results)):
    # Zero-padded position of the image; used as the stem of both output files
    img_num = "{:03}".format(i)
    bboxes = []
    polygons = []
    for mask_info in image_masks:
        if mask_info['area'] > MAX_MASK_AREA:
            continue

        bboxes.append(mask_info['bbox'])

        # Scale the mask to 0/255 and store it as 8-bit for findContours
        mask = (mask_info['segmentation'] * 255).astype(np.uint8)

        # RETR_TREE retrieves every contour, nested ones included
        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Simplify each contour to a closed polygon with a 1 px tolerance
        polygons += [cv2.approxPolyDP(contour, 1, True).tolist() for contour in contours]

    entry = {"img_num": img_num, "bboxes": bboxes, "polygons": polygons}

    # Save the image under its new numbered name, e.g. 023.png
    img_path = os.path.join(images_dir, f'{img_num}.png')
    if not cv2.imwrite(img_path, img):
        # Without this check a failed write would pass unnoticed and leave a
        # label file with no matching image.
        raise OSError(f'Could not write image: {img_path}')

    # Save the annotations for this image as <img_num>.json
    json_path = os.path.join(labels_dir, f'{img_num}.json')
    with open(json_path, 'w') as f:
        json.dump(entry, f)