<td>
   <a target="_blank" href="https://labelbox.com" ><img src="https://labelbox.com/blog/content/images/2021/02/logo-v4.svg" width=256/></a>
</td>

<td>
<a href="https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/integrations/sam/meta_sam_labelbox.ipynb" target="_blank"><img
src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
</td>

<td>
<a href="https://github.com/Labelbox/labelbox-python/blob/master/examples/integrations/sam/meta_sam_labelbox.ipynb" target="_blank"><img
src="https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white" alt="GitHub"></a>
</td>

# Predicting bounding boxes around common objects using YOLOv8

First, we load the YOLOv8 model, download a sample image, and run the model on it to generate bounding boxes around some common objects.

In [1]:
# Show the GPU attached to this runtime (driver/CUDA versions, memory use) before loading any models.
!nvidia-smi

Tue Oct 24 14:05:26 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    23W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os
# Remember the directory the notebook starts in; later cells build the paths of
# the model weights and the downloaded image from it.
HOME = os.getcwd()
print(HOME)

/content


In [3]:
# Pip install method (recommended)
# The release is pinned to an exact version (8.0.20).
!pip install ultralytics==8.0.20

# Clear the pip log from the cell output. At this point `display` is the
# IPython.display module; the last import of this cell rebinds the name to the
# display() function.
from IPython import display
display.clear_output()

# Print an environment summary (library version, Python/torch, hardware).
import ultralytics
ultralytics.checks()

import cv2
# NOTE(review): this PIL `Image` is shadowed by `IPython.display.Image`, imported
# on the last line of this cell, so after the cell runs `Image` refers to the
# IPython class. Confirm which one is intended before relying on `Image`.
from PIL import Image
import numpy as np
# Provided by the google.colab package, so this import needs a Colab runtime.
from google.colab.patches import cv2_imshow
from ultralytics import YOLO
from IPython.display import display, Image

Ultralytics YOLOv8.0.20 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)
Setup complete ✅ (8 CPUs, 51.0 GB RAM, 27.1/166.8 GB disk)


Here we run inference on the image using the YOLOv8 model.

In [4]:
# You can also use the Labelbox Client API to get specific images or an entire
# dataset from your Catalog. Refer to these docs:
# https://labelbox-python.readthedocs.io/en/latest/#labelbox.client.Client.get_data_row

# Public URL of the sample image. The same URL is reused later as the data row
# content uploaded to Labelbox.
IMAGE_PATH = "https://storage.googleapis.com/labelbox-datasets/image_sample_data/chairs.jpeg"

# Download the image into the current working directory; later cells read it
# back as HOME/<file name of IMAGE_PATH>.
!wget -v {IMAGE_PATH}

--2023-10-24 14:05:40--  https://storage.googleapis.com/labelbox-datasets/image_sample_data/chairs.jpeg
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.119.207, 108.177.126.207, 108.177.127.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.119.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 503155 (491K) [image/jpeg]
Saving to: ‘chairs.jpeg’


2023-10-24 14:05:41 (1.13 MB/s) - ‘chairs.jpeg’ saved [503155/503155]



In [5]:
# Load the yolov8n.pt checkpoint from HOME; ultralytics downloads the weights to
# this path when the file is missing (as the recorded output of this cell shows).
model = YOLO(f'{HOME}/yolov8n.pt')
# Run detection on the downloaded image (HOME/<file name of IMAGE_PATH>).
# NOTE(review): conf=0.25 is presumably the minimum confidence for a detection
# to be kept — confirm against the ultralytics predict documentation.
results = model.predict(source="{}/{}".format(HOME, os.path.basename(IMAGE_PATH)), conf=0.25)

# Optional inspection helpers — uncomment to look at the raw predictions.

# print(results[0].boxes.xyxy) # print bounding box coordinates

# print(results[0].boxes.conf) # print confidence scores

#for c in results[0].boxes.cls:
# print(model.names[int(c)]) # print predicted classes

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to /content/yolov8n.pt...
100%|██████████| 6.23M/6.23M [00:00<00:00, 167MB/s]

Ultralytics YOLOv8.0.20 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs


Here we visualize the bounding boxes on the image.

In [None]:
# Read the downloaded image back from disk as a 3-channel colour image.
image_bgr = cv2.imread("{}/{}".format(HOME, os.path.basename(IMAGE_PATH)), cv2.IMREAD_COLOR)

# Each row of boxes.xyxy holds the two opposite corners of one detection.
# Draw it as a green rectangle, 2 px thick, with the corners truncated to
# whole pixels.
for x_min, y_min, x_max, y_max in results[0].boxes.xyxy:
  top_left = (int(x_min), int(y_min))
  bottom_right = (int(x_max), int(y_max))
  cv2.rectangle(image_bgr, top_left, bottom_right, (0, 255, 0), 2)

cv2_imshow(image_bgr)

# Predicting segmentation masks using Meta's Segment Anything model

Now we load Meta's Segment Anything model and feed the bounding boxes to it, so it can generate segmentation masks within them.

In [None]:
# Run from the notebook's base directory.
%cd {HOME}

# Download SAM model SDK

import sys
# Invoke pip through the kernel's own interpreter (sys.executable) and install
# segment-anything straight from its GitHub repository.
!{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git'

In [None]:
# Download SAM model weights

%cd {HOME}
!mkdir {HOME}/weights
%cd {HOME}/weights

!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

CHECKPOINT_PATH = os.path.join(HOME, "weights", "sam_vit_h_4b8939.pth")
print(CHECKPOINT_PATH, "; exist:", os.path.isfile(CHECKPOINT_PATH))

In [None]:
import torch
import matplotlib.pyplot as plt
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

In [None]:
# SAM variant to build; it selects the matching entry of the model registry.
MODEL_TYPE = "vit_h"

# Run on the first CUDA GPU when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
  DEVICE = torch.device('cuda:0')
else:
  DEVICE = torch.device('cpu')

In [None]:
# Build the SAM network registered under MODEL_TYPE from the downloaded
# checkpoint, then move it onto DEVICE.
sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH)
sam = sam.to(device=DEVICE)

# Wrap the model in the prompt-driven predictor used by the following cells.
mask_predictor = SamPredictor(sam)

In [None]:
# Load the image again; OpenCV returns pixels in BGR channel order.
image_bgr = cv2.imread("{}/{}".format(HOME, os.path.basename(IMAGE_PATH)), cv2.IMREAD_COLOR)
if image_bgr is None:
  # cv2.imread signals failure by returning None rather than raising.
  raise FileNotFoundError(
      "Could not read image: {}/{}".format(HOME, os.path.basename(IMAGE_PATH)))

# Map the YOLO boxes from original-image pixel coordinates into the coordinate
# frame SAM uses internally (the second argument is the original height, width).
transformed_boxes = mask_predictor.transform.apply_boxes_torch(results[0].boxes.xyxy, image_bgr.shape[:2])

# set_image() assumes RGB input unless told otherwise. The array comes from
# OpenCV, so declare it as BGR; otherwise SAM would embed an image whose red and
# blue channels are swapped.
mask_predictor.set_image(image_bgr, image_format="BGR")

# One box prompt per detection, no point prompts, one mask per box.
masks, scores, logits = mask_predictor.predict_torch(
    boxes=transformed_boxes,
    multimask_output=False,
    point_coords=None,
    point_labels=None
)
# Bring the masks to host memory as a NumPy array for plotting and upload.
masks = masks.cpu().numpy()

# print(masks)
# print(scores)

Here we visualize the segmentation masks drawn on the image.

In [None]:
# matplotlib expects RGB, OpenCV loaded the image as BGR.
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# Merge all per-object masks into one overlay. masks[i][0] is the 2-D mask of
# object i, so masks[:, 0] stacks them along axis 0 and the OR-reduction unions
# them. Unlike a pairwise loop this also works for a single detection, and for
# zero detections it yields an all-empty mask instead of None.
final_mask = np.bitwise_or.reduce(masks[:, 0], axis=0)

plt.figure(figsize=(10, 10))
plt.imshow(image_rgb)
plt.axis('off')
# Semi-transparent overlay so the underlying image stays visible.
plt.imshow(final_mask, cmap='gray', alpha=0.7)

plt.show()

# Uploading predicted segmentation masks with class names to Labelbox using the Python SDK

In [None]:
# Install labelbox package
# -q keeps pip's log short; "labelbox[data]" requests the package together with its `data` extra.

!pip install -q "labelbox[data]"

In [None]:
import uuid
import numpy as np
import labelbox as lb
import labelbox.types as lb_types

In [None]:
# Create a Labelbox API key for your account by following the instructions here:
# https://docs.labelbox.com/reference/create-api-key
# Then either export it as the LABELBOX_API_KEY environment variable (preferred:
# the secret never ends up in the saved notebook) or paste it between the quotes.

API_KEY = os.environ.get("LABELBOX_API_KEY") or ""
client = lb.Client(API_KEY)

In [None]:
# Turn every predicted mask into a Labelbox ObjectAnnotation of type mask

# Value in the uploaded array that marks the pixels belonging to the annotation
color = (1, 1, 1)

# Class name predicted by the object detector for each detection; indexed below
# with the position of the corresponding mask
class_names = [model.names[int(class_id)] for class_id in results[0].boxes.cls]

annotations = []
for idx, mask in enumerate(masks):
  mask_annotation = lb_types.ObjectAnnotation(
    name=class_names[idx],  # class predicted in Step 1 (object detector)
    value=lb_types.Mask(
      # mask[0] is the 2-D mask of this object, converted to uint8 for upload
      mask=lb_types.MaskData.from_2D_arr(np.asarray(mask[0], dtype="uint8")),
      color=color,
    ),
  )
  annotations.append(mask_annotation)
  print(mask_annotation)

In [None]:
# Create a new dataset

# read more here: https://docs.labelbox.com/reference/data-row-global-keys
# A global key may only be used once, so a fixed literal makes every re-run of
# this cell fail with a duplicate-key error. The random suffix keeps it unique.
global_key = "my_unique_global_key-" + str(uuid.uuid4())

# The data row points at the public image URL rather than uploading the file.
test_img_url = {
    "row_data": IMAGE_PATH,
    "global_key": global_key
}

dataset = client.create_dataset(name="auto-mask-classification-dataset")
task = dataset.create_data_rows([test_img_url])
# Data row creation runs as a task; block until it has finished.
task.wait_till_done()

print(f"Errors: {task.errors}")
print(f"Failed data rows: {task.failed_data_rows}")

In [None]:
# Create a new ontology if you don't have one

# One raster-segmentation tool per distinct class detected in Step 1
tools = [
  lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=class_name)
  for class_name in set(class_names)
]

ontology_builder = lb.OntologyBuilder(classifications=[], tools=tools)

ontology = client.create_ontology(
  "auto-mask-classification-ontology",
  ontology_builder.asdict(),
  media_type=lb.MediaType.Image,
)

# Or get an existing ontology by name or ID (uncomment one of the below)

# ontology = client.get_ontologies("Demo Chair").get_one()

# ontology = client.get_ontology("clhee8kzt049v094h7stq7v25")

In [None]:
# Create a new project if you don't have one

# No queue-mode argument is passed here, so the SDK default applies (per the
# original note: batch mode with benchmark quality settings).
project = client.create_project(name="auto-mask-classification-project",
                                media_type=lb.MediaType.Image
                                )

# Or get an existing project by ID (uncomment the below)

# project = client.get_project("fill_in_project_id")

# Attach the ontology to the project's editor.
# If the project already has an ontology set up, comment out this line
project.setup_editor(ontology)

print(project.uid)

In [None]:
# Send the uploaded data row to the project above as a new batch

# Look up the data row ID that belongs to our global key
data_row_ids = client.get_data_row_ids_for_global_keys([global_key])['results']

batch = project.create_batch(
    "auto-mask-classification-batch",  # each batch in a project must have a unique name
    priority=1,  # priority between 1(highest) - 5(lowest)
    # data_rows: paginated collection of data row objects, list of data row ids or global keys
    # (you can also specify global_keys=[global_key] instead of data_rows)
    data_rows=data_row_ids,
)

print(f"Batch: {batch}")

In [None]:
# A single Label ties all mask annotations to the data row identified by global_key
labels = [
    lb_types.Label(
        data=lb_types.ImageData(global_key=global_key),
        annotations=annotations,
    )
]

In [None]:
# Import the predictions into the project as pre-labels (model-assisted labeling)

# A random suffix gives each import job its own name
mal_job_name = "mal_job" + str(uuid.uuid4())

upload_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=mal_job_name,
    predictions=labels,
)
# Block until the import has finished before reading its results
upload_job.wait_until_done()

print(f"Errors: {upload_job.errors}")
print(f"Status of uploads: {upload_job.statuses}")

Now head on over to your Labelbox account! You should see a new project by the name you specified above, and when you hit Start Labeling, you should see all the predicted masks rendered.

Using the tools in the image editor, you can then modify or review the masks.