## Auto label data using foundation models

In [1]:
# # Do once
# %pip install -q \
#  autodistill \
#  autodistill-detic \
#  autodistill-grounding-dino \
#  supervision==0.9.0

### Display image sample

In [2]:
import os
import supervision as sv

# All paths in this notebook are built from the current working directory.
HOME = os.getcwd()
IMAGE_DIR_PATH = f"{HOME}/images"

# Collect the PNG and JPG files found in the image folder.
image_paths = sv.list_files_with_extensions(
    directory=IMAGE_DIR_PATH, extensions=["png", "jpg"]
)

print('image count:', len(image_paths))

image count: 3985


Plot a sample of the image dataset.

In [3]:
# Preview settings consumed by the (currently commented-out) sample-plot cells,
# where they are passed as `grid_size=` and `size=` to sv.plot_images_grid.
SAMPLE_GRID_SIZE = (4, 4)
# One image per grid slot; derived from the grid shape so the two cannot drift apart.
SAMPLE_SIZE = SAMPLE_GRID_SIZE[0] * SAMPLE_GRID_SIZE[1]
SAMPLE_PLOT_SIZE = (16, 16)

In [4]:
# import cv2
# import supervision as sv

# titles = [
#     image_path.stem
#     for image_path
#     in image_paths[:SAMPLE_SIZE]]
# images = [
#     cv2.imread(str(image_path))
#     for image_path
#     in image_paths[:SAMPLE_SIZE]]

# sv.plot_images_grid(images=images, titles=titles, grid_size=SAMPLE_GRID_SIZE, size=SAMPLE_PLOT_SIZE)

### Autolabel images

Here we define the ontology.

**Ontology** - An Ontology defines how your Base Model is prompted, what your Dataset will describe, and what your Target Model will predict. A simple Ontology is the `CaptionOntology`, which prompts a Base Model with text captions and maps them to class names.

In [5]:
from autodistill.detection import CaptionOntology

# Each entry below reads: class label we want predicted -> descriptive prompt
# sent to the base model.
#
# The descriptions came from ChatGPT using the prompt:
#   "describe what a {class name} looks like in 15-20 words"
#
# NOTE(review): none of the descriptions contains a comma, and the DETIC log
# output in this notebook shows the prompts joined with ", " — presumably commas
# inside a prompt would be ambiguous there; confirm before editing the text.
class_descriptions = {
    "person": "a person is a living being with a complex physical form including a head torso limbs and varied appearance based on ethnicity and individual traits",
    "rickshaw": "a rickshaw is a human powered or motorized vehicle with a simple frame seating and often two or three wheels",
    "rickshaw van": "a rickshaw van is a motorized three wheeled vehicle with an enclosed cabin for passengers or goods and typically a driver upfront",
    "auto rickshaw": "an auto rickshaw is a compact three wheeled motorized vehicle with a cabin for passengers a driver upfront and a rear engine",
    "truck": "a truck is a large motorized vehicle with a drivers cabin cargo area wheels and often a distinct front grille",
    "pickup truck": "a pickup truck is a smaller motorized vehicle with a drivers cabin and an open cargo bed in the rear",
    "private car": "a private car is a four wheeled motor vehicle designed for personal transportation typically with seating for passengers and an enclosed cabin",
    "motorcycle": "a motorcycle is a two wheeled motor vehicle with a seat for a rider and often a pillion seat for a passenger",
    "bicycle": "a bicycle is a human powered vehicle with two wheels pedals a frame handlebars and a seat for a rider",
    "bus": "a bus is a large motorized vehicle with a passenger cabin typically featuring multiple seats windows and a distinctive elongated shape",
    "micro bus": "a micro bus is a smaller motorized vehicle similar to a standard bus but more compact with seating for fewer passengers",
    "covered van": "a covered van is a motorized vehicle with a closed cargo area often used for transporting goods and may have a drivers cabin upfront",
    "human hauler": "a human hauler is a motorized vehicle designed for transporting passengers similar to an auto rickshaw or tuktuk with a cabin and driver upfront",
}

# CaptionOntology is given a prompt -> class label mapping, so flip the table
# above (insertion order is kept).
ontology = CaptionOntology(
    {prompt: label for label, prompt in class_descriptions.items()}
)

### Initialize the base model and autolabel

**Base Model** - A Base Model is a large foundation model that knows a lot about a lot. We use a Base Model (along with unlabeled input data and an Ontology) to create a Dataset.

In [6]:
# DETIC
from autodistill_detic import DETIC

# Base model prompted with the ontology defined in the previous cell.
base_model = DETIC(ontology=ontology)

# Folder that receives the auto-labeled dataset.
DATASET_DIR_PATH = f"{HOME}/bdss_detic"

# Label the images in IMAGE_DIR_PATH and write the result to DATASET_DIR_PATH.
# NOTE(review): the image count earlier in the notebook includes both "png" and
# "jpg" files, while this call passes extension=".jpg" — presumably any .png
# files are not labeled; confirm that is intended.
dataset = base_model.label(
    input_folder=IMAGE_DIR_PATH,
    extension=".jpg",
    output_folder=DATASET_DIR_PATH,
)

a person is a living being with a complex physical form including a head torso limbs and varied appearance based on ethnicity and individual traits, a rickshaw is a human powered or motorized vehicle with a simple frame seating and often two or three wheels, a rickshaw van is a motorized three wheeled vehicle with an enclosed cabin for passengers or goods and typically a driver upfront, an auto rickshaw is a compact three wheeled motorized vehicle with a cabin for passengers a driver upfront and a rear engine, a truck is a large motorized vehicle with a drivers cabin cargo area wheels and often a distinct front grille, a pickup truck is a smaller motorized vehicle with a drivers cabin and an open cargo bed in the rear, a private car is a four wheeled motor vehicle designed for personal transportation typically with seating for passengers and an enclosed cabin, a motorcycle is a two wheeled motor vehicle with a seat for a rider and often a pillion seat for a passenger, a bicycle is a hu

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[11/02 14:54:35 fvcore.common.checkpoint]: [0m[Checkpointer] Loading from models/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth ...


Labeling /home/hasib/bdstreets-dataset/auto_labeler/images/train12933.jpg:   1%|          | 24/3985 [00:28<1:50:51,  1.68s/it]

### Display dataset sample


In [None]:
# Disabled: locations used by the (also disabled) cell that reloads the
# generated dataset with sv.DetectionDataset.from_yolo.
# NOTE(review): the two directory values look swapped — ANNOTATIONS_DIRECTORY_PATH
# points at an "images" folder and IMAGES_DIRECTORY_PATH at a "labels" folder.
# Confirm against the layout actually written under bdss_detic before re-enabling.
# ANNOTATIONS_DIRECTORY_PATH = f"{HOME}/bdss_detic/images/train"
# IMAGES_DIRECTORY_PATH = f"{HOME}/bdss_detic/labels/train"
# DATA_YAML_PATH = f"{HOME}/bdss_detic/data.yaml"

In [None]:
# import supervision as sv

# dataset = sv.DetectionDataset.from_yolo(
#     images_directory_path=IMAGES_DIRECTORY_PATH,
#     annotations_directory_path=ANNOTATIONS_DIRECTORY_PATH,
#     data_yaml_path=DATA_YAML_PATH)

# len(dataset)

In [None]:
# import supervision as sv

# image_names = list(dataset.images.keys())[:SAMPLE_SIZE]
# box_annotator = sv.BoxAnnotator()

# images = []
# for image_name in image_names:
#     image = dataset.images[image_name]
#     annotations = dataset.annotations[image_name]
#     labels = [
#         dataset.classes[class_id]
#         for class_id
#         in annotations.class_id]
#     annotates_image = box_annotator.annotate(
#         scene=image.copy(),
#         detections=annotations,
#         labels=labels)
#     images.append(annotates_image)

# sv.plot_images_grid(
#     images=images,
#     titles=image_names,
#     grid_size=SAMPLE_GRID_SIZE,
#     size=SAMPLE_PLOT_SIZE)

In [None]:
# Disabled: writes each annotated preview image into ./detic_results.
# NOTE(review): this relies on `images` and `image_names` from the preceding
# (also disabled) annotation cell, and on `cv2`, whose only visible import in
# this notebook is commented out — re-enable those first.
# if not os.path.exists("./detic_results"):
#   os.makedirs("./detic_results")

# for img, name in zip(images, image_names):
#   cv2.imwrite(f"./detic_results/{name}", img)

In [None]:
# import locale
# locale.getpreferredencoding = lambda: "UTF-8"

# !zip -r ./detic_results.zip ./detic_results
# from google.colab import files
# files.download("./detic_results.zip")