In [None]:
pip install autodistill autodistill-grounded-sam autodistill-yolov8

In [None]:
# Libraries required by this notebook
from autodistill_yolov8 import YOLOv8
from autodistill_grounded_sam import GroundedSAM
from autodistill.detection import CaptionOntology

# The ontology maps each caption* (the text prompt) to a class** (the label used
# in the generated annotations); the dict can contain multiple caption-class pairs.
annotator = GroundedSAM(
    ontology=CaptionOntology(
        {
            "Human fingernail.": "fingernail",  # caption*: class**
        }
    )
)

# Auto-annotate all the given images and split them into "train" and "valid" sets.
annotator.label(
    input_folder="./images",    # put all the training images here (the name can be changed)
    output_folder="./dataset",  # auto-generated, so there is no need to create it beforehand (the name can be changed)
)

# Load the YOLOv8 model; the .pt model file is downloaded automatically ***
model = YOLOv8("./yolov8m-seg.pt")

"""
here are 2 of the key features of SAM (Segment Anything Model, an AI computer vision model created by Meta):
∙ it can identify and segment objects in an image thanks to text captions
∙ it is able to learn the relationships between words and objects
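GroundedSAM pairs SAM with a text-prompted object detector (Grounding DINO): each caption is used to locate the matching objects, which SAM then segments, so the given images are annotated without any manual labelling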
* caption is a prompt: it must be as descriptive as possible, concise and grammatically correct (that's why the capital letter and the dot)
** class is the label that will be used for the respective caption in the generated annotations: the label should be the name of the object
*** available instance segmentation options (from the smallest and least accurate to the largest and most accurate):
∙ yolov8n-seg.pt -> nano (3.4M parameters)
∙ yolov8s-seg.pt -> small (11.8M parameters)
∙ yolov8m-seg.pt -> medium (27.3M parameters)
∙ yolov8l-seg.pt -> large (46.0M parameters)
∙ yolov8x-seg.pt -> extra-large (71.8M parameters)
"""

In [None]:
# Train the model on the auto-annotated images.
# data.yaml is the default configuration file, read here from the ./dataset folder.
model.train("./dataset/data.yaml")

In [None]:
# Run instance segmentation on the input image to predict where the objects are;
# the result is stored in `predict`.
predict = model.predict("test.jpg")