In [None]:
pip install Augmentor supervision opencv-python autodistill autodistill-yolov8 autodistill-grounded-sam

In [None]:
# This is the necessary library to import for this code snippet.
from Augmentor import Pipeline

# Build the augmentation pipeline: images are read from "./images" and the results are
# written to its "augmented" subfolder (created automatically when it does not exist yet).
augment = Pipeline("./images", "augmented")

# Register the operations; each one is given a probability of 0.5.
for add_flip in (augment.flip_left_right, augment.flip_top_bottom):
    add_flip(probability = 0.5)
augment.zoom(min_factor = 1.1, max_factor = 2, probability = 0.5)
augment.rotate(max_left_rotation = 5, max_right_rotation = 10, probability = 0.5)

# Start the augmentation; the argument is the number of images to generate.
augment.sample(10000)

In [None]:
# These are the necessary libraries to import for this code snippet and the next two ones.
from autodistill_yolov8 import YOLOv8
from autodistill_grounded_sam import GroundedSAM
from autodistill.detection import CaptionOntology
                                                   # Caption*           # Class**
annotate = GroundedSAM(ontology = CaptionOntology({"Human fingernail.": "fingernail"})) # The dict may hold several "caption: class" pairs.

# Folder paths (both names can be changed freely).
images_folder = "./images/augmented" # Put all the training images here.
dataset_folder = "./dataset" # Auto-created, so it does not have to exist beforehand.

# Auto-annotate every image and split the result into "train" and "valid" subsets.
annotate.label(input_folder = images_folder, output_folder = dataset_folder)

# Create the target YOLOv8 model; the .pt weights file is downloaded***
model = YOLOv8("./yolov8m-seg.pt")
"""
Here are 2 of the key features of SAM (Segment Anything Model, an AI computer vision model created by Meta):
∙ It can identify and segment objects in an image thanks to text captions;
∙ It is able to learn the relationships between words and objects.
GroundedSAM combines Grounding DINO (which locates the objects described by a text caption) with SAM (which turns those locations into segmentation masks) in order to annotate the given images.

* Caption is a prompt: it should be as descriptive as possible, concise and grammatically correct (that's why the capital letter and the dot).
** Class is a label that is used for the respective caption in the generated annotations: label should be the name of the object.
*** Available instance segmentation options (from smallest, but less accurate to largest, but more accurate):
∙ yolov8n-seg.pt -> nano (3.4M parameters)
∙ yolov8s-seg.pt -> small (11.8M parameters)
∙ yolov8m-seg.pt -> medium (27.3M parameters)
∙ yolov8l-seg.pt -> large (46.0M parameters)
∙ yolov8x-seg.pt -> extra-large (71.8M parameters)

The key concept of this code is to use a bigger, slower model to auto-annotate the data used to train a smaller, faster and more portable one.
"""

In [None]:
# Train the model on the auto-annotated images; data.yaml is the default configuration file.
dataset_config = "./dataset/data.yaml"
model.train(dataset_config)

In [None]:
# Run instance segmentation on the test image to predict where the objects are,
# then keep the first entry of what predict() returns.
test_results = model.predict("./test.jpg")
predict = test_results[0]

In [None]:
# These are the necessary libraries to import for this code snippet.
from cv2 import imread, imwrite
from supervision import Detections, MaskAnnotator

# Load the test image. cv2.imread does not raise when the file is missing or unreadable,
# it returns None, so fail early with a clear message instead of an obscure error later.
scene = imread("./test.jpg")
if scene is None:
    raise FileNotFoundError("Could not read the image './test.jpg'.")

# Draw the predicted masks on the image (fully opaque).
annotated_scene = MaskAnnotator().annotate(
    scene = scene,
    detections = Detections.from_yolov8(predict),
    opacity = 1.0
)

# NOTE: the annotated result overwrites the input image, so run the prediction cell on a
# fresh copy of the picture if this cell has to be executed again.
# cv2.imwrite reports a failure by returning False rather than raising, so check it.
if not imwrite("./test.jpg", annotated_scene):
    raise OSError("Could not write the annotated image to './test.jpg'.")