In [1]:
import os
import json
import sys
from sklearn.model_selection import train_test_split

# Aim to convert the current annotation format to a coco format
original_vlm_json_path = os.path.join("advanced", "vlm.jsonl")

# Check if the file exists
if not os.path.exists(original_vlm_json_path):
    print("The file does not exist")
    sys.exit(1)

# Load the json file
vlm_json = []
with open(original_vlm_json_path, "r") as f:
    for line in f:
        vlm_json.append(json.loads(line))

# # # Split the vlm_json into train and val first using sklearn
# train_vlm_json, val_vlm_json = train_test_split(vlm_json, test_size=0.1, random_state=42)

# train_vlm_json_path = os.path.join("train_vlm_yolo.jsonl")
# val_vlm_json_path = os.path.join("val_vlm_yolo.jsonl")

# with open(train_vlm_json_path, "w") as f:
#     for item in train_vlm_json:
#         f.write(json.dumps(item) + "\n")

# with open(val_vlm_json_path, "w") as f:
#     for item in val_vlm_json:
#         f.write(json.dumps(item) + "\n")

classes = []
full_classes = []

# Get all the classes
for data in vlm_json:
    for annotation in data["annotations"]:
        caption = annotation["caption"]
        full_classes.append(caption)
        if caption not in classes:
            classes.append(caption)

print(f"Length of classes: {len(classes)}")
print(f"Length of full classes: {len(full_classes)}")

class_ids= {}

for i, class_name in enumerate(classes):
    class_ids[class_name] = i
    
    
new_dir = "data"
old_dir = "advanced"
dataset_type = "train"


Length of classes: 126
Length of full classes: 27913


In [13]:
# Val images and labels
from PIL import Image

new_dir = "data"
old_dir = "advanced"
dataset_type = "val"

# Delete the directory if it exists
if os.path.exists(os.path.join(new_dir, dataset_type)):
    os.system(f"rm -rf {os.path.join(new_dir, dataset_type)}")

# Create the coco format
for vlm in val_vlm_json:
    image_path = vlm["image"]
    annotations = vlm["annotations"]
    new_image_path = os.path.join(new_dir, dataset_type, 'images', image_path)
    new_txt_path = new_image_path.replace("jpg", "txt").replace("images", "labels")
    # Copy the image to the new directory
    os.makedirs(os.path.dirname(new_image_path), exist_ok=True)
    os.makedirs(os.path.dirname(new_txt_path), exist_ok=True)
    os.system(f"cp {os.path.join(old_dir, 'images', image_path)} {new_image_path}")

    # Get the size of the image
    with Image.open(os.path.join(old_dir, "images", image_path)) as img:
        width, height = img.size
        # Since the image is not the actual image
        width = 1520
        height = 870

    with open(new_txt_path, "w") as f:
        for annotation in annotations:
            caption = annotation["caption"]
            bbox = annotation["bbox"]
            x, y, w, h = bbox
            x_center = x + w / 2
            y_center = y + h / 2
            class_id = class_ids[caption]

            # Normalize the values to be between 0 and 1
            x_center /= width
            y_center /= height
            w /= width
            h /= height
            
            f.write(f"{class_id} {x_center} {y_center} {w} {h}\n")
    

In [14]:
# Train images and labels
from PIL import Image

new_dir = "data"
old_dir = "advanced"
dataset_type = "train"

# Delete the directory if it exists
if os.path.exists(os.path.join(new_dir, dataset_type)):
    os.system(f"rm -rf {os.path.join(new_dir, dataset_type)}")

# Create the coco format
for vlm in train_vlm_json:
    image_path = vlm["image"]
    annotations = vlm["annotations"]
    new_image_path = os.path.join(new_dir, dataset_type, 'images', image_path)
    new_txt_path = new_image_path.replace("jpg", "txt").replace("images", "labels")
    # Copy the image to the new directory
    os.makedirs(os.path.dirname(new_image_path), exist_ok=True)
    os.makedirs(os.path.dirname(new_txt_path), exist_ok=True)
    os.system(f"cp {os.path.join(old_dir, 'images', image_path)} {new_image_path}")

    # Get the size of the image
    with Image.open(os.path.join(old_dir, "images", image_path)) as img:
        width, height = img.size
        # Since the image is not the actual image
        width = 1520
        height = 870

    with open(new_txt_path, "w") as f:
        for annotation in annotations:
            caption = annotation["caption"]
            bbox = annotation["bbox"]
            x, y, w, h = bbox
            x_center = x + w / 2
            y_center = y + h / 2
            class_id = class_ids[caption]

            # Normalize the values to be between 0 and 1
            x_center /= width
            y_center /= height
            w /= width
            h /= height
            
            f.write(f"{class_id} {x_center} {y_center} {w} {h}\n")
    

In [2]:
# Check if the files have been created
train_dir = os.path.join(new_dir, "train")
val_dir = os.path.join(new_dir, "val")
train_dir_images = os.path.join(train_dir, "images")
val_dir_images = os.path.join(val_dir, "images")

train_dir_labels = os.path.join(train_dir, "labels")
val_dir_labels = os.path.join(val_dir, "labels")

print(f"Number of images in train: {len(os.listdir(train_dir_images))}")
print(f"Number of images in val: {len(os.listdir(val_dir_images))}")

print(f"Number of labels in train: {len(os.listdir(train_dir_labels))}")
print(f"Number of labels in val: {len(os.listdir(val_dir_labels))}")

Number of images in train: 4596
Number of images in val: 511
Number of labels in train: 4596
Number of labels in val: 511


In [64]:
images_path_list = os.listdir(train_dir_images) 

for x in images_path_list:
    path = os.path.join(train_dir_images, x)
    image = Image.open(path)
    if image.size != (1520, 870):
        print("No right size")

In [14]:
# Add data.yaml file into data directory
# Format of the data.yaml file
"""
train: ../train/images
val: ../val/images

nc: number of classes
names: [class1, class2, class3, ...]
"""

data_yaml_path = os.path.join(new_dir, "data.yaml")

with open(data_yaml_path, "w") as f:
    f.write(f"train: ../train/images\n")
    f.write(f"val: ../val/images\n\n")
    f.write(f"nc: {len(classes)}\n")
    f.write(f"names: {classes}\n")

In [None]:
!pip install ultralytics

In [3]:
from ultralytics import YOLOWorld
from ultralytics.data.augment import Albumentations
from ultralytics.utils import LOGGER, colorstr

In [4]:
# model = YOLOWorld("2024_05_28_80epochs_yolov_world.pt")
best_model_path = os.path.join("runs", "detect", "train93", "weights", "best.pt")

# best_model = YOLOWorld(best_model_path)
model = YOLOWorld("yolov8x-worldv2.pt")


In [5]:
model.set_classes(classes)

In [6]:
new_dir = "data"
data_yaml_path = os.path.join(new_dir, "data.yaml")
data_yaml_path



'data/data.yaml'

In [8]:
!cat /home/jupyter/.config/Ultralytics/settings.yaml

settings_version: 0.0.4
datasets_dir: ''
weights_dir: weights
runs_dir: runs
uuid: ce422fe0ee5580ad5f410dc2974ada6fd84d08f9890a6e003516986927c64ab4
sync: true
api_key: ''
openai_api_key: ''
clearml: true
comet: true
dvc: true
hub: true
mlflow: true
neptune: true
raytune: true
tensorboard: true
wandb: true


In [19]:
!pip install -U albumentations

[0m

In [7]:
def __init__(self, p=1.0):
        """Initialize the transform object for YOLO bbox formatted params."""
        self.p = p
        self.transform = None
        prefix = colorstr("albumentations: ")
        try:
            import albumentations as A
            
            spatial_transforms = {
                "Affine",
                "BBoxSafeRandomCrop",
                "CenterCrop",
                "CoarseDropout",
                "Crop",
                "CropAndPad",
                "CropNonEmptyMaskIfExists",
                "D4",
                "ElasticTransform",
                "Flip",
                "GridDistortion",
                "GridDropout",
                "HorizontalFlip",
                "Lambda",
                "LongestMaxSize",
                "MaskDropout",
                "MixUp",
                "Morphological",
                "NoOp",
                "OpticalDistortion",
                "PadIfNeeded",
                "Perspective",
                "PiecewiseAffine",
                "PixelDropout",
                "RandomCrop",
                "RandomCropFromBorders",
                "RandomGridShuffle",
                "RandomResizedCrop",
                "RandomRotate90",
                "RandomScale",
                "RandomSizedBBoxSafeCrop",
                "RandomSizedCrop",
                "Resize",
                "Rotate",
                "SafeRotate",
                "ShiftScaleRotate",
                "SmallestMaxSize",
                "Transpose",
                "VerticalFlip",
                "XYMasking",
            }  # from https://albumentations.ai/docs/getting_started/transforms_and_targets/#spatial-level-transforms

            # Insert required transformation here
            T = [
                # A.RandomRain(p=0.4, slant_lower=-10, slant_upper=10, 
                #               drop_length=20, drop_width=1, drop_color=(200, 200, 200), 
                #               blur_value=5, brightness_coefficient=0.9, rain_type=None),
                # A.Rotate(limit = 10, p=0.5),
                # A.Blur(p=0.1),
                # A.HorizontalFlip(p=0.5),  # Adds horizontal flipping with a 50% probability
                # A.VerticalFlip(p=0.5),    # Adds 
                # A.MedianBlur(p=0.1),
                # A.ImageCompression(quality_lower=75, p=0.0),
                A.Rotate(limit=15, p=0.3),
                A.Blur(blur_limit=(3, 5), p=0.3),
                A.RandomSizedCrop(min_max_height=(int(0.8 * 640), 640), height=640, width=640, p=0.3)
            ]

            self.contains_spatial = any(transform.__class__.__name__ in spatial_transforms for transform in T)
            self.transform = (
                A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))
                if self.contains_spatial
                else A.Compose(T)
            )
            
            LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p))
        except ImportError:  # package not installed, skip
            print("Importing error")
        except Exception as e:
            LOGGER.info(f"{prefix}{e}")

Albumentations.__init__ = __init__

In [None]:
results = model.train(data=data_yaml_path, epochs=100, imgsz=640, augment=True, batch=8)

# model.save("2024_05_29_100epochs_yolov_world.pt")

New https://pypi.org/project/ultralytics/8.2.26 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.22 🚀 Python-3.10.14 torch-1.13.1+cu117 CUDA:0 (Tesla T4, 14918MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8x-worldv2.pt, data=data/data.yaml, epochs=100, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train23, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=Fal

[34m[1mtrain: [0mScanning /home/jupyter/data/train/labels.cache... 4596 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4596/4596 [00:00<?, ?it/s]

[34m[1malbumentations: [0mRotate(p=0.3, limit=(-15, 15), interpolation=1, border_mode=4, value=None, mask_value=None), Blur(p=0.3, blur_limit=(3, 5)), RandomSizedCrop(p=0.3, min_max_height=(512, 640), height=640, width=640, w2h_ratio=1.0, interpolation=1)



[34m[1mval: [0mScanning /home/jupyter/data/val/labels.cache... 511 images, 0 backgrounds, 0 corrupt: 100%|██████████| 511/511 [00:00<?, ?it/s]


Plotting labels to runs/detect/train23/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=7.7e-05, momentum=0.9) with parameter groups 104 weight(decay=0.0), 115 weight(decay=0.0005), 121 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns/detect/train23[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      9.67G      1.394      3.371     0.9754         49        640: 100%|██████████| 575/575 [11:27<00:00,  1.20s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:10<00:00,  3.08it/s]


                   all        511       2801      0.211      0.346      0.225      0.169

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100      8.65G      1.225      2.016     0.9219         39        640: 100%|██████████| 575/575 [11:19<00:00,  1.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:10<00:00,  3.17it/s]


                   all        511       2801      0.494      0.489      0.491      0.362

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/100      8.64G      1.161      1.545     0.9084         28        640: 100%|██████████| 575/575 [11:15<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:10<00:00,  3.20it/s]

                   all        511       2801      0.662      0.601      0.655      0.495






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/100      8.62G      1.122      1.286     0.8972         37        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.22it/s]

                   all        511       2801      0.679      0.686      0.742      0.557






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/100      8.64G      1.095      1.119     0.8877         30        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.23it/s]

                   all        511       2801      0.725      0.758      0.798      0.591






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/100      8.65G      1.064      1.007     0.8831         38        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.25it/s]

                   all        511       2801      0.847      0.757      0.842      0.635






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/100      8.65G       1.06     0.9366     0.8813         44        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.22it/s]

                   all        511       2801       0.87      0.775      0.861      0.645






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/100      8.62G      1.031     0.8744     0.8705         32        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.22it/s]

                   all        511       2801      0.851      0.804      0.878      0.655






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/100      8.64G      1.026     0.8305     0.8722         37        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.23it/s]

                   all        511       2801      0.865      0.828      0.898      0.674






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/100      8.65G      1.008     0.7758     0.8678         47        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:10<00:00,  3.14it/s]

                   all        511       2801      0.866      0.858      0.907      0.687






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/100      8.64G      1.004     0.7578     0.8684         42        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.25it/s]

                   all        511       2801      0.885      0.865      0.918      0.691






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/100      8.64G     0.9848     0.7185     0.8632         33        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.20it/s]

                   all        511       2801      0.882      0.879      0.927      0.696






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/100      8.64G     0.9743     0.6955     0.8597         52        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.24it/s]

                   all        511       2801      0.906      0.892      0.935      0.724






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/100      8.64G     0.9672     0.6773     0.8592         29        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.25it/s]

                   all        511       2801      0.912      0.895      0.941      0.712






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/100      8.65G     0.9481     0.6498     0.8532         40        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.25it/s]

                   all        511       2801      0.905      0.903      0.943      0.711






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/100      8.64G     0.9503     0.6391     0.8551         43        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.23it/s]

                   all        511       2801      0.923      0.908       0.95       0.72






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/100      8.64G      0.946     0.6289     0.8527         41        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.24it/s]

                   all        511       2801      0.932      0.894      0.951      0.725






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/100      8.64G      0.932     0.6081     0.8507         40        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.25it/s]

                   all        511       2801       0.94      0.903      0.953      0.727






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     19/100      8.64G     0.9241     0.5975     0.8486         44        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.27it/s]

                   all        511       2801      0.939      0.915      0.959      0.733






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     20/100      8.62G     0.9161     0.5824     0.8467         33        640: 100%|██████████| 575/575 [11:16<00:00,  1.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.21it/s]

                   all        511       2801      0.942      0.927      0.963      0.736






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     21/100      8.65G      0.907     0.5671     0.8463         16        640: 100%|██████████| 575/575 [11:19<00:00,  1.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.22it/s]

                   all        511       2801      0.944      0.935      0.965      0.732






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     22/100      8.65G     0.9068     0.5629     0.8458         41        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.26it/s]

                   all        511       2801      0.944       0.92      0.964      0.737






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     23/100      8.64G     0.9007     0.5545     0.8438         35        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.26it/s]

                   all        511       2801      0.953      0.927      0.967      0.735






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     24/100      8.64G     0.8898     0.5419     0.8425         38        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.23it/s]

                   all        511       2801      0.954      0.925      0.968      0.738






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     25/100      8.64G     0.8825     0.5367     0.8395         19        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.26it/s]

                   all        511       2801       0.96      0.933      0.971      0.744






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     26/100      8.65G     0.8697     0.5191      0.837         25        640: 100%|██████████| 575/575 [11:15<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.22it/s]

                   all        511       2801      0.954      0.941      0.974      0.746






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     27/100      8.65G     0.8749     0.5178     0.8376         38        640: 100%|██████████| 575/575 [11:15<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.26it/s]

                   all        511       2801      0.963      0.935      0.974      0.747






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     28/100      8.63G     0.8721     0.5148     0.8385         27        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.27it/s]

                   all        511       2801      0.959      0.939      0.975      0.753






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     29/100      8.64G     0.8599     0.5102      0.837         23        640: 100%|██████████| 575/575 [11:13<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.24it/s]

                   all        511       2801      0.964      0.941      0.975      0.754






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     30/100      8.66G     0.8573     0.5005     0.8349         57        640: 100%|██████████| 575/575 [11:15<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.25it/s]

                   all        511       2801      0.974      0.933      0.975      0.747






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     31/100      8.65G     0.8546     0.4966     0.8339         38        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.24it/s]

                   all        511       2801      0.967      0.941      0.975      0.749






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     32/100      8.65G     0.8505     0.4909      0.835         26        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.23it/s]

                   all        511       2801      0.966      0.937      0.976      0.754






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     33/100      8.65G     0.8432     0.4865     0.8343         25        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.22it/s]

                   all        511       2801      0.965      0.943      0.977      0.751






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     34/100      8.64G     0.8398     0.4768     0.8335         34        640: 100%|██████████| 575/575 [11:14<00:00,  1.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:09<00:00,  3.25it/s]

                   all        511       2801      0.969      0.944      0.977      0.758






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     35/100      8.62G     0.8376     0.4777     0.8282         62        640:  38%|███▊      | 218/575 [04:16<06:58,  1.17s/it]

In [10]:
# Conduct model validation on the COCO8 example dataset
metrics = model.val(data=data_yaml_path)

Ultralytics YOLOv8.2.22 🚀 Python-3.10.14 torch-1.13.1+cu117 CUDA:0 (Tesla T4, 14918MiB)
YOLOv8s-worldv2 summary: 422 layers, 164026601 parameters, 151786521 gradients, 61.6 GFLOPs


[34m[1mval: [0mScanning /home/jupyter/data/val/labels.cache... 1022 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1022/1022 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 64/64 [00:37<00:00,  1.70it/s]


                   all       1022       5574      0.215       0.24     0.0188     0.0138
          grey missile       1022         54          1          0    0.00537    0.00322
red, white, and blue light aircraft       1022         52     0.0139      0.538     0.0155    0.00877
green and black missile       1022         44          1          0     0.0421     0.0358
white and red helicopter       1022         31     0.0124     0.0323     0.0104     0.0067
grey camouflage fighter jet       1022         49          0          0     0.0107     0.0083
grey and white fighter plane       1022         52     0.0174      0.673     0.0209      0.016
 white and black drone       1022         61          0          0     0.0108    0.00647
white and black fighter jet       1022         47     0.0164      0.638     0.0183      0.013
         white missile       1022         52          1          0     0.0173     0.0134
black and white commercial aircraft       1022         41     0.0102      0.75

In [9]:
metrics = model.val(data=data_yaml_path)

Ultralytics YOLOv8.2.22 🚀 Python-3.10.14 torch-1.13.1+cu117 CUDA:0 (Tesla T4, 14918MiB)
YOLOv8s-worldv2 summary (fused): 195 layers, 12749288 parameters, 0 gradients, 61.6 GFLOPs


[34m[1mval: [0mScanning /home/jupyter/data/val/labels.cache... 1022 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1022/1022 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 64/64 [00:15<00:00,  4.00it/s]


                   all       1022       5574      0.922      0.862      0.927      0.722
          grey missile       1022         54      0.753      0.556      0.693       0.53
red, white, and blue light aircraft       1022         52          1          1      0.995      0.734
green and black missile       1022         44      0.917      0.886       0.94      0.757
white and red helicopter       1022         31      0.967      0.949      0.982      0.719
grey camouflage fighter jet       1022         49      0.752      0.653      0.815      0.641
grey and white fighter plane       1022         52      0.916      0.981      0.993      0.801
 white and black drone       1022         61          1      0.844      0.914      0.692
white and black fighter jet       1022         47      0.983      0.979      0.994      0.735
         white missile       1022         52       0.74      0.673      0.775      0.575
black and white commercial aircraft       1022         41      0.869      0.95

In [11]:
test_image_path = os.path.join("advanced", "images", "image_0.jpg")

results = model.predict(test_image_path)

results[0].show()


image 1/1 /home/jupyter/advanced/images/image_0.jpg: 384x640 1 blue, yellow, and green fighter plane, 1 grey and white light aircraft, 1 white and blue fighter jet, 1 yellow, red, and blue fighter plane, 1 black and white missile, 1 grey and yellow fighter plane, 1 white and red fighter jet, 19.6ms
Speed: 18.4ms preprocess, 19.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)


/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmptb9y45dh.PNG'


In [17]:
bboxes = []

bboxes_tensors = results[0].boxes.xywh
for tensor in bboxes_tensors:
    bboxes.append(tensor.cpu().numpy())

In [18]:
bboxes

[array([     576.76,      137.63,      99.661,      41.889], dtype=float32),
 array([        578,      138.45,      98.336,      40.679], dtype=float32),
 array([     1044.1,      93.673,      30.411,      34.963], dtype=float32),
 array([     1044.1,      93.753,      30.103,      34.893], dtype=float32),
 array([     742.72,      538.83,      79.015,      65.385], dtype=float32),
 array([     580.46,      621.92,      17.883,      17.654], dtype=float32),
 array([     743.31,      538.85,      80.516,        67.6], dtype=float32)]

In [19]:
from PIL import Image, ImageDraw
import torch

image = Image.open(test_image_path)

# Create a draw object
draw = ImageDraw.Draw(image)

for bbox in bboxes:
# Extract the center coordinates, width, and height
    x_center, y_center, width, height = bbox

    x1 = x_center - width / 2
    y1 = y_center - height / 2
    x2 = x_center + width / 2
    y2 = y_center + height / 2


    # Draw the rectangle
    draw.rectangle([x1, y1, x2, y2], outline="red", width=2)

# Display the image (if using Jupyter Notebook)
image.show()

# If not in a notebook, save or display the image as needed
image.save('output_image_with_bbox_2.jpg')


/usr/bin/xdg-open: 882: www-browser: not found
/usr/bin/xdg-open: 882: links2: not found
/usr/bin/xdg-open: 882: elinks: not found
/usr/bin/xdg-open: 882: links: not found
/usr/bin/xdg-open: 882: lynx: not found
/usr/bin/xdg-open: 882: w3m: not found
xdg-open: no method available for opening '/var/tmp/tmpspw2nzaq.PNG'
