# Dictionary keys are:

- `id`: integer, id #
- `image_id`: integer, image id #
- `category_id`: 1 for penguin, 2 for turtle
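- `bbox`: list of integers representing the bounding box coordinates in Pascal VOC format [xmin, ymin, xmax, ymax]. Note: the `convert` function in the conversion cell below reads this list as [xmin, ymin, width, height], so verify which layout your annotation file actually uses before converting.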
- `area`: integer representing area of bounding box.
- `segmentation`: empty list; add segmentation masks if you'd like!
- `iscrowd`: integer 0 or 1; whether the instance is a crowd or individual. Not relevant to this particular use case, but is a necessary key for some models.

In [None]:
import os
import json

def convert(size, box):
    """Return a normalized, center-based box ``[x, y, w, h]``.

    Args:
        size: Sequence whose first two items are the image width and height.
        box: Sequence read as ``[x_min, y_min, width, height]``; index 2 and
            index 3 are used as the box width and height, not as the
            opposite corner.

    Returns:
        A list ``[x_center, y_center, width, height]`` where every value has
        been scaled by the reciprocal of the matching image dimension.

    NOTE(review): the notebook text describes ``bbox`` as
    ``[xmin, ymin, xmax, ymax]``. This function does not treat the last two
    values as corner coordinates -- confirm the layout of the input data.
    """
    image_width, image_height = size[0], size[1]
    left, top, box_width, box_height = box[0], box[1], box[2], box[3]

    # Multiply by the reciprocal (rather than divide) to keep the results
    # numerically identical to the established label files.
    scale_x = 1.0 / image_width
    scale_y = 1.0 / image_height

    center_x = (left + box_width / 2.0) * scale_x
    center_y = (top + box_height / 2.0) * scale_y
    return [center_x, center_y, box_width * scale_x, box_height * scale_y]


def convert_annotation(image_info, output_path, image_size=(640, 640)):
    """Write a single annotation as a one-line YOLO label file.

    The file is named ``image_id_<id>.txt`` (id zero-padded to three digits)
    inside ``output_path`` and contains ``<class> <x> <y> <w> <h>``, where the
    class is ``category_id - 1`` and the box values come from ``convert``.

    Args:
        image_info: Mapping with the keys ``"image_id"``, ``"bbox"`` and
            ``"category_id"``.
        output_path: Existing directory that receives the label file.
        image_size: ``(width, height)`` used to normalize the box. Defaults
            to ``(640, 640)``, the value that was previously hard-coded.

    Raises:
        KeyError: If ``image_info`` lacks one of the required keys.
        OSError: If the label file cannot be opened for writing.

    Note:
        The file is opened in write mode, so a later annotation carrying the
        same ``image_id`` replaces the file written for an earlier one.
    """
    image_id = str(image_info["image_id"]).zfill(3)

    # Build the complete label line before touching the filesystem so that a
    # malformed record raises without leaving an empty or truncated file.
    box = convert(image_size, image_info["bbox"])
    label_line = (
        f"{image_info['category_id']-1} {' '.join([str(a) for a in box])}\n"
    )

    label_file = os.path.join(output_path, f"image_id_{image_id}.txt")
    with open(label_file, "w") as outfile:
        outfile.write(label_line)


def coco_to_yolo(coco_annotation_file, output_path):
    """Convert every record of a JSON annotation file into a label file.

    Args:
        coco_annotation_file: Path to a JSON file whose top-level value is
            iterated record by record; each record is handed to
            ``convert_annotation``.
        output_path: Directory for the generated label files. It is created
            (including parents) when it does not exist yet.
    """
    os.makedirs(output_path, exist_ok=True)

    with open(coco_annotation_file) as annotation_handle:
        annotations = json.load(annotation_handle)

    # The JSON is fully parsed above, so the input file is no longer needed
    # while the label files are being written.
    for annotation in annotations:
        convert_annotation(annotation, output_path)


# Generate label files for the training and validation splits.
for annotation_file, label_dir in (
    ("datasets/train_annotations.json", "datasets/labels/train/"),
    ("datasets/valid_annotations.json", "datasets/labels/valid/"),
):
    coco_to_yolo(annotation_file, label_dir)

In [1]:
import torch
# Report whether PyTorch can use a CUDA device; the value is shown as the
# cell output.
torch.cuda.is_available()

True

In [1]:
from ultralytics import YOLO

# Load the checkpoint stored at datasets/weights/best.pt as the starting model.
model = YOLO("datasets/weights/best.pt")

# Train with the dataset definition in animal.yaml on device 0.
model.train(
    data="animal.yaml",
    imgsz=640,
    epochs=100,
    batch=8,
    workers=4,
    device=0,
)

Ultralytics YOLOv8.0.138  Python-3.11.2 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3070 Laptop GPU, 8192MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=datasets/weights/best.pt, data=animal.yaml, epochs=100, patience=50, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=4, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimiz