In [1]:
import ultralytics
from ultralytics import YOLO


In [18]:
import shutil
import tempfile

from ultralytics.data.converter import convert_coco

# convert_coco() documents `labels_dir` as a DIRECTORY containing COCO *.json
# annotation files, not the path of a single file. Passing the file path
# directly converts nothing while still logging success, so the annotation
# file is staged in its own temporary directory; this also keeps any
# unrelated *.json files in the working directory out of the conversion.
with tempfile.TemporaryDirectory() as staging_dir:
    shutil.copy('combined_annotations.json', staging_dir)
    convert_coco(
        labels_dir=staging_dir,
        save_dir='coco_to_yolo',
        # The default (True) remaps the 91 original COCO category ids onto the
        # 80-class COCO scheme, which is only meaningful for the official COCO
        # categories and not for a custom dataset.
        # NOTE(review): confirm the resulting class indices match dataset.yaml.
        cls91to80=False,
    )


COCO data converted successfully.
Results saved to /Users/aditya/Documents/Github/drone-vs-bird-challenge/coco_to_yolo


In [23]:
import json

# COCO annotation file to inspect
coco_file = "combined_annotations.json"

# Parse the whole JSON document into memory
with open(coco_file, "r") as f:
    coco_data = json.load(f)

# Map every image id to its file name so annotations can be cross-checked
images = dict((img["id"], img["file_name"]) for img in coco_data["images"])
annotations = coco_data["annotations"]

# Report each annotation whose image_id does not refer to a known image
for ann in annotations:
    image_id = ann["image_id"]
    if image_id in images:
        continue
    print(f"Invalid annotation with image_id: {image_id}")


In [25]:
import json
import os

def validate_coco_annotations(coco_file, max_issues_shown=None):
    """Validate a COCO-format annotation file and print a human-readable report.

    The file must contain the top-level sections ``images``, ``annotations``
    and ``categories``. Each image is checked for ``file_name``, ``width`` and
    ``height``; each annotation is checked for a known ``image_id``, a known
    ``category_id`` and a 4-element numeric ``[x, y, w, h]`` bbox with positive
    size that lies inside its image; each category id must be an integer.

    Malformed records are collected as issues instead of aborting the run with
    a ``KeyError``/``TypeError``, so a single bad record cannot hide the rest
    of the report.

    Args:
        coco_file: Path to the COCO JSON file.
        max_issues_shown: Optional cap on how many issues are printed.
            ``None`` (the default) prints every issue. The total number of
            issues is always printed.

    Returns:
        bool: ``True`` when no issues were found, ``False`` otherwise
        (including when a required top-level section is missing).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """

    def is_number(value):
        # bool is a subclass of int, but JSON true/false is not a valid size/coordinate.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(coco_file, "r", encoding="utf-8") as f:
        coco_data = json.load(f)

    # Validate sections
    required_sections = ("images", "annotations", "categories")
    if not isinstance(coco_data, dict) or not all(key in coco_data for key in required_sections):
        print("Error: Missing one or more required sections: 'images', 'annotations', 'categories'.")
        return False

    # Track validation issues
    issues = []

    # Index images and categories by id; entries without an id cannot be
    # indexed, so they are reported instead of raising KeyError.
    images = {}
    for img in coco_data["images"]:
        if "id" not in img:
            issues.append(f"Image entry is missing 'id': {img}.")
            continue
        images[img["id"]] = img

    categories = {}
    for cat in coco_data["categories"]:
        if "id" not in cat:
            issues.append(f"Category entry is missing 'id': {cat}.")
            continue
        categories[cat["id"]] = cat.get("name")

    annotations = coco_data["annotations"]

    print(f"Total Images: {len(images)}")
    print(f"Total Annotations: {len(annotations)}")
    print(f"Total Categories: {len(categories)}")

    # Check if images have valid data
    for img_id, img in images.items():
        if "file_name" not in img or "width" not in img or "height" not in img:
            issues.append(f"Image ID {img_id} is missing required fields (file_name, width, height).")

    # Check annotations
    for ann in annotations:
        # An annotation without an id must still be reportable.
        ann_id = ann.get("id", "<missing>")
        image_id = ann.get("image_id")
        category_id = ann.get("category_id")
        bbox = ann.get("bbox")

        # Validate image_id
        if image_id not in images:
            issues.append(f"Annotation ID {ann_id} references invalid image_id {image_id}.")

        # Validate category_id
        if category_id not in categories:
            issues.append(f"Annotation ID {ann_id} references invalid category_id {category_id}.")

        # Validate bbox (the bbox checks are the last step for each annotation,
        # so `continue` only skips the remaining bbox checks).
        if not bbox:
            issues.append(f"Annotation ID {ann_id} is missing bbox.")
            continue
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            issues.append(f"Annotation ID {ann_id} has an invalid bbox (must have 4 elements).")
            continue
        if not all(is_number(value) for value in bbox):
            issues.append(f"Annotation ID {ann_id} has a non-numeric bbox {bbox}.")
            continue

        x, y, w, h = bbox
        if w <= 0 or h <= 0:
            issues.append(f"Annotation ID {ann_id} has a non-positive bbox (w={w}, h={h}).")

        img = images.get(image_id) if image_id in images else None
        if img is None:
            continue
        width, height = img.get("width"), img.get("height")
        if not (is_number(width) and is_number(height)):
            # Missing dimensions were already reported for the image itself;
            # the bounds check is impossible without them.
            continue
        if x < 0 or y < 0 or x + w > width or y + h > height:
            issues.append(
                f"Annotation ID {ann_id} has a bbox out of image bounds (image_id={image_id})."
            )

    # Check categories
    for cat_id in categories:
        if isinstance(cat_id, bool) or not isinstance(cat_id, int):
            issues.append(f"Category ID {cat_id} is not an integer.")

    # Summary
    if issues:
        print("\nValidation Issues Found:")
        shown = issues if max_issues_shown is None else issues[:max(0, max_issues_shown)]
        for issue in shown:
            print(f"- {issue}")
        hidden = len(issues) - len(shown)
        if hidden > 0:
            print(f"... {hidden} more issue(s) not shown.")
        print(f"\nTotal Issues: {len(issues)}")
    else:
        print("\nAll annotations are valid!")

    return not issues


# Run the validator on the combined annotation file and report the verdict
coco_file = "combined_annotations.json"  # Replace with your COCO JSON file
is_valid = validate_coco_annotations(coco_file)
print(
    "\nDataset validation passed!"
    if is_valid
    else "\nDataset validation failed. See issues above."
)


Total Images: 106466
Total Annotations: 119244
Total Categories: 1

Validation Issues Found:
- Annotation ID 85 has a bbox out of image bounds (image_id=86).
- Annotation ID 86 has a bbox out of image bounds (image_id=87).
- Annotation ID 87 has a bbox out of image bounds (image_id=88).
- Annotation ID 2694 has a bbox out of image bounds (image_id=2732).
- Annotation ID 2695 has a bbox out of image bounds (image_id=2733).
- Annotation ID 2696 has a bbox out of image bounds (image_id=2734).
- Annotation ID 2697 has a bbox out of image bounds (image_id=2735).
- Annotation ID 2698 has a bbox out of image bounds (image_id=2736).
- Annotation ID 2699 has a bbox out of image bounds (image_id=2737).
- Annotation ID 2700 has a bbox out of image bounds (image_id=2738).
- Annotation ID 2701 has a bbox out of image bounds (image_id=2739).
- Annotation ID 2702 has a bbox out of image bounds (image_id=2740).
- Annotation ID 2703 has a bbox out of image bounds (image_id=2741).
- Annotation ID 2704 h

In [2]:
# Print the Ultralytics environment summary (library/Python/torch versions,
# compute device, CPU/RAM/disk) as a sanity check before training.
ultralytics.checks()

Ultralytics 8.3.63 🚀 Python-3.10.16 torch-2.5.1 CPU (Apple M3)
Setup complete ✅ (8 CPUs, 16.0 GB RAM, 129.5/460.4 GB disk)


In [None]:
# Instantiate the YOLO model from the 'yolo11n.pt' weights file; `model` is
# used by the device check and the training call.
model = YOLO('yolo11n.pt')  # 'n' denotes the nano version

In [4]:
# Display the device the loaded model currently lives on (cell output).
model.device

device(type='cpu')

In [7]:
# Fine-tune the loaded model on the dataset described by dataset.yaml.
# NOTE(review): the captured log for this cell reports "0 images, N backgrounds"
# for both train and val with box_loss 0 — i.e. no label files were picked up.
# Confirm the labels exist where dataset.yaml expects them before a long run.
model.train(
    data='dataset.yaml',  # Path to the dataset configuration file
    epochs=50,            # Number of training epochs
    imgsz=640,            # Image size for training
    batch=16,             # Batch size (adjust based on your hardware capabilities)
    name='drone_vs_bird_finetune',  # Name of the training run
    pretrained=True,      # Use pre-trained weights
    device='cpu',         # Training device ('cpu' here). NOTE(review): Ultralytics also documents device='mps' for Apple Silicon — confirm it is stable for this version before switching
    amp=True              # Request Automatic Mixed Precision. NOTE(review): presumably has no effect on CPU — confirm
)


Ultralytics 8.3.63 🚀 Python-3.10.16 torch-2.5.1 CPU (Apple M3)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=dataset.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=None, name=drone_vs_bird_finetune4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=Tr

[34m[1mtrain: [0mScanning /Users/aditya/Documents/Github/drone-vs-bird-challenge/dataset/labels/train... 0 images, 74526 backgrounds, 0 corrupt: 100%|██████████| 74526/74526 [00:05<00:00, 13071.71it/s]






[34m[1mtrain: [0mNew cache created: /Users/aditya/Documents/Github/drone-vs-bird-challenge/dataset/labels/train.cache


[34m[1mval: [0mScanning /Users/aditya/Documents/Github/drone-vs-bird-challenge/dataset/labels/val... 0 images, 15970 backgrounds, 0 corrupt: 100%|██████████| 15970/15970 [00:01<00:00, 13133.95it/s]






[34m[1mval: [0mNew cache created: /Users/aditya/Documents/Github/drone-vs-bird-challenge/dataset/labels/val.cache
Plotting labels to runs/detect/drone_vs_bird_finetune4/labels.jpg... 
zero-size array to reduction operation maximum which has no identity
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/drone_vs_bird_finetune4[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G          0      125.8          0          0        640:   1%|          | 34/4658 [03:48<8:38:22,  6.73s/it] 


KeyboardInterrupt: 

Ultralytics 8.3.63 🚀 Python-3.10.16 torch-2.5.1 CPU (Apple M3)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=dataset.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=None, name=drone_vs_bird_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=None, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/drone_vs_bird_finetune