In [1]:
import torch

# Check if GPU is available
if torch.cuda.is_available():
    device = torch.device('cuda')        # GPU
    print("Device type: GPU")
    print("CUDA device count:", torch.cuda.device_count())
    print("Current CUDA device:", torch.cuda.current_device())
    print("CUDA device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    device = torch.device('cpu')         # CPU
    print("Device type: CPU")

Device type: GPU
CUDA device count: 1
Current CUDA device: 0
CUDA device name: NVIDIA A100-SXM4-80GB


In [None]:
# CONFIG 
PROJECT_ROOT = '/content/Advanced_YOLOv8_OpenImages'
CLASSES = ["Airplane", "Helicopter", "Drone", "Bird","Rocket"]
MAX_PER_CLASS = 500 # increase for full-scale experiments
TRAIN_IMG_SIZE_START = 640
TRAIN_IMG_SIZE_FINAL = 1280
BATCH = 8
EPOCHS_STAGE1 = 100 # generalized model training 
EPOCHS_STAGE2 = 80 # refined fine-tuning
MODEL_PRETRAIN = 'yolov8m.pt' # base pretrained checkpoint
DEVICE = 0
PROJECT = PROJECT_ROOT + '/runs'
EXPERIMENT = 'advanced_multi_scale'
DRIVE_SAVE_DIR = '/content/drive/MyDrive/YOLO_Advanced_Results' # change if needed
USE_TILING = True # set to True to enable tiling augmentation pipeline
TILE_SIZE = 640
TILE_OVERLAP = 0.2

In [3]:
!pip install --upgrade pip
!pip install ultralytics==8.* fiftyone opencv-python-headless matplotlib pandas tqdm roboflow

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m87.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2
Collecting ultralytics==8.*
  Downloading ultralytics-8.3.204-py3-none-any.whl.metadata (37 kB)
Collecting fiftyone
  Downloading fiftyone-1.8.1-py3-none-any.whl.metadata (22 kB)
Collecting roboflow
  Downloading roboflow-1.2.10-py3-none-any.whl.metadata (9.7 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics==8.*)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Collecting argcomplete (from fiftyone)
  Downloading argcomplete-3.6.2-py3-none-any.whl.metadata (16 kB)
Collecting async_lru>=2 (from fiftyone)
  Downl

In [4]:
# Imports and folder setup
import os
from pathlib import Path
import shutil
import random
import math
import json
import gc, torch

In [5]:
ROOT = Path(PROJECT_ROOT)
ROOT.mkdir(parents=True, exist_ok=True)

In [6]:
# helper
def ensure(p):
    p = Path(p)
    p.mkdir(parents=True, exist_ok=True)
    return p


DATA_RAW = ensure(ROOT / 'openimages_raw')
EXPORT_DIR = ensure(ROOT / 'export')
DATASET_DIR = ensure(ROOT / 'dataset')
ensure(PROJECT_ROOT + '/runs')


print('Project root:', ROOT)

Project root: /content/Advanced_YOLOv8_OpenImages


In [7]:
# 1) Download subset of Open Images using FiftyOne (balanced per-class)
import fiftyone as fo
import fiftyone.zoo as foz


print('Downloading Open Images subset for classes:', CLASSES)
collected = []
seen = set()
for cls in CLASSES:
    print('\nDownloading class:', cls)
    ds = foz.load_zoo_dataset('open-images-v6', split='train', label_types=['detections'], classes=[cls], max_samples=MAX_PER_CLASS, shuffle=True)
    for s in ds:
        if s.id not in seen:
            collected.append(s)
            seen.add(s.id)
print('\nTotal collected samples:', len(collected))


combined = fo.Dataset('advanced_openimages_subset')
combined.add_samples(collected)

  return '(?ms)' + res + '\Z'


Downloading Open Images subset for classes: ['Airplane', 'Helicopter', 'Drone', 'Bird', 'Rocket']

Downloading class: Airplane
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to '/root/fiftyone/open-images-v6/train/metadata/image_ids.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to '/root/fiftyone/open-images-v6/train/metadata/image_ids.csv'


 100% |██████|    4.8Gb/4.8Gb [28.0s elapsed, 0s remaining, 163.9Mb/s]      


INFO:eta.core.utils: 100% |██████|    4.8Gb/4.8Gb [28.0s elapsed, 0s remaining, 163.9Mb/s]      


Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v6/train/metadata/classes.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v6/train/metadata/classes.csv'


Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmpi1i1bxpq/metadata/hierarchy.json'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmpi1i1bxpq/metadata/hierarchy.json'


Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv' to '/root/fiftyone/open-images-v6/train/labels/detections.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv' to '/root/fiftyone/open-images-v6/train/labels/detections.csv'


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [23.9s elapsed, 0s remaining, 24.8 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [23.9s elapsed, 0s remaining, 24.8 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


You are running the oldest supported major version of MongoDB. Please refer to https://deprecation.voxel51.com for deprecation notices. You can suppress this exception by setting your `database_validation` config parameter to `False`. See https://docs.voxel51.com/user_guide/config.html#configuring-a-mongodb-connection for more information




Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [2.0s elapsed, 0s remaining, 243.1 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [2.0s elapsed, 0s remaining, 243.1 samples/s]      


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created



Downloading class: Helicopter
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Found 1 images, downloading the remaining 499


INFO:fiftyone.utils.openimages:Found 1 images, downloading the remaining 499


 100% |███████████████████| 499/499 [24.1s elapsed, 0s remaining, 22.4 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 499/499 [24.1s elapsed, 0s remaining, 22.4 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use



Downloading class: Drone
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Ignoring invalid classes ['Drone']
You can view the available classes via `fiftyone.utils.openimages.get_classes()`


You can view the available classes via `fiftyone.utils.openimages.get_classes()`


Found 1 images, downloading the remaining 499


INFO:fiftyone.utils.openimages:Found 1 images, downloading the remaining 499


 100% |███████████████████| 499/499 [24.1s elapsed, 0s remaining, 23.3 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 499/499 [24.1s elapsed, 0s remaining, 23.3 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use



Downloading class: Bird
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [24.1s elapsed, 0s remaining, 22.3 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [24.1s elapsed, 0s remaining, 22.3 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use



Downloading class: Rocket
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [23.9s elapsed, 0s remaining, 23.8 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [23.9s elapsed, 0s remaining, 23.8 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'open-images-v6-train-500'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use



Total collected samples: 500
 100% |█████████████████| 500/500 [1.9s elapsed, 0s remaining, 264.8 samples/s]         


INFO:eta.core.utils: 100% |█████████████████| 500/500 [1.9s elapsed, 0s remaining, 264.8 samples/s]         


['68e16be7d4955f5e91c726e3',
 '68e16be7d4955f5e91c726fc',
 '68e16be7d4955f5e91c726fd',
 '68e16be7d4955f5e91c726fe',
 '68e16be7d4955f5e91c726ff',
 '68e16be7d4955f5e91c72700',
 '68e16be7d4955f5e91c72701',
 '68e16be7d4955f5e91c72702',
 '68e16be7d4955f5e91c72703',
 '68e16be7d4955f5e91c727bd',
 '68e16be7d4955f5e91c727be',
 '68e16be7d4955f5e91c727bf',
 '68e16be7d4955f5e91c727c0',
 '68e16be7d4955f5e91c727c1',
 '68e16be7d4955f5e91c727c2',
 '68e16be7d4955f5e91c727c3',
 '68e16be7d4955f5e91c727c4',
 '68e16be7d4955f5e91c727c5',
 '68e16be7d4955f5e91c727c6',
 '68e16be7d4955f5e91c727c7',
 '68e16be7d4955f5e91c727c8',
 '68e16be7d4955f5e91c727c9',
 '68e16be7d4955f5e91c727ca',
 '68e16be7d4955f5e91c727cb',
 '68e16be7d4955f5e91c727cc',
 '68e16be7d4955f5e91c727cd',
 '68e16be7d4955f5e91c727ce',
 '68e16be7d4955f5e91c727cf',
 '68e16be7d4955f5e91c727d0',
 '68e16be7d4955f5e91c727d1',
 '68e16be7d4955f5e91c727d2',
 '68e16be7d4955f5e91c727d3',
 '68e16be7d4955f5e91c727d4',
 '68e16be7d4955f5e91c727d5',
 '68e16be7d495

In [8]:
print (combined)

Name:        advanced_openimages_subset
Media type:  image
Num samples: 500
Persistent:  False
Tags:        []
Sample fields:
    id:               fiftyone.core.fields.ObjectIdField
    filepath:         fiftyone.core.fields.StringField
    tags:             fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    created_at:       fiftyone.core.fields.DateTimeField
    last_modified_at: fiftyone.core.fields.DateTimeField
    ground_truth:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)


In [9]:
print(ds.get_field_schema())


OrderedDict({'id': <fiftyone.core.fields.ObjectIdField object at 0x7b1a7a737620>, 'filepath': <fiftyone.core.fields.StringField object at 0x7b1a7a6ed2b0>, 'tags': <fiftyone.core.fields.ListField object at 0x7b1a7a6ee5a0>, 'metadata': <fiftyone.core.fields.EmbeddedDocumentField object at 0x7b1a7a75a8a0>, 'created_at': <fiftyone.core.fields.DateTimeField object at 0x7b1a7af08ec0>, 'last_modified_at': <fiftyone.core.fields.DateTimeField object at 0x7b1a7a740aa0>, 'ground_truth': <fiftyone.core.fields.EmbeddedDocumentField object at 0x7b1a84c8ba70>})


In [10]:
# 2) Export to YOLOv5/YOLOv8 format
if (EXPORT_DIR/'images').exists():
    shutil.rmtree(EXPORT_DIR)

(EXPORT_DIR/'images').mkdir(parents=True)
(EXPORT_DIR/'labels').mkdir(parents=True)


combined.export(export_dir=str(EXPORT_DIR), dataset_type=fo.types.YOLOv5Dataset, label_field='ground_truth', classes=CLASSES)
print('Export complete to', EXPORT_DIR)

Directory '/content/Advanced_YOLOv8_OpenImages/export' already exists; export will be merged with existing files




  10% |█/---------------|  51/500 [105.2ms elapsed, 925.9ms remaining, 485.0 samples/s] 



  20% |███--------------|  98/500 [206.0ms elapsed, 845.1ms remaining, 475.7 samples/s] 



  29% |████\------------| 146/500 [307.4ms elapsed, 745.4ms remaining, 474.9 samples/s] 



  39% |██████|----------| 197/500 [408.3ms elapsed, 628.0ms remaining, 482.5 samples/s] 



  60% |██████████-------| 298/500 [609.1ms elapsed, 412.9ms remaining, 489.3 samples/s] 



  79% |█████████████|---| 393/500 [809.9ms elapsed, 220.5ms remaining, 485.3 samples/s] 



  98% |████████████████-| 490/500 [1.0s elapsed, 20.7ms remaining, 482.7 samples/s]     



 100% |█████████████████| 500/500 [1.1s elapsed, 0s remaining, 464.6 samples/s]         


INFO:eta.core.utils: 100% |█████████████████| 500/500 [1.1s elapsed, 0s remaining, 464.6 samples/s]         


Export complete to /content/Advanced_YOLOv8_OpenImages/export


In [11]:
# 3) Prepare train/val/test split (80/10/10) and final dataset layout
all_images = sorted((EXPORT_DIR/'images').rglob('*.jpg'))
random.seed(42)
random.shuffle(all_images)


n = len(all_images)
train_n = int(0.8 * n)
val_n = int(0.1 * n)
train_imgs = all_images[:train_n]
val_imgs = all_images[train_n:train_n+val_n]
test_imgs = all_images[train_n+val_n:]



In [12]:
# helper to copy
for split, imgs in [('train', train_imgs), ('val', val_imgs), ('test', test_imgs)]:
    img_out = DATASET_DIR / 'images' / split
    lbl_out = DATASET_DIR / 'labels' / split
    img_out.mkdir(parents=True, exist_ok=True)
    lbl_out.mkdir(parents=True, exist_ok=True)
    for img in imgs:
        dst_img = img_out / img.name
        shutil.copy(img, dst_img)

        # label path is under EXPORT_DIR/labels/<subset> where fiftyone created subfolders
        label_candidate = (EXPORT_DIR/'labels') / img.parent.name / (img.stem + '.txt')
        if not label_candidate.exists():
            label_candidate = (EXPORT_DIR/'labels') / (img.stem + '.txt')
        if label_candidate.exists():
            shutil.copy(label_candidate, lbl_out / (img.stem + '.txt'))
        else:
            open(lbl_out / (img.stem + '.txt'), 'w').close()



print('Prepared dataset in', DATASET_DIR)
print('Counts:', len(train_imgs), len(val_imgs), len(test_imgs))




Prepared dataset in /content/Advanced_YOLOv8_OpenImages/dataset
Counts: 400 50 50


In [13]:
# write data.yaml
DATA_YAML = ROOT / 'data.yaml'
names = [c.lower().replace(' ', '_') for c in CLASSES]
DATA_YAML.write_text(f"""
train: {DATASET_DIR / 'images' / 'train'}
val: {DATASET_DIR / 'images' / 'val'}
test: {DATASET_DIR / 'images' / 'test'}


nc: {len(CLASSES)}
names: {names}
""")
print('Wrote', DATA_YAML)
print(DATA_YAML.read_text())

Wrote /content/Advanced_YOLOv8_OpenImages/data.yaml

train: /content/Advanced_YOLOv8_OpenImages/dataset/images/train
val: /content/Advanced_YOLOv8_OpenImages/dataset/images/val
test: /content/Advanced_YOLOv8_OpenImages/dataset/images/test


nc: 5
names: ['airplane', 'helicopter', 'drone', 'bird', 'rocket']



In [14]:
# 4) Hyperparameter file (custom)
hyp_path = ROOT / 'hyp_custom.yaml'
hyp_path.write_text('''
lr0: 0.01
lrf: 0.01
momentum: 0.937
weight_decay: 0.0001
warmup_epochs: 3.0
box: 5.5
cls: 1.0
dfl: 2.5
mosaic: 1.0
mixup: 0.0
''')
print('Wrote hyp at', hyp_path)

Wrote hyp at /content/Advanced_YOLOv8_OpenImages/hyp_custom.yaml


In [None]:
# 5) Tiling helper to increase small-object recall
from math import ceil
import cv2


TILING_DIR = ROOT / 'tiling'
if USE_TILING:
    print('Generating tiled images (this may expand dataset size)')
    if TILING_DIR.exists():
        shutil.rmtree(TILING_DIR)
    for split in ['train', 'val', 'test']:
        src_img_dir = DATASET_DIR / 'images' / split
        src_lbl_dir = DATASET_DIR / 'labels' / split
        out_img_dir = TILING_DIR / 'images' / split
        out_lbl_dir = TILING_DIR / 'labels' / split
        out_img_dir.mkdir(parents=True, exist_ok=True)
        out_lbl_dir.mkdir(parents=True, exist_ok=True)
        for img_path in src_img_dir.glob('*.jpg'):
            img = cv2.imread(str(img_path))
            h, w = img.shape[:2]
            step = int(TILE_SIZE * (1 - TILE_OVERLAP))
            h_tiles = max(1, ceil((h - TILE_SIZE) / max(1, step)) + 1)
            w_tiles = max(1, ceil((w - TILE_SIZE) / max(1, step)) + 1)
            # read labels
            lbl_path = src_lbl_dir / (img_path.stem + '.txt')
            anns = []
            if lbl_path.exists():
                with open(lbl_path) as f:
                    for line in f:
                        parts = line.strip().split()
                        if len(parts) != 5: continue
                        cid, xc, yc, bw, bh = parts
                        anns.append((int(cid), float(xc), float(yc), float(bw), float(bh)))
            for i in range(h_tiles):
                for j in range(w_tiles):
                    y1 = i * step
                    x1 = j * step
                    y2 = min(y1 + TILE_SIZE, h)
                    x2 = min(x1 + TILE_SIZE, w)
                    y1 = max(0, y2 - TILE_SIZE)
                    x1 = max(0, x2 - TILE_SIZE)
                    tile = img[y1:y2, x1:x2]
                    tile_name = f'{img_path.stem}_tile_{i}_{j}.jpg'
                    out_path = out_img_dir / tile_name
                    cv2.imwrite(str(out_path), tile)
                    # write labels that fall into tile (convert normalized -> absolute -> tile-local normalized)
                    out_labels = []
                    for (cid, xcn, ycn, bwn, bhn) in anns:
                        # convert normalized to absolute
                        bx = xcn * w
                        by = ycn * h
                        bw_abs = bwn * w
                        bh_abs = bhn * h
                        x_min = bx - bw_abs/2
                        y_min = by - bh_abs/2
                        x_max = bx + bw_abs/2
                        y_max = by + bh_abs/2
                        # intersection with tile
                        ix_min = max(x_min, x1)
                        iy_min = max(y_min, y1)
                        ix_max = min(x_max, x2)
                        iy_max = min(y_max, y2)
                        if ix_max <= ix_min or iy_max <= iy_min:
                            continue
                        # new box in tile coords
                        nx_min = ix_min - x1
                        ny_min = iy_min - y1
                        nx_max = ix_max - x1
                        ny_max = iy_max - y1
                        nw = nx_max - nx_min
                        nh = ny_max - ny_min
                        if nw <= 1 or nh <= 1: continue
                        # normalize to tile size
                        n_xc = (nx_min + nw/2) / (x2 - x1)
                        n_yc = (ny_min + nh/2) / (y2 - y1)
                        n_bw = nw / (x2 - x1)
                        n_bh = nh / (y2 - y1)
                        out_labels.append(f"{cid} {n_xc:.6f} {n_yc:.6f} {n_bw:.6f} {n_bh:.6f}\n")
                    with open(out_lbl_dir / (tile_name.replace('.jpg', '.txt')), 'w') as f:
                        f.writelines(out_labels)
    print('Tiling complete. Tiled dataset at', TILING_DIR)
else:
    print('Tiling disabled. Using original dataset')

Generating tiled images (this may expand dataset size)
Tiling complete. Tiled dataset at /content/Advanced_YOLOv8_OpenImages/tiling


In [None]:
# If tiling used, update DATA_YAML to point to tiled images 
if USE_TILING:
    DATASET_FOR_TRAIN = TILING_DIR
else:
    DATASET_FOR_TRAIN = DATASET_DIR


DATA_YAML_TILED = ROOT / 'data_tiled.yaml'
DATA_YAML_TILED.write_text(f"""
train: {DATASET_FOR_TRAIN / 'images' / 'train'}
val: {DATASET_DIR / 'images' / 'val'}
test: {DATASET_DIR / 'images' / 'test'}


nc: {len(CLASSES)}
names: {[n.lower().replace(' ', '_') for n in CLASSES]}
""")
print('Wrote tiled data yaml at', DATA_YAML_TILED)

Wrote tiled data yaml at /content/Advanced_YOLOv8_OpenImages/data_tiled.yaml


In [17]:
# 6) Training helper functions: progressive resizing, two-stage, TTA
from ultralytics import YOLO


def train_stage(model_source, data_yaml, epochs, imgsz, name_suffix, resume=False, lr0=0.01):
    name = f"{EXPERIMENT}_{name_suffix}"
    print('Starting training:', name)
    model = YOLO(model_source)
    model.train(
        data=str(data_yaml),
        epochs=epochs,
        imgsz=imgsz,
        batch=BATCH,
        device=DEVICE,
        multi_scale=True,
        augment=True,
        cfg=str(hyp_path),
        project=PROJECT,
        name=name,
        exist_ok=True,
        lr0=lr0,
    )
    # return path to best.pt
    weights_dir = Path(PROJECT) / name / 'weights'
    best = weights_dir / 'best.pt'
    last = sorted(weights_dir.glob('*.pt'))[-1]
    return str(best if best.exists() else last)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [18]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


In [None]:
# Stage 1: generalized training 
stage1_data = DATA_YAML_TILED
ckpt_stage1 = train_stage(MODEL_PRETRAIN, stage1_data, EPOCHS_STAGE1, TRAIN_IMG_SIZE_START, 'stage1_general')
print('Stage 1 checkpoint:', ckpt_stage1)


gc.collect()
torch.cuda.empty_cache()


Starting training: advanced_multi_scale_stage1_general
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt': 100% ━━━━━━━━━━━━ 49.7MB 221.0MB/s 0.2s
Ultralytics 8.3.204 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=True, auto_augment=randaugment, batch=8, bgr=0.0, box=5.5, cache=False, cfg=/content/Advanced_YOLOv8_OpenImages/hyp_custom.yaml, classes=None, close_mosaic=10, cls=1.0, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/Advanced_YOLOv8_OpenImages/data_tiled.yaml, degrees=0.0, deterministic=True, device=0, dfl=2.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, lin

In [20]:
# Progressive resizing: continue training with larger imgsz for last fraction of epochs
print('Progressive resizing: continuing training at larger img size')
ckpt_stage1_prog = train_stage(ckpt_stage1, stage1_data, int(EPOCHS_STAGE1*0.3), TRAIN_IMG_SIZE_FINAL, 'stage1_prog', lr0=0.002)
print('Checkpoint after progressive resize:', ckpt_stage1_prog)


gc.collect()
torch.cuda.empty_cache()

Progressive resizing: continuing training at larger img size
Starting training: advanced_multi_scale_stage1_prog
Ultralytics 8.3.204 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=True, auto_augment=randaugment, batch=8, bgr=0.0, box=5.5, cache=False, cfg=/content/Advanced_YOLOv8_OpenImages/hyp_custom.yaml, classes=None, close_mosaic=10, cls=1.0, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/Advanced_YOLOv8_OpenImages/data_tiled.yaml, degrees=0.0, deterministic=True, device=0, dfl=2.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1280, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.002, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, mod

In [None]:
# Stage 2: refined fine-tuning 
refined_data = DATA_YAML_TILED 
ckpt_stage2 = train_stage(ckpt_stage1_prog, refined_data, EPOCHS_STAGE2, TRAIN_IMG_SIZE_FINAL, 'stage2_finetune', lr0=0.002)
print('Stage 2 checkpoint:', ckpt_stage2)

gc.collect()
torch.cuda.empty_cache()

Starting training: advanced_multi_scale_stage2_finetune
Ultralytics 8.3.204 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=True, auto_augment=randaugment, batch=8, bgr=0.0, box=5.5, cache=False, cfg=/content/Advanced_YOLOv8_OpenImages/hyp_custom.yaml, classes=None, close_mosaic=10, cls=1.0, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/Advanced_YOLOv8_OpenImages/data_tiled.yaml, degrees=0.0, deterministic=True, device=0, dfl=2.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=80, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1280, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.002, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/content/Advanced_YOLOv8_OpenImages/runs/advanced_mult

In [None]:
# 7) Validation with TTA (Test Time Augmentation)
print('Running validation with TTA on final checkpoint')
model = YOLO(ckpt_stage2)


val_metrics = model.val(data=str(DATA_YAML), imgsz=TRAIN_IMG_SIZE_FINAL, augment=True)
print('Validation metrics (with augment/TTA):')
print(val_metrics)

# Also evaluate standard (no TTA)
val_metrics_no_tta = model.val(data=str(DATA_YAML), imgsz=TRAIN_IMG_SIZE_FINAL, augment=False)
print('Validation metrics (no TTA):')
print(val_metrics_no_tta)

Running validation with TTA on final checkpoint
Ultralytics 8.3.204 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
Model summary (fused): 92 layers, 25,842,655 parameters, 0 gradients, 78.7 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2747.3±847.6 MB/s, size: 317.7 KB)
[K[34m[1mval: [0mScanning /content/Advanced_YOLOv8_OpenImages/dataset/labels/val.cache... 50 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 50/50 109.5Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 4/4 1.2it/s 3.2s
                   all         50        100       0.83      0.634      0.681      0.465
              airplane         50        100       0.83      0.634      0.681      0.465
Speed: 8.6ms preprocess, 32.5ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to [1m/content/runs/detect/val[0m
Validation metrics (with augment/TTA):
ultralytics.utils.metrics.D

In [23]:
# 8) Lightweight hyperparameter sweep helper (example set) - run short epochs only
sweep_results = []
search_space = [
    {'lr0':0.01,'weight_decay':0.0001},
    {'lr0':0.005,'weight_decay':0.0001},
    {'lr0':0.01,'weight_decay':0.01},
]
for idx, cfg in enumerate(search_space):
    print(f"Sweep run {idx+1}/{len(search_space)}: {cfg}")
    m = YOLO(MODEL_PRETRAIN)
    name = f"{EXPERIMENT}_sweep_{idx}"
    m.train(data=str(DATA_YAML_TILED), epochs=12, imgsz=640, batch=BATCH, device=DEVICE, lr0=cfg['lr0'], weight_decay=cfg['weight_decay'], project=PROJECT, name=name, exist_ok=True)
    # evaluate quickly on val
    weights_dir = Path(PROJECT) / name / 'weights'
    ckpt = weights_dir / 'best.pt'
    model_eval = YOLO(str(ckpt))
    metrics = model_eval.val(data=str(DATA_YAML), imgsz=640)
    print('Sweep metrics:', metrics)
    sweep_results.append({'cfg':cfg, 'metrics':metrics})

Sweep run 1/3: {'lr0': 0.01, 'weight_decay': 0.0001}
Ultralytics 8.3.204 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/Advanced_YOLOv8_OpenImages/data_tiled.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=12, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=advanced_multi_scale_sweep_0, nbs=64, nms

In [24]:
# save sweep results
with open(ROOT / 'sweep_results.json', 'w') as f:
    json.dump(sweep_results, f, default=str, indent=2)
print('Sweep complete. Results saved to sweep_results.json')

Sweep complete. Results saved to sweep_results.json


In [25]:
# 9) Inference on test set and saving predictions
print('Running inference on test images and saving visual results')
model = YOLO(ckpt_stage2)
PRED_OUT = ROOT / 'predict_out'
if PRED_OUT.exists():
    shutil.rmtree(PRED_OUT)
PRED_OUT.mkdir(parents=True)


test_images = list((DATASET_DIR/'images'/'test').glob('*.jpg'))[:100]
for i, img in enumerate(test_images):
    model.predict(source=str(img), imgsz=TRAIN_IMG_SIZE_FINAL, conf=0.25, save=True, project=str(PRED_OUT), name='test_preds')
print('Inference complete. Check predict_out folder')

Running inference on test images and saving visual results

image 1/1 /content/Advanced_YOLOv8_OpenImages/dataset/images/test/46e080ac71d36ad0.jpg: 864x1280 2 airplanes, 78.3ms
Speed: 6.5ms preprocess, 78.3ms inference, 1.8ms postprocess per image at shape (1, 3, 864, 1280)
Results saved to [1m/content/Advanced_YOLOv8_OpenImages/predict_out/test_preds[0m

image 1/1 /content/Advanced_YOLOv8_OpenImages/dataset/images/test/1052c32b892e4deb.jpg: 864x1280 4 airplanes, 10.8ms
Speed: 6.7ms preprocess, 10.8ms inference, 1.5ms postprocess per image at shape (1, 3, 864, 1280)
Results saved to [1m/content/Advanced_YOLOv8_OpenImages/predict_out/test_preds2[0m

image 1/1 /content/Advanced_YOLOv8_OpenImages/dataset/images/test/0035fc6e652c751a.jpg: 960x1280 (no detections), 76.5ms
Speed: 6.7ms preprocess, 76.5ms inference, 0.6ms postprocess per image at shape (1, 3, 960, 1280)
Results saved to [1m/content/Advanced_YOLOv8_OpenImages/predict_out/test_preds3[0m

image 1/1 /content/Advanced_YOLOv8

In [26]:
# 10) Mount Google Drive and copy best checkpoints + results
try:
    from google.colab import drive
    drive.mount('/content/drive')
    ensure(DRIVE_SAVE_DIR)
    # copy best checkpoint
    best_ckpt = Path(ckpt_stage2)
    if best_ckpt.exists():
        shutil.copy(str(best_ckpt), DRIVE_SAVE_DIR)
    # copy sweep results and data yaml
    shutil.copy(str(ROOT/'sweep_results.json'), DRIVE_SAVE_DIR)
    shutil.copy(str(DATA_YAML), DRIVE_SAVE_DIR)
    print('Saved artifacts to Google Drive at', DRIVE_SAVE_DIR)
except Exception as e:
    print('Drive mount or copy failed:', e)

Mounted at /content/drive
Saved artifacts to Google Drive at /content/drive/MyDrive/YOLO_Advanced_Results


In [31]:
import shutil
shutil.make_archive('/content/YOLO_folder', 'zip', '/content/Advanced_YOLOv8_OpenImages')


'/content/YOLO_folder.zip'

In [32]:
shutil.make_archive('/content/runs', 'zip', '/content/runs')

'/content/runs.zip'

In [34]:
from google.colab import files
files.download('/content/YOLO_folder.zip')
files.download('/content/runs.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>