In [4]:
import os
import shutil

# Classes kept by the conversion. A name's position in this list is the
# class id written to the YOLO label files.
# NOTE(review): the official DOTA category list spells the aircraft class
# 'plane', not 'airplane' - confirm the label files really use 'airplane'.
classes_to_include = [
    'airplane',
    'ship',
    'storage-tank',
    'ground-track-field',
    'harbor',
    'bridge',
    'large-vehicle',
    'small-vehicle',
    'helicopter',
]
class_mapping = dict(zip(classes_to_include, range(len(classes_to_include))))

def convert_dota_to_yolo(split):
    """Convert one DOTA split into YOLO axis-aligned labels plus copied images.

    For every ``*.txt`` file in ``DOTA/labelTxt/{split}`` that has a matching
    ``DOTA/images/{split}/<id>.png`` and contains at least one object whose
    class is listed in ``classes_to_include``, this writes
    ``converted/labels/{split}/<id>.txt`` and copies the image to
    ``converted/images/{split}/<id>.png``.

    Each kept object's four corner points are reduced to their axis-aligned
    bounding box, clipped to the image rectangle and normalised by the real
    image width and height. Boxes with no area after clipping are dropped.

    Args:
        split: Name of the split sub-directory, e.g. ``'train'`` or ``'val'``.
    """
    # Local import so the function works even if no earlier cell imported PIL.
    from PIL import Image

    dota_img_dir = f"DOTA/images/{split}"
    dota_label_dir = f"DOTA/labelTxt/{split}"
    yolo_img_dir = f"converted/images/{split}"
    yolo_label_dir = f"converted/labels/{split}"

    os.makedirs(yolo_img_dir, exist_ok=True)
    os.makedirs(yolo_label_dir, exist_ok=True)

    for label_file in os.listdir(dota_label_dir):
        if not label_file.endswith('.txt'):
            continue

        # splitext drops only the trailing extension; str.replace would also
        # strip a '.txt' occurring in the middle of the name.
        image_id = os.path.splitext(label_file)[0]
        image_file = os.path.join(dota_img_dir, image_id + '.png')
        label_path = os.path.join(dota_label_dir, label_file)

        # Without the image there is nothing to copy and no size to normalise
        # by, so skip instead of writing a label and then failing on the copy.
        if not os.path.isfile(image_file):
            continue

        # Normalise by the actual image size rather than assuming 1024x1024.
        with Image.open(image_file) as img:
            img_w, img_h = img.size

        yolo_label_lines = []
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                # Lines with fewer than 8 coordinates + a class name are not
                # object annotations.
                if len(parts) < 9:
                    continue

                class_name = parts[8]
                if class_name not in classes_to_include:
                    continue

                coords = list(map(float, parts[:8]))
                xs = coords[0::2]
                ys = coords[1::2]

                # Axis-aligned extent of the four points, clipped to the image
                # so that normalised values stay inside [0, 1].
                x_min = max(min(xs), 0.0)
                y_min = max(min(ys), 0.0)
                x_max = min(max(xs), float(img_w))
                y_max = min(max(ys), float(img_h))
                if x_max <= x_min or y_max <= y_min:
                    # Degenerate box, or one lying entirely outside the image.
                    continue

                x_center = (x_min + x_max) / 2 / img_w
                y_center = (y_min + y_max) / 2 / img_h
                width = (x_max - x_min) / img_w
                height = (y_max - y_min) / img_h

                cls_id = class_mapping[class_name]
                yolo_line = f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
                yolo_label_lines.append(yolo_line)

        # Only images with at least one kept object end up in the output set.
        if yolo_label_lines:
            with open(os.path.join(yolo_label_dir, image_id + '.txt'), 'w') as out_f:
                out_f.write('\n'.join(yolo_label_lines))
            shutil.copy(image_file, os.path.join(yolo_img_dir, image_id + '.png'))

# Convert the training split first, then the validation split.
for split_name in ('train', 'val'):
    convert_dota_to_yolo(split_name)


In [5]:
import os
import shutil
from glob import glob
from tqdm import tqdm
from PIL import Image

# DOTA class name -> YOLO class id. Ids follow the order of the names below.
# NOTE(review): the official DOTA category list spells the aircraft class
# "plane", not "airplane" - confirm the label files really use "airplane".
CLASS_NAME_TO_ID = {
    name: class_id
    for class_id, name in enumerate((
        "airplane",
        "ship",
        "storage-tank",
        "ground-track-field",
        "harbor",
        "bridge",
        "large-vehicle",
        "small-vehicle",
        "helicopter",
    ))
}

# Names of the classes kept during conversion; derived from the mapping so
# the two can never disagree.
ALLOWED_CLASSES = set(CLASS_NAME_TO_ID)

def oriented_to_bbox(coords):
    """Return the axis-aligned box ``(cx, cy, w, h)`` enclosing a 4-point polygon.

    Args:
        coords: Indexable of at least 8 values convertible to ``float``, laid
            out as ``x1, y1, x2, y2, x3, y3, x4, y4``. Entries past index 7
            are ignored.

    Returns:
        Tuple ``(cx, cy, w, h)`` of floats in the same units as ``coords``:
        box centre followed by box width and height.
    """
    # Even positions hold x values, odd positions hold y values.
    x_values = [float(coords[idx]) for idx in (0, 2, 4, 6)]
    y_values = [float(coords[idx]) for idx in (1, 3, 5, 7)]

    left = min(x_values)
    right = max(x_values)
    top = min(y_values)
    bottom = max(y_values)

    return (left + right) / 2, (top + bottom) / 2, right - left, bottom - top

def convert_label_file(dota_label_path, yolo_label_path, image_width, image_height):
    """Convert one DOTA annotation file to a YOLO label file (allowed classes only).

    Each kept object's 8-value polygon is reduced to its axis-aligned bounding
    box, clipped to the image rectangle and normalised by the image size.
    Objects whose clipped box has no area are dropped.

    Args:
        dota_label_path: Path of the DOTA annotation file to read.
        yolo_label_path: Path of the YOLO label file to write.
        image_width: Image width in pixels; divides x and w.
        image_height: Image height in pixels; divides y and h.

    Returns:
        ``True`` if at least one object was kept and ``yolo_label_path`` was
        written, ``False`` otherwise (nothing is written in that case).
    """
    with open(dota_label_path, 'r') as f:
        lines = f.readlines()

    yolo_lines = []
    for line in lines:
        # Skip blank lines and the "imagesource" / "gsd" header lines.
        if line.strip() == "" or line.startswith("imagesource") or line.startswith("gsd"):
            continue

        parts = line.strip().split()
        # An object line needs 8 coordinates followed by the class name.
        if len(parts) < 9:
            continue

        coords = parts[:8]
        class_name = parts[8].lower()

        if class_name not in ALLOWED_CLASSES:
            continue

        class_id = CLASS_NAME_TO_ID[class_name]
        cx, cy, w, h = oriented_to_bbox(coords)

        # If a polygon reaches past the image border its box would normalise to
        # values outside [0, 1]; clip the box edges to the image rectangle.
        left = max(cx - w / 2, 0.0)
        right = min(cx + w / 2, float(image_width))
        top = max(cy - h / 2, 0.0)
        bottom = min(cy + h / 2, float(image_height))
        if right <= left or bottom <= top:
            # Degenerate box, or one lying entirely outside the image.
            continue

        # Normalise centre and size by the image dimensions.
        cx = (left + right) / 2 / image_width
        cy = (top + bottom) / 2 / image_height
        w = (right - left) / image_width
        h = (bottom - top) / image_height

        yolo_line = f"{class_id} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}"
        yolo_lines.append(yolo_line)

    if yolo_lines:
        with open(yolo_label_path, 'w') as f:
            f.write('\n'.join(yolo_lines))
        return True  # Valid label written
    return False     # No allowed objects found

def convert_split(split):
    """Convert one DOTA split (images and labels) into the YOLO directory layout.

    Every ``*.png`` in ``DOTA/images/{split}`` that has a label file of the
    same stem in ``DOTA/labelTxt/{split}`` is converted with
    ``convert_label_file`` using the image's real pixel size. Images that
    yield at least one kept object are copied to ``converted/images/{split}``
    alongside their label in ``converted/labels/{split}``; for the others any
    label file already present at the output path is removed.

    Args:
        split: Name of the split sub-directory, e.g. ``"train"`` or ``"val"``.
    """
    dota_img_dir = f"DOTA/images/{split}"
    dota_label_dir = f"DOTA/labelTxt/{split}"
    yolo_img_dir = f"converted/images/{split}"
    yolo_label_dir = f"converted/labels/{split}"

    os.makedirs(yolo_img_dir, exist_ok=True)
    os.makedirs(yolo_label_dir, exist_ok=True)

    dota_images = glob(os.path.join(dota_img_dir, "*.png"))
    kept_count = 0

    for img_path in tqdm(dota_images, desc=f"Processing {split}"):
        # splitext removes only the final extension; str.replace would also
        # strip a '.png' occurring in the middle of the file name.
        filename = os.path.splitext(os.path.basename(img_path))[0]
        label_path = os.path.join(dota_label_dir, filename + ".txt")

        # Images without an annotation file are left out of the output set.
        if not os.path.exists(label_path):
            continue

        # The real image size is needed to normalise the box coordinates.
        with Image.open(img_path) as img:
            w, h = img.size

        yolo_label_path = os.path.join(yolo_label_dir, filename + ".txt")
        valid = convert_label_file(label_path, yolo_label_path, w, h)

        if valid:
            shutil.copy(img_path, os.path.join(yolo_img_dir, filename + ".png"))
            kept_count += 1
        elif os.path.exists(yolo_label_path):
            # Nothing was written this time, so a file found here is a
            # leftover from an earlier run; drop it.
            os.remove(yolo_label_path)

    print(f"✅ {kept_count} {split} images retained with valid classes.")

if __name__ == "__main__":
    # Convert the training split first, then the validation split.
    for split_name in ("train", "val"):
        convert_split(split_name)


Processing train: 100%|████████████████████████████████████████████████████████████| 1411/1411 [00:20<00:00, 69.13it/s]


✅ 1165 train images retained with valid classes.


Processing val: 100%|████████████████████████████████████████████████████████████████| 458/458 [00:06<00:00, 69.82it/s]

✅ 379 val images retained with valid classes.



