#### Download images from Bing with `bing_image_downloader`

In [2]:
from bing_image_downloader import downloader

In [None]:
# Fetch raw photos for every object class; each query is passed to the downloader
# with output_dir='images'.

categories = [
    # airborne objects
    "cargo aircraft", "commercial aircraft", "drone", "fighter jet", "fighter plane", "helicopter",
    "light aircraft", "missile",
    # ground vehicles
    "truck", "car", "tank", "bus", "van",
    # vessels
    "cargo ship", "yacht",
    "cruise ship",
    "warship", "sailboat",
]

# Options shared by every query; force_replace=False is passed so an existing
# folder is not wiped on a re-run.
object_download_options = dict(
    limit=30,
    output_dir='images',
    adult_filter_off=True,
    force_replace=False,
    timeout=60,
)

for category in categories:
    downloader.download(category, **object_download_options)

In [None]:
# Fetch candidate background scenes into the notebook's working directory.
# NOTE(review): the downloader presumably stores results in a sub-folder named
# after the query, while the later cells read from 'backgrounds' — it looks like
# the folder is renamed by hand after this step; confirm.

background_query = "real-life background (landscape)"
downloader.download(
    background_query,
    limit=150,
    output_dir='./',
    adult_filter_off=True,
)

##### Note: Manual review may be needed here — delete any bad downloaded images before running the next steps

### Preparing Dataset

In [None]:
import albumentations as A
import json
import math
import numpy as np
import os
import random

from io import BytesIO
from PIL import Image, ImageDraw
from rembg import remove
from tqdm import tqdm

#### Removing backgrounds from objects

In [None]:
# 'images' holds the raw downloads (one sub-folder per category);
# 'cropped' receives the background-free cut-outs in the same layout.
input_folder, output_folder = 'images', 'cropped'
os.makedirs(output_folder, exist_ok=True)

def crop_to_foreground(img: "Image.Image") -> "Image.Image":
    """Crop ``img`` to the tight bounding box of its non-transparent pixels.

    The box is measured on the alpha band whenever the image has one, so that
    fully transparent pixels never enlarge the crop regardless of what colour
    values they carry. Images without an alpha band fall back to the bounding
    box of their non-zero pixels.

    Args:
        img: Image to crop (an RGBA cut-out in this notebook).

    Returns:
        The cropped image, or ``img`` itself when no bounding box exists
        (e.g. the image is completely transparent).
    """
    if "A" in img.getbands():
        # Measure transparency explicitly instead of relying on getbbox()'s
        # mode-dependent default.
        bbox = img.getchannel("A").getbbox()
    else:
        bbox = img.getbbox()

    if bbox is None:
        return img  # nothing visible to crop to
    return img.crop(bbox)

# Cut every downloaded object out of its background and store the tight crop as
# '<output_folder>/<category>/<category>_<n>.png'.
for cat in sorted(os.listdir(input_folder)):
    input_dir = os.path.join(input_folder, cat)

    # Only category folders are processed: this skips '.ipynb_checkpoints',
    # other hidden entries and stray files that would make listdir() fail.
    if cat.startswith('.') or not os.path.isdir(input_dir):
        continue

    output_dir = os.path.join(output_folder, cat)
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that the numbering is the same on every run.
    image_files = sorted(
        name for name in os.listdir(input_dir)
        if not name.startswith('.') and os.path.isfile(os.path.join(input_dir, name))
    )

    for i, file in enumerate(image_files):
        input_path = os.path.join(input_dir, file)
        output_path = os.path.join(output_dir, f"{cat}_{i+1}.png")

        try:
            # Read the bytes first so the file is closed before the slow
            # background-removal call runs.
            with open(input_path, 'rb') as inp:
                raw_bytes = inp.read()

            result = remove(raw_bytes)
            img = Image.open(BytesIO(result)).convert("RGBA")
            cropped_img = crop_to_foreground(img)
            cropped_img.save(output_path)
        except Exception as e:
            # One unreadable download must not abort the whole batch.
            print(f"Failed to process {input_path}: {e}")

In [None]:
# Renumber the cut-outs of every category to '1.png', '2.png', ...
# The rename is done in two phases so that re-running this cell can never
# overwrite a file that has not been renamed yet (os.rename may silently
# replace an existing destination).
for cat in sorted(os.listdir('cropped')):
    relevant_dir = os.path.join('cropped', cat)

    # Skip '.ipynb_checkpoints', other hidden entries and stray files.
    if cat.startswith('.') or not os.path.isdir(relevant_dir):
        continue

    # Only regular PNG files are renumbered; nested folders are left alone.
    png_names = sorted(
        name for name in os.listdir(relevant_dir)
        if name.lower().endswith('.png') and os.path.isfile(os.path.join(relevant_dir, name))
    )

    # Phase 1: park every file under a unique temporary name that cannot clash
    # with any final '<n>.png' name.
    token = os.urandom(8).hex()
    staged_paths = []
    for i, x in enumerate(png_names):
        staged_path = os.path.join(relevant_dir, f".renumber-{token}-{i}")
        os.rename(os.path.join(relevant_dir, x), staged_path)
        staged_paths.append(staged_path)

    # Phase 2: assign the final consecutive names.
    for i, staged_path in enumerate(staged_paths):
        os.rename(staged_path, os.path.join(relevant_dir, f"{i+1}.png"))

In [52]:
# Give every background a uniform name 'bg_1.jpg', 'bg_2.jpg', ...
# The '.jpg' suffix is applied to every file regardless of its original
# extension, exactly as before.
# The rename is done in two phases so that re-running this cell can never
# overwrite a file that has not been renamed yet (os.rename may silently
# replace an existing destination).
background_names = sorted(
    x for x in os.listdir('backgrounds')
    # Regular, non-hidden files only: skips '.ipynb_checkpoints' and similar.
    if not x.startswith('.') and os.path.isfile(os.path.join('backgrounds', x))
)

# Phase 1: park every file under a unique temporary name that cannot clash
# with any final 'bg_<n>.jpg' name.
token = os.urandom(8).hex()
staged_paths = []
for i, x in enumerate(background_names):
    staged_path = os.path.join('backgrounds', f".renumber-{token}-{i}")
    os.rename(os.path.join('backgrounds', x), staged_path)
    staged_paths.append(staged_path)

# Phase 2: assign the final consecutive names (no gaps in the numbering).
for i, staged_path in enumerate(staged_paths):
    os.rename(staged_path, os.path.join('backgrounds', f"bg_{i+1}.jpg"))

#### Resizing background images

In [53]:
def resize_and_crop(img, target_width=1920, target_height=1080, resample=None):
    """Scale ``img`` so it covers the target size, then center-crop to that size.

    The aspect ratio is preserved while resizing: the image is scaled until the
    tighter dimension matches the target, and the overflow of the other
    dimension is trimmed equally from both sides.

    Args:
        img: Source image; must expose ``size``, ``resize`` and ``crop`` the
            way a PIL image does.
        target_width: Width of the returned image in pixels.
        target_height: Height of the returned image in pixels.
        resample: Resampling filter handed to ``img.resize``. ``None`` (the
            default) selects ``Image.LANCZOS``.

    Returns:
        The result of cropping the resized image to
        ``target_width`` x ``target_height``.
    """
    if resample is None:
        resample = Image.LANCZOS

    original_width, original_height = img.size

    # Step 1: resize so both dimensions are large enough.
    # Aspect ratios are compared by cross-multiplication and the new size is
    # computed with integer division, so the "covers the target" guarantee does
    # not depend on floating-point rounding.
    if original_width * target_height > original_height * target_width:
        # Source is wider than the target: match heights, width overflows.
        new_height = target_height
        new_width = (target_height * original_width) // original_height
    else:
        # Source is taller than (or equal to) the target: match widths.
        new_width = target_width
        new_height = (target_width * original_height) // original_width

    img = img.resize((new_width, new_height), resample)

    # Step 2: center crop to the target size.
    left = (new_width - target_width) // 2
    top = (new_height - target_height) // 2

    return img.crop((left, top, left + target_width, top + target_height))


# Normalise every background to the default canvas size of resize_and_crop and
# write the result, under the same file name, to a separate folder.
background_folder = 'backgrounds'
resized_folder = 'backgrounds_resized'
os.makedirs(resized_folder, exist_ok=True)

for file in os.listdir(background_folder):
    # Anything that is not a JPEG/PNG by extension is ignored.
    is_supported = file.lower().endswith(('.jpg', '.jpeg', '.png'))
    if is_supported:
        input_path = os.path.join(background_folder, file)
        output_path = os.path.join(resized_folder, file)

        try:
            source_img = Image.open(input_path).convert("RGB")
            resize_and_crop(source_img).save(output_path)
        except Exception as e:
            # Best effort: report the file and keep going with the rest.
            print(f"Failed to process {file}: {e}")

#### Pasting images of objects onto background (with augmentation)

In [56]:
# --- Configuration -----------------------------------------------------------
# Target bounding-box area (in pixels) of an object before rotation. The first
# NUM_AIR_CATEGORIES categories use the *_AIR range, all others the generic one.
MIN_AREA_AIR = 2000
MAX_AREA_AIR = 20000
MIN_AREA = 14000
MAX_AREA = 30000

NUM_DATA = 5000     # number of composite images to generate
PLOT_BBOX = False   # debug aid: draw every bounding box onto the saved image

cropped_dir = 'cropped'
backgrounds_dir = 'backgrounds_resized'
output_dir = os.path.join('generated', 'images')

os.makedirs(output_dir, exist_ok=True)

# All backgrounds are kept in memory; directory entries (e.g. notebook
# checkpoints) are skipped because they cannot be opened as images.
backgrounds = [
    Image.open(os.path.join(backgrounds_dir, f)).convert('RGBA')
    for f in sorted(os.listdir(backgrounds_dir))
    if os.path.isfile(os.path.join(backgrounds_dir, f))
]

categories = ["cargo aircraft", "commercial aircraft", "drone", "fighter jet", "fighter plane", "helicopter",
    "light aircraft", "missile", "truck", "car", "tank", "bus", "van",
    "cargo ship", "yacht", "cruise ship", "warship", "sailboat"]
categories_idx = list(range(len(categories)))

# categories[:NUM_AIR_CATEGORIES] ("cargo aircraft" ... "missile") are pasted
# with the smaller *_AIR area range.
NUM_AIR_CATEGORIES = 8

# Sampling weights: 0.08 for the first seven categories, 0.04 for the other eleven.
probs = [0.08] * 7 + [0.04] * 11
assert len(probs) == len(categories), "probs must hold exactly one weight per category"

# One list of cut-out file paths per category, in the same order as `categories`.
imgs_fp = [
    [
        os.path.join(cropped_dir, cat, img_fp)
        for img_fp in sorted(os.listdir(os.path.join(cropped_dir, cat)))
        if img_fp.endswith('.png')
    ]
    for cat in categories
]

# Fail before the long loop starts instead of somewhere in the middle of it.
empty_categories = [cat for cat, fps in zip(categories, imgs_fp) if not fps]
if empty_categories:
    raise ValueError(f"No cropped images found for: {empty_categories}")
if not backgrounds:
    raise ValueError(f"No background images found in '{backgrounds_dir}'")

# COCO-style annotation container; category ids are the indices into `categories`.
annotations = {"images": [], "annotations": [], "categories": [{"id": cat_id, "name": cat} for cat_id, cat in zip(categories_idx, categories)]}

# Photometric augmentations applied to the finished composite. None of them
# moves pixels, so the bounding boxes stay valid.
# NOTE(review): `var_limit` and `quality_lower` are the argument names this
# notebook was written with; newer albumentations releases may have renamed
# them — confirm against the installed version.
albu_transforms = A.Compose([
    A.GaussNoise(var_limit=2500, p=0.5),
    A.ISONoise(p=0.5),
    A.Blur(p=0.15),
    A.MedianBlur(p=0.15),
    A.ToGray(p=0.1),
    A.CLAHE(p=0.15),
    A.RandomBrightnessContrast(p=0.6),
    A.RandomGamma(p=0.2),
    A.ImageCompression(quality_lower=75, p=0.5),
])


def _scale_to_area(img, min_area, max_area):
    """Resize `img` by a random factor so its area lands between the two bounds."""
    img_area = img.width * img.height
    scale_min = math.sqrt(min_area / img_area)
    scale_max = math.sqrt(max_area / img_area)
    scale = random.uniform(scale_min, scale_max)

    # Clamp to one pixel: a very elongated cut-out could otherwise round down
    # to a zero-sized dimension, which resize() rejects.
    new_size = (max(1, int(img.width * scale)), max(1, int(img.height * scale)))
    return img.resize(new_size, Image.LANCZOS)


def _rotate_and_trim(img, max_angle=30):
    """Rotate `img` by a random whole angle in [-max_angle, max_angle] and trim the transparent border."""
    rotation_angle = random.randint(-max_angle, max_angle)
    rotated = img.rotate(rotation_angle, expand=True)
    alpha_bbox = rotated.split()[-1].getbbox()
    return rotated.crop(alpha_bbox) if alpha_bbox else rotated


# --- Generation --------------------------------------------------------------
for i in tqdm(range(NUM_DATA)):
    bg = random.choice(backgrounds).copy()
    num_obj = random.randint(3, 6)

    obj_cat_idx = random.choices(categories_idx, weights=probs, k=num_obj)

    generated_fn = f"g_{i}.jpg"
    annotations["images"].append({
        "id": i,
        "file_name": generated_fn,
        "width": bg.width,
        "height": bg.height
    })

    draw = ImageDraw.Draw(bg) if PLOT_BBOX else None

    for cat_idx in obj_cat_idx:
        img_fp = random.choice(imgs_fp[cat_idx])
        img = Image.open(img_fp).convert('RGBA')

        if cat_idx < NUM_AIR_CATEGORIES:
            img = _scale_to_area(img, MIN_AREA_AIR, MAX_AREA_AIR)
        else:
            img = _scale_to_area(img, MIN_AREA, MAX_AREA)

        img = _rotate_and_trim(img)

        # An object larger than the canvas would leave no valid paste position
        # (randint over an empty range raises), so shrink it to fit.
        if img.width > bg.width or img.height > bg.height:
            img.thumbnail((bg.width, bg.height), Image.LANCZOS)

        x = random.randint(0, bg.width - img.width)
        y = random.randint(0, bg.height - img.height)

        # The object's own alpha channel is used as the paste mask.
        bg.paste(img, (x, y), img)

        if draw is not None:
            bbox = [x, y, x + img.width, y + img.height]  # [left, top, right, bottom]
            draw.rectangle(bbox, outline='red', width=3)

        annotations["annotations"].append({
            "id": len(annotations["annotations"]),
            "image_id": i,
            "category_id": cat_idx,
            "bbox": [x, y, img.width, img.height],  # [left, top, width, height]
            "area": img.width * img.height,
            "iscrowd": 0
        })

    generated_img_arr = np.array(bg.convert('RGB'))
    aug_img_arr = albu_transforms(image=generated_img_arr)
    aug_img = Image.fromarray(aug_img_arr['image'])
    aug_img.save(os.path.join(output_dir, generated_fn))

with open(os.path.join("generated", "annotations.json"), "w") as f:
    json.dump(annotations, f, indent=4)

100%|██████████| 5000/5000 [54:10<00:00,  1.54it/s]  
