## Code To Generate Datasets using the Generated Images!

In [None]:
# Imports 
import os
from rembg import remove
from PIL import Image
import io
import random
import yaml

from helper_functions_image_creation import *

### Cut out Background of generated images and crop them

In [None]:
def remove_background_and_crop(input_dir='./all_generated_objects', output_dir='./all_objects_no_background'):
    """
    Remove the background from every PNG in each subfolder and crop the result.

    Each immediate subfolder of ``input_dir`` is mirrored under ``output_dir``.
    Every file ending in ``.png`` is passed through ``remove``, cropped to the
    bounding box of its visible pixels and saved as
    ``<original stem>_no_background.png`` in the mirrored subfolder.

    Args:
        input_dir (str): Folder whose subfolders contain the source PNG files.
        output_dir (str): Folder receiving the processed images; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    for subfolder in os.listdir(input_dir):
        subfolder_path = os.path.join(input_dir, subfolder)
        if not os.path.isdir(subfolder_path):
            continue  # only subfolders are processed; loose files are ignored

        output_subfolder = os.path.join(output_dir, subfolder)
        os.makedirs(output_subfolder, exist_ok=True)

        for file_name in os.listdir(subfolder_path):
            if not file_name.endswith('.png'):
                continue

            input_path = os.path.join(subfolder_path, file_name)
            # Swap only the trailing extension: str.replace would also rewrite
            # a '.png' that happens to appear earlier in the file name.
            stem = file_name[:-len('.png')]
            output_path = os.path.join(output_subfolder, f'{stem}_no_background.png')

            with open(input_path, 'rb') as f:
                input_data = f.read()

            output_data = remove(input_data)

            with Image.open(io.BytesIO(output_data)) as output_image:
                # Measure the box on the alpha channel when there is one.
                # Depending on the Pillow version, getbbox() on the full image
                # may count transparent pixels that still carry colour, which
                # would leave the cut-out uncropped.
                if 'A' in output_image.getbands():
                    bbox = output_image.getchannel('A').getbbox()
                else:
                    bbox = output_image.getbbox()

                if bbox is None:
                    # Nothing visible is left; keep the file (uncropped) so the
                    # output still mirrors the input, but tell the user.
                    print(f"warning: no foreground found in {input_path}; saved uncropped")
                    output_image.save(output_path)
                else:
                    output_image.crop(bbox).save(output_path)


In [None]:
def load_images(object_folders, background_folder, ratio=(60, 30, 10)):
    """
    List object and background PNGs and split them into train/val/test sets.

    Files are listed in sorted order so the same folders always produce the
    same split. Both objects and backgrounds are cut at the same cumulative
    boundaries: the first ``ratio[0]`` percent go to train, the next
    ``ratio[1]`` percent to val, and whatever remains to test (``ratio[2]`` is
    therefore implied and not read).

    Args:
        object_folders (str): Folder with one subfolder per category; each
            subfolder holds that category's ``.png`` files.
        background_folder (str): Folder holding the background ``.png`` files.
        ratio (sequence): Percentages for train, val and test.

    Returns:
        tuple: ``(object_dict, backgrounds_dict, category_to_id)`` where
            ``object_dict`` maps ``"train"``/``"val"``/``"test"`` to
            ``{category: [file names]}`` (names only, no folder),
            ``backgrounds_dict`` maps the same keys to lists of background
            paths (joined with ``background_folder``), and ``category_to_id``
            maps each category name to its index in sorted name order.
    """
    train_pct = ratio[0]
    train_val_pct = ratio[0] + ratio[1]

    def _split(items):
        # Cumulative boundaries keep val from collapsing to an empty slice
        # for short lists (3 items -> 1 / 1 / 1 with the default ratio).
        train_end = int(len(items) * train_pct / 100)
        val_end = int(len(items) * train_val_pct / 100)
        return items[:train_end], items[train_end:val_end], items[val_end:]

    # Category name -> sorted list of PNG file names in that category folder
    objects_dict = {}
    for subfolder in sorted(os.listdir(object_folders)):
        subfolder_path = os.path.join(object_folders, subfolder)
        if os.path.isdir(subfolder_path):
            objects_dict[subfolder] = sorted(
                f for f in os.listdir(subfolder_path) if f.endswith('.png')
            )

    # Background images are stored as paths, not bare names
    backgrounds = [
        os.path.join(background_folder, f)
        for f in sorted(os.listdir(background_folder))
        if f.endswith('.png')
    ]

    train_bg, val_bg, test_bg = _split(backgrounds)

    train_objects = {}
    val_objects = {}
    test_objects = {}
    for category, files in objects_dict.items():
        train_objects[category], val_objects[category], test_objects[category] = _split(files)

    # Create category-to-ID mapping
    category_to_id = {category: idx for idx, category in enumerate(sorted(objects_dict))}

    print("done loading and splitting images")

    object_dict = {"train": train_objects, "val": val_objects, "test": test_objects}
    backgrounds_dict = {"train": train_bg, "val": val_bg, "test": test_bg}

    return object_dict, backgrounds_dict, category_to_id



def generate_images_and_labels(objects_dict, backgrounds, objects_folder, output_folder, category_to_id, output_labels, count=1000):
    """
    Compose ``count`` synthetic images and write one label file per image.

    For every image a random background is opened, between 1 and 15 randomly
    chosen object images are handed to ``place_objects_on_background``, and the
    returned image and label rows are saved as ``generated_image_<n>.png`` in
    ``output_folder`` and ``generated_image_<n>.txt`` in ``output_labels``
    (``n`` starts at 1).

    Args:
        objects_dict (dict): Category name -> list of object file names, as
            produced for one split by ``load_images``.
        backgrounds (list): Paths of the background images for this split.
        objects_folder (str): Root folder; an object is opened from
            ``objects_folder/<category>/<file name>``.
        output_folder (str): Folder for the generated images (created if missing).
        category_to_id (dict): Category name -> integer ID, forwarded to
            ``place_objects_on_background``.
        output_labels (str): Folder for the label files (created if missing).
        count (int): Number of images to generate.

    Raises:
        ValueError: If ``count`` is positive but ``backgrounds`` is empty or no
            category has any object file to draw from.
    """
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(output_labels, exist_ok=True)

    if count <= 0:
        return

    # Small categories can end up with an empty list in a split; drawing from
    # one would raise an opaque IndexError inside random.choice, so only
    # categories that actually have files are eligible.
    drawable_categories = [category for category, files in objects_dict.items() if files]
    if not backgrounds:
        raise ValueError("no background images available to generate from")
    if not drawable_categories:
        raise ValueError("no object images available to generate from")

    for i in range(count):
        # Select a random background; convert() yields an independent copy,
        # so the file handle can be released right away.
        with Image.open(random.choice(backgrounds)) as background_file:
            background_image = background_file.convert("RGBA")

        num_objects = random.randint(1, 15)
        selected_objects = []
        # Handed to the placement helper with one zeroed entry per category
        # (including categories that have no files in this split).
        object_counts = {key: 0 for key in objects_dict}

        for _ in range(num_objects):
            category = random.choice(drawable_categories)
            file = random.choice(objects_dict[category])
            path = os.path.join(objects_folder, category, file)
            selected_objects.append((Image.open(path), category))

        # Place the objects on the background
        result, yolo_labels = place_objects_on_background(background_image, selected_objects, object_counts, category_to_id)

        # Image and label share the same stem so they can be paired by name
        stem = f'generated_image_{i+1}'
        result.save(os.path.join(output_folder, f'{stem}.png'))

        # Each label is written as one space-separated line
        label_path = os.path.join(output_labels, f'{stem}.txt')
        with open(label_path, "w") as f:
            for label in yolo_labels:
                f.write(" ".join(map(str, label)) + "\n")

        
def create_yaml_for_generated(output_folders, category_to_id, output_dir):
    """
    Create a dataset.yaml file for the generated dataset.

    The image folders are written relative to ``output_dir`` (the folder that
    holds the YAML file). The file lists ``train``, ``val``, ``test`` (only when
    ``output_folders`` provides it), ``nc`` and ``names``.

    Args:
        output_folders (dict): Folder paths keyed by split. 'train' and 'val'
            are required; 'test' is optional.
        category_to_id (dict): Mapping from category name to unique integer ID.
        output_dir (str): Directory where the dataset.yaml file should be saved.

    Raises:
        KeyError: If ``output_folders`` has no 'train' or 'val' entry.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Build the YAML data; insertion order is the order written to the file
    yaml_data = {
        "train": os.path.relpath(output_folders["train"], output_dir),
        "val": os.path.relpath(output_folders["val"], output_dir),
    }
    # Record the test split too instead of silently dropping it
    if "test" in output_folders:
        yaml_data["test"] = os.path.relpath(output_folders["test"], output_dir)
    yaml_data["nc"] = len(category_to_id)
    # Invert the mapping to ID -> name, ordered by ID
    yaml_data["names"] = {v: k for k, v in sorted(category_to_id.items(), key=lambda x: x[1])}

    # Write YAML to file
    yaml_path = os.path.join(output_dir, "dataset.yaml")
    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(yaml_data, yaml_file, sort_keys=False)

    print(f"dataset.yaml created at: {yaml_path}")


In [None]:
# Main folder containing the objects
objects_folders = 'all_objects_no_background'
background_folder = 'generated_backgrounds'

objects_dicts, backgrounds_dict, category_to_id = load_images(objects_folders, background_folder)
output_folders = {"train": "generated/images/train", "val": "generated/images/val", "test": "generated/images/test"}
output_folder_lable = {"train": "generated/labels/train", "val": "generated/labels/val", "test": "generated/labels/test"}

# generate_images_and_labels declares category_to_id BEFORE output_labels, so
# both are passed by keyword: passing them positionally in the order
# (label folder, mapping) would send the mapping to os.makedirs.
for split in ("train", "val", "test"):
    generate_images_and_labels(
        objects_dicts[split],
        backgrounds_dict[split],
        objects_folders,
        output_folders[split],
        category_to_id=category_to_id,
        output_labels=output_folder_lable[split],
        count=1000,
    )
create_yaml_for_generated(output_folders, category_to_id, "./generated")
