In [1]:
import itertools
import json
import os
from concurrent.futures import ProcessPoolExecutor

from text_image_generator import create_text_image

# Scratch directory for the generated images; a no-op when it already exists.
os.makedirs("./tmp", exist_ok=True)


def get_all_fonts(parent_folder="fonts", exclude_bold=True, exclude_italic=True):
    """Collect the font files found underneath ``parent_folder``.

    The folder is walked recursively and every ``.ttf`` / ``.otf`` file is
    collected. The extension is matched case-insensitively, so ``Foo.TTF``
    is picked up as well.

    Args:
        parent_folder: Directory to search.
        exclude_bold: When true, drop fonts whose path *below*
            ``parent_folder`` contains "bold" (any letter case).
        exclude_italic: When true, drop fonts whose path *below*
            ``parent_folder`` contains "italic" (any letter case).

    Returns:
        A sorted list of font paths, each one prefixed with
        ``parent_folder`` exactly as it was passed in.
    """
    font_extensions = (".ttf", ".otf")

    excluded_keywords = []
    if exclude_bold:
        excluded_keywords.append("bold")
    if exclude_italic:
        excluded_keywords.append("italic")

    font_files = []
    for root, _dirs, files in os.walk(parent_folder):
        for file in files:
            if not file.lower().endswith(font_extensions):
                continue

            font_path = os.path.join(root, file)
            # Match the keywords only against the part of the path below
            # parent_folder: the location of the search root itself (e.g.
            # "/data/bold_fonts") must not cause every font to be dropped.
            relative_path = os.path.relpath(font_path, parent_folder).lower()
            if any(keyword in relative_path for keyword in excluded_keywords):
                continue

            font_files.append(font_path)

    # os.walk yields entries in arbitrary order; sort for reproducible output.
    font_files.sort()

    print(f"Found {len(font_files)} fonts")

    return font_files


def generate_dataset(words, fonts, output_folder="Acknowledgement", scale_factor=1.0):
    """Render every word/font/parameter combination and record what was made.

    For each word, one ``create_text_image`` call is submitted to a process
    pool for every combination of font, circle index (one per character
    position of the word), thickness, x/y offset and padding. The settings of
    every call whose result is truthy are written, together with that result
    under the ``"image_path"`` key, to ``configurations.json`` inside
    ``output_folder``.

    Args:
        words: Iterable of strings to render.
        fonts: Iterable of font file paths.
        output_folder: Directory handed to ``create_text_image`` and used for
            ``configurations.json``; created if it does not exist.
        scale_factor: Forwarded unchanged to ``create_text_image``.

    Raises:
        Exception: Whatever a worker raised is re-raised when its result is
            collected; in that case no JSON file is written.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Parameter grid. The offset ranges currently hold the single value 0.
    thicknesses = range(2, 5)
    x_offsets = range(0, 1)
    y_offsets = range(0, 1)
    paddings = (25, 50, 100, 200)
    fixed_settings = {
        "canvas_width": 10,
        "canvas_height": 2,
        "final_width": 512,
        "final_height": 512,
    }

    tasks = []
    configurations = []

    with ProcessPoolExecutor() as executor:
        for word in words:
            variations = itertools.product(
                fonts,
                range(len(word)),
                thicknesses,
                x_offsets,
                y_offsets,
                paddings,
            )
            for (
                font_path,
                circle_index,
                thickness,
                x_offset,
                y_offset,
                padding,
            ) in variations:
                # Single source of truth for the settings: the same mapping
                # feeds both the worker call and the saved configuration, so
                # the two cannot drift apart.
                render_kwargs = {
                    "font_path": font_path,
                    "circle_index": circle_index,
                    "thickness": thickness,
                    "scale_factor": scale_factor,
                    "padding": padding,
                    "x_offset": x_offset,
                    "y_offset": y_offset,
                    **fixed_settings,
                }
                config = {"word": word, **render_kwargs}
                future = executor.submit(
                    create_text_image,
                    text=word,
                    output_folder=output_folder,
                    **render_kwargs,
                )
                tasks.append((config, future))

        # Collect in submission order so the JSON order is deterministic.
        for config, future in tasks:
            image_path = future.result()
            if image_path:
                config["image_path"] = image_path
                configurations.append(config)

    # Save the configurations to a JSON file
    config_file = os.path.join(output_folder, "configurations.json")
    with open(config_file, "w", encoding="utf-8") as f:
        json.dump(configurations, f, indent=4)


if __name__ == "__main__":
    # Demo run: one sample word rendered with every font found under "fonts".
    words = ["Acknowledgement"]
    fonts = get_all_fonts(parent_folder="fonts")
    # scale_factor is a tunable; adjust as needed.
    generate_dataset(words=words, fonts=fonts, scale_factor=1.4)

Found 2 fonts


2024-05-30 22:46:07,762 - DEBUG - Loaded backend module://matplotlib_inline.backend_inline version unknown.
2024-05-30 22:46:07,762 - DEBUG - Loaded backend module://matplotlib_inline.backend_inline version unknown.
2024-05-30 22:46:07,763 - DEBUG - findfont: Matching sans\-serif:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0.
2024-05-30 22:46:07,763 - DEBUG - findfont: Matching sans\-serif:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0.
2024-05-30 22:46:07,763 - DEBUG - findfont: score(FontEntry(fname='/Volumes/SecondHeart/miniconda3/envs/defaulttorch2/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-BoldItalic.ttf', name='DejaVu Serif', style='italic', variant='normal', weight=700, stretch='normal', size='scalable')) = 11.335
2024-05-30 22:46:07,763 - DEBUG - findfont: score(FontEntry(fname='/Volumes/SecondHeart/miniconda3/envs/defaulttorch2/lib/python3.10/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf', 