I want you to write a python program for me. The purpose of the program is to create some image files containing random single digits. These digit files will be used to train a CRNN. 


Inputs:
I will provide a font ttf file called "font_file" and want to specify the number of digits files to create "num_files_to_create". I will also specify the output folder "output_folder".

Output:
PNG Image file containing single digit of the specified font, random size. The filename should be random digits and alpha characters - ending with _ and the digit within.

Balance the dataset: Ensure that the dataset contains an equal number of images for each digit, avoiding class imbalance issues during training.

There should be a record of the files created within a pandas dataframe - this should be returned and persisted to csv. This program will be run multiple times using different font files - make sure the returning dataframe is appended and not overwriting the csv.

All code outside of the input parameters should be held within functions, must be well commented and contain docstring.



In [3]:
import os
import random
import string
from PIL import Image, ImageDraw, ImageFont
import pandas as pd

def create_random_digit_image(font_file, output_folder, digit, filename):
    """
    Creates a random single digit image using the specified font file.
    
    Args:
        font_file (str): Font file path (ttf).
        output_folder (str): Output folder path.
        digit (int): The digit to create an image for.
        filename (str): The filename for the created image.
    """
    font_size = random.randint(20, 100)
    font = ImageFont.truetype(font_file, font_size)
    width, height = font.getsize(str(digit))
    img = Image.new("RGBA", (width, height), (255, 255, 255, 255))
    draw = ImageDraw.Draw(img)
    draw.text((0, 0), str(digit), font=font, fill=(0, 0, 0, 255))
    img.save(os.path.join(output_folder, filename + ".png"), "PNG")

def create_random_digit_images(font_file, num_files_to_create, output_folder):
    """
    Creates random single digit images using the specified font file.
    
    Args:
        font_file (str): Font file path (ttf).
        num_files_to_create (int): The number of image files to create.
        output_folder (str): Output folder path.
    
    Returns:
        pd.DataFrame: A dataframe containing the filenames and digits created.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    records = []
    num_files_per_digit = num_files_to_create // 10

    for digit in range(10):
        for _ in range(num_files_per_digit):
            filename = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + '_' + str(digit)
            create_random_digit_image(font_file, output_folder, digit, filename)
            records.append((filename, digit))

    return pd.DataFrame(records, columns=["filename", "digit"])

def append_dataframe_to_csv(df, csv_file):
    """
    Appends a dataframe to a CSV file.
    
    Args:
        df (pd.DataFrame): The dataframe to append.
        csv_file (str): The CSV file to append the dataframe to.
    """
    if os.path.exists(csv_file):
        df_existing = pd.read_csv(csv_file)
        df_combined = pd.concat([df_existing, df], ignore_index=True)
    else:
        df_combined = df

    df_combined.to_csv(csv_file, index=False)

# Input parameters
# font_file = "./fonts/5by7_b.ttf"
font_file = "./fonts/DOTMATRI.TTF"
# font_file = "./fonts/DOTMBold.TTF"

num_files_to_create = 200000
output_folder = "output_folder"
csv_file = "created_files.csv"

# Create images and record in dataframe
df = create_random_digit_images(font_file, num_files_to_create, output_folder)

# Append dataframe to CSV
append_dataframe_to_csv(df, csv_file)


  width, height = font.getsize(str(digit))
