In [280]:
# !pip install tqdm

In [281]:
import os
import random
from PIL import Image, ImageDraw, ImageFont
import numpy as np

In [282]:
# Asset locations: fonts and background images are discovered on disk at run time.
font_dir = './data/fonts'
background_dir = './data/bg'  # Directory where background images are stored

# os.listdir returns entries in arbitrary order, so the lists are sorted to keep
# them stable between runs. Extensions are matched case-insensitively so files
# such as "Arial.TTF" or "photo.JPG" are not silently skipped.
font_paths = sorted(
    os.path.join(font_dir, f)
    for f in os.listdir(font_dir)
    if f.lower().endswith(('.ttf', '.otf'))
)
background_files = sorted(
    f
    for f in os.listdir(background_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Parameters for image generation
image_size = (50, 50)  # (width, height) of every generated sample, in pixels
# Font sizes are 70% and 100% of the image width (linspace with num=2 yields only the endpoints).
font_sizes = [int(image_size[0] * scale) for scale in np.linspace(0.7, 1.0, num=2)]
# Candidate rotation angles in degrees: -15 .. 14 inclusive (range() excludes its stop value).
rotations = range(-15, 15)

# Per-channel shift used by get_dominant_color to derive a text color from the background.
color_diff = 50

print("image size: ", image_size)
print("font sizes: ", font_sizes)
print("rotations: ", rotations)

image size:  (50, 50)
font sizes:  [35, 50]
rotations:  range(-15, 15)


In [283]:
def get_dominant_color(image, diff=None):
    """Derive a text color that is offset from the most common background color.

    The image is shrunk to 10x10 pixels, the most frequent pixel value is taken
    as the dominant color, and every channel is then shifted by ``diff``:
    channels below 128 are raised, all others are lowered.

    Args:
        image: A PIL image (or any object exposing ``mode``, ``convert``,
            ``resize`` and ``getdata`` in the same way). Images that are not in
            RGB mode are converted first so each pixel is an ``(r, g, b)`` triple.
        diff: Per-channel shift. Defaults to the module-level ``color_diff``.

    Returns:
        An ``(r, g, b)`` tuple with every channel clamped to the range 0..255.
    """
    if diff is None:
        diff = color_diff

    # Grayscale / RGBA / palette pixels cannot be unpacked as three channels.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Resize image to reduce computation
    small_image = image.resize((10, 10))

    pixel_counts = {}
    for pixel in small_image.getdata():
        pixel_counts[pixel] = pixel_counts.get(pixel, 0) + 1

    # On a tie the color seen first wins, because max() keeps the first maximum.
    dominant_color = max(pixel_counts, key=pixel_counts.get)

    shifted = []
    for channel in dominant_color:
        value = channel + diff if channel < 128 else channel - diff
        # Clamp so a large (or negative) diff can never leave the valid 8-bit range.
        shifted.append(max(0, min(255, value)))
    return tuple(shifted)

In [284]:
def prepare_background_image(background_image):
    """Produce a randomly flipped, cropped and rotated background tile.

    Steps: optional horizontal and vertical flips, a random crop of twice the
    output size, a rotation by a random angle drawn from ``rotations``, and a
    final center crop down to ``image_size``.

    Args:
        background_image: PIL image to sample the tile from.

    Returns:
        A PIL image whose size equals the module-level ``image_size``.
    """
    # Randomly flip the background image
    if random.choice([True, False]):
        background_image = background_image.transpose(Image.FLIP_LEFT_RIGHT)
    if random.choice([True, False]):
        background_image = background_image.transpose(Image.FLIP_TOP_BOTTOM)

    output_w, output_h = image_size
    # Work on a tile twice the output size so the empty corners introduced by
    # the rotation below fall outside the final center crop.
    crop_w, crop_h = output_w * 2, output_h * 2

    # Ensure the background is large enough for cropping
    width, height = background_image.size
    if width < crop_w or height < crop_h:
        background_image = background_image.resize((crop_w, crop_h), resample=Image.LANCZOS)
        # Re-read the size: the crop bounds must describe the resized image,
        # otherwise randint() below receives a negative upper bound and raises.
        width, height = background_image.size

    # Choose a random crop position
    x = random.randint(0, width - crop_w)
    y = random.randint(0, height - crop_h)
    cropped_bg = background_image.crop((x, y, x + crop_w, y + crop_h))

    # Rotate the tile about its center without changing its size
    rot = random.choice(rotations)
    rotated_bg = cropped_bg.rotate(rot, resample=Image.BICUBIC, expand=False)

    # Center-crop the rotated tile down to the output size
    left = (rotated_bg.width - output_w) // 2
    top = (rotated_bg.height - output_h) // 2
    return rotated_bg.crop((left, top, left + output_w, top + output_h))

In [285]:
def create_digit_image(digit, background_image, font_path, font_size, rotation, font_color=None):
    """Render a single rotated digit on top of a randomized background tile.

    Args:
        digit: Value to draw; it is converted with ``str()``.
        background_image: Source PIL image, passed to ``prepare_background_image``.
        font_path: Path of the font file loaded with ``ImageFont.truetype``.
        font_size: Font size handed to ``ImageFont.truetype``.
        rotation: Rotation of the digit in degrees (counter-clockwise in Pillow).
        font_color: Fill color for the digit. When ``None`` the color is derived
            from the prepared background via ``get_dominant_color``.

    Returns:
        The prepared background tile (size ``image_size``) with the digit
        composited onto it.
    """
    # Prepare the rotated and cropped background
    background_image = prepare_background_image(background_image)

    # Compare against None so that falsy-but-valid colors (e.g. 0) are honored.
    if font_color is not None:
        text_color = font_color
    else:
        # Get a color variant from the background image
        text_color = get_dominant_color(background_image)

    # Load the font
    font = ImageFont.truetype(font_path, font_size)

    # Draw on a transparent layer so the digit can be rotated independently
    # of the background.
    text_layer = Image.new('RGBA', image_size, (255, 255, 255, 0))
    layer_draw = ImageDraw.Draw(text_layer)

    text = str(digit)
    # textbbox returns (left, top, right, bottom). left/top are generally not
    # zero, so the glyph size is the difference of the edges and the drawing
    # origin must be moved back by left/top to center the visible ink.
    left, top, right, bottom = layer_draw.textbbox((0, 0), text, font=font)
    text_position = (
        (image_size[0] - (right - left)) // 2 - left,
        (image_size[1] - (bottom - top)) // 2 - top,
    )

    # Draw the digit on the temporary layer
    layer_draw.text(text_position, text, font=font, fill=text_color)

    # expand=False keeps the layer at image_size and rotates about its center,
    # so the layer still lines up with the background when pasted at (0, 0).
    text_layer = text_layer.rotate(rotation, resample=Image.BICUBIC, expand=False)

    # Paste the rotated text onto the background, using its alpha channel as mask
    background_image.paste(text_layer, (0, 0), text_layer)

    return background_image

In [286]:
from tqdm import tqdm

def generate_images(col_name: str, num_samples_per_digit=5, font_color=None):
    """Generate and save synthetic images for every digit 0-9.

    For each digit, ``num_samples_per_digit`` PNG files are written to
    ``./output/<col_name>/<digit>/digit_<digit>_<index>.png``. Background,
    font, font size and rotation are drawn at random for every sample from the
    module-level ``background_files``, ``font_paths``, ``font_sizes`` and
    ``rotations``.

    Args:
        col_name: Name of the sub-directory of ``./output`` for this collection.
        num_samples_per_digit: Number of images to create for each digit.
        font_color: Fill color forwarded to ``create_digit_image``; ``None``
            lets that function choose a color from the background.
    """
    for digit in tqdm(range(10), desc="Processing digits", unit="Digit", total=10):
        # Create the directory that receives this digit's images
        output_dir = os.path.join('./output', col_name, str(digit))
        os.makedirs(output_dir, exist_ok=True)

        for i in range(num_samples_per_digit):
            # Randomly select properties for the image
            background_path = os.path.join(background_dir, random.choice(background_files))
            font_path = random.choice(font_paths)
            font_size = random.choice(font_sizes)
            rotation = random.choice(rotations)

            # Open through a context manager so the file handle is released
            # deterministically; convert() yields an in-memory copy that stays
            # usable after the file is closed.
            with Image.open(background_path) as source:
                background_image = source.convert('RGB')

            # Generate the image
            image = create_digit_image(
                digit,
                background_image,
                font_path,
                font_size,
                rotation,
                font_color=font_color,
            )

            # Save the image
            image.save(os.path.join(output_dir, f'digit_{digit}_{i}.png'))




In [287]:
# Render 2000 white-text samples per digit into ./output/white_text_s50/
generate_images(
    col_name='white_text_s50',
    num_samples_per_digit=2000,
    font_color=(255, 255, 255),
)

# Create digit images with a font color derived from each background (font_color=None)
# generate_images('white_text',
#                 num_samples_per_digit=1,
#                 font_color=None
#                 )

Processing digits: 100%|██████████| 10/10 [04:29<00:00, 26.98s/Digit]


In [288]:

# # AI prompts for DALL·E to generate backgrounds
# ai_background_prompts = [
#     "A gradient background with soft pastel colors and abstract shapes",
#     "A textured paper background with subtle patterns",
#     "A digital art background with a futuristic grid and neon lights",
#     "A landscape background with soft rolling hills in the distance",
#     "A retro 80s-style background with geometric patterns and vibrant colors",
#     "A blurred forest background with sunlight peeking through the trees",
#     "A simple wooden texture background",
#     "A solid color background with a watercolor texture effect",
#     "A modern abstract art background with dynamic swirls and color splashes",
#     "A city skyline background at dusk with silhouettes of buildings"