In [21]:
import os
import numpy as np
import csv 
import random
import string
from PIL import Image, ImageDraw, ImageFont

In [22]:
# Destination folder for the generated images and their label CSV; it is
# created up front so the generation cells can write into it directly.
output_dir = 'captcha_images/bonus'
os.makedirs(output_dir, exist_ok=True)

# Rendering parameters. The canvas size is (width, height) in pixels.
image_size = (200, 80)
font_size = 40
num_samples = 100  # not referenced by the generation cells visible in this notebook

# Pool of 11 candidate fonts; get_font() draws one at random per image.
font_paths = [
    # Fonts referenced relative to the notebook's parent directory
    '../fonts/OpenSans-Regular.ttf',
    '../fonts/DejaVuSans.ttf',
    '../fonts/Ubuntu-Regular.ttf',
] + [
    # Fonts referenced by absolute system path
    '/usr/share/fonts/liberation/LiberationSans-Regular.ttf',
    '/usr/share/fonts/liberation/LiberationSerif-Regular.ttf',
    '/usr/share/fonts/liberation/LiberationMono-Regular.ttf',
    '/usr/share/fonts/TTF/DejaVuSerif-Bold.ttf',
    '/usr/share/fonts/TTF/DejaVuSans-Oblique.ttf',
    '/usr/share/fonts/TTF/DejaVuSansCondensed-Bold.ttf',
    '/usr/share/fonts/noto/NotoSans-Regular.ttf',
    '/usr/share/fonts/noto/NotoSerif-Regular.ttf',
]

In [23]:
# Load the vocabulary: one word per line, surrounding whitespace stripped.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped so
# they cannot become text-less images with an empty label in later cells.
with open('../ai_wordlist.txt', 'r') as f:
    words = [entry for entry in (line.strip() for line in f) if entry]

In [24]:
def add_noise(image, sigma=15):
    """Return a copy of ``image`` with additive, zero-mean Gaussian pixel noise.

    Args:
        image: Source picture; anything ``np.array`` can turn into a pixel
            array (the notebook passes PIL images).
        sigma: Standard deviation of the noise in pixel-value units. The
            default of 15 is the reduced level chosen for the coloured
            backgrounds; 0 leaves every pixel unchanged.

    Returns:
        A new image built with ``Image.fromarray`` from the noisy pixels,
        clipped to the range 0-255 and stored as uint8.
    """
    # Widen from uint8 first so adding negative noise cannot wrap around.
    np_img = np.array(image).astype(np.int16)
    # Noise is drawn from NumPy's global random state.
    noise = np.random.normal(0, sigma, np_img.shape)
    noisy = np.clip(np_img + noise, 0, 255).astype(np.uint8)
    return Image.fromarray(noisy)

In [25]:
def random_caps(word):
    """Return ``word`` with the case of each character chosen by a coin flip.

    One ``random.random()`` draw is made per character, in order: a draw
    above 0.5 upper-cases the character, anything else lower-cases it.
    """
    pieces = []
    for ch in word:
        if random.random() > 0.5:
            pieces.append(ch.upper())
        else:
            pieces.append(ch.lower())
    return ''.join(pieces)

In [29]:
def get_background_color():
    """Pick the background colour name for one image: 'green' or 'red'."""
    options = ['green', 'red']
    return random.choice(options)

def get_background_rgb(color_name):
    """Map a background colour name to its RGB tuple.

    'green' gives light green, 'red' gives light pink/red, and any other
    value falls back to white.
    """
    palette = (
        ('green', (144, 238, 144)),  # Light green
        ('red', (255, 182, 193)),    # Light pink/red
    )
    for known_name, rgb in palette:
        if color_name == known_name:
            return rgb
    return (255, 255, 255)  # White fallback

def render_word_based_on_background(word, background_color):
    """
    Decide what text to draw for a given background colour.

    A 'red' background draws the word reversed; every other background
    draws it unchanged. The label is always the word as passed in.
    Returns: (rendered_word, original_word_for_label)
    """
    should_reverse = background_color == 'red'
    rendered_word = word[::-1] if should_reverse else word
    return rendered_word, word

In [30]:
def get_font(font_paths = font_paths):
    """Return one font path picked at random from ``font_paths``.

    The default is the module-level ``font_paths`` list as it exists when
    this function is defined; pass another sequence to draw from that instead.
    """
    chosen_path = random.choice(font_paths)
    return chosen_path

In [38]:
def generate_dataset(bg_color: str, writer):
    """Render every word in ``words`` on one solid background colour.

    For each word this picks a random font, randomises the letter case,
    draws the text centred on an ``image_size`` canvas, adds pixel noise,
    saves the picture into ``output_dir`` as ``img_{bg_color}_{index}.png``
    and appends a ``[filename, word]`` row through ``writer``.

    Args:
        bg_color: 'red' selects a pure red (255, 0, 0) canvas; any other
            value selects pure green (0, 255, 0). The value is also embedded
            in the saved file names.
        writer: Object with a ``writerow(list)`` method; the notebook passes
            a ``csv.writer``.

    NOTE(review): unlike the mixed-background generation cell in this
    notebook, text on red backgrounds is not reversed here and the pastel
    colours from get_background_rgb() are not used. Confirm this is intended.
    """
    # The canvas colour is the same for every word, so resolve it once.
    bg_color_val = (255, 0, 0) if bg_color == 'red' else (0, 255, 0)

    print(len(words))
    for i, word in enumerate(words):
        font_path = get_font()
        word = random_caps(word)

        image = Image.new('RGB', image_size, bg_color_val)
        draw = ImageDraw.Draw(image)
        font = ImageFont.truetype(font_path, font_size)

        # textbbox((0, 0), ...) usually does not start at (0, 0): the box is
        # offset by the font's bearings. Subtract that origin so the visible
        # glyphs, not the text anchor, end up centred on the canvas.
        left, top, right, bottom = draw.textbbox((0, 0), word, font=font)
        text_w, text_h = right - left, bottom - top
        position = (
            (image_size[0] - text_w) // 2 - left,
            (image_size[1] - text_h) // 2 - top,
        )

        # A bare integer fill on an RGB image is read by Pillow as a packed
        # colour whose low byte is the red channel, so the former fill=50
        # painted (50, 0, 0). It is spelled out so the ink colour is explicit.
        # NOTE(review): if dark grey was intended, use (50, 50, 50).
        draw.text(position, word, fill=(50, 0, 0), font=font)

        image = add_noise(image)

        filename = f"img_{bg_color}_{i}.png"
        image.save(os.path.join(output_dir, filename))
        writer.writerow([filename, word])



In [39]:
# Write labels.csv next to the images: a header row, then one row per image
# produced by generate_dataset(), with the green pass before the red pass.
output_file_path = os.path.join(output_dir, 'labels.csv')
# newline='' leaves row termination to the csv module.
with open(output_file_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['filename', 'label'])
    for bg_color in ('green', 'red'):
        generate_dataset(bg_color, writer)

100
100
100


In [31]:
# Mixed-background generation: every word gets a randomly chosen green or red
# background. On red the text is drawn reversed, but the CSV label always
# keeps the original (un-reversed) word.
output_file_path = os.path.join(output_dir, 'labels.csv')
i = 0
with open(output_file_path, mode = 'w', newline = '') as file:
    writer = csv.writer(file)
    writer.writerow(['filename', 'label'])

    print(len(words))
    for word in words:
        font_path = get_font()
        word = random_caps(word)

        # Get background color (green or red)
        bg_color_name = get_background_color()
        bg_color_rgb = get_background_rgb(bg_color_name)

        # Determine how to render the word based on background
        rendered_word, label_word = render_word_based_on_background(word, bg_color_name)

        # Create image with colored background
        image = Image.new('RGB', image_size, bg_color_rgb)
        draw = ImageDraw.Draw(image)
        font = ImageFont.truetype(font_path, font_size)

        # Calculate position for the rendered word (which might be reversed).
        # textbbox((0, 0), ...) usually does not start at (0, 0): the box is
        # offset by the font's bearings. Subtract that origin so the visible
        # glyphs, not the text anchor, end up centred on the canvas.
        bbox = draw.textbbox((0, 0), rendered_word, font=font)
        text_w, text_h = bbox[2] - bbox[0], bbox[3] - bbox[1]
        position = (
            (image_size[0] - text_w) // 2 - bbox[0],
            (image_size[1] - text_h) // 2 - bbox[1],
        )

        # Use darker text color to contrast with colored backgrounds:
        # each RGB channel is drawn independently from 0..100.
        text_color = tuple(random.randint(0, 100) for _ in range(3))
        draw.text(position, rendered_word, fill=text_color, font=font)

        # Add noise
        image = add_noise(image)

        filename = f"img_{i}.png"
        i += 1
        image.save(os.path.join(output_dir, filename))

        # Important: Save the original word as label, not the rendered word
        writer.writerow([filename, label_word])

        # Debug info for first few images (i was already incremented, so
        # this reports images 1 through 5)
        if i <= 5:
            print(f"Image {i}: Background={bg_color_name}, Original='{label_word}', Rendered='{rendered_word}'")


100
Image 1: Background=red, Original='NeUrAL', Rendered='LArUeN'
Image 2: Background=green, Original='tEnSor', Rendered='tEnSor'
Image 3: Background=green, Original='maTRix', Rendered='maTRix'
Image 4: Background=green, Original='veCTOr', Rendered='veCTOr'
Image 5: Background=green, Original='LOgits', Rendered='LOgits'
