# In [3]:
import random
import text_utils as tu  # Ensure this supports UTF-8
from PIL import ImageFont, Image, ImageDraw
import os
import numpy as np
import cv2

class RendererV3(object):
    """Generate synthetic multilingual text, on scene images or stand-alone.

    The renderer keeps one font path and one word list per language code,
    picks a language at random for every sample, and either places the text
    on an existing image (``place_text``) or writes plain black-on-white
    word images plus their transcriptions to disk (``generate_dataset``).
    """

    # Point size used for every font loaded by this class.
    FONT_SIZE = 32

    def __init__(self, data_dir, max_time=None):
        """Set up the text renderer, colorizer, fonts and word lists.

        Args:
            data_dir: Directory handed to ``tu.RenderFont`` and ``Colorize``
                and used as the base directory of the font files.
            max_time: Stored on the instance as ``self.max_time``; it is not
                read anywhere else in this class.
        """
        self.text_renderer = tu.RenderFont(data_dir)
        # NOTE(review): `Colorize` is not imported in the visible part of the
        # file -- confirm it is defined or imported elsewhere.
        self.colorizer = Colorize(data_dir)
        self.min_char_height = 8  # px
        self.min_asp_ratio = 0.4
        self.max_text_regions = 7
        self.max_time = max_time

        # Font file per language code. Only German is currently enabled; the
        # commented entries show how further languages would be registered.
        self.fonts = {
            # 'en': os.path.join(data_dir, 'fonts/DejaVuSans.ttf'),
            # 'fr': os.path.join(data_dir, 'fonts/NotoSans-Regular.ttf'),
            'de': os.path.join(data_dir, 'LiberationSans-Regular.ttf'),
            # 'ru': os.path.join(data_dir, 'fonts/NotoSansCyrillic-Regular.ttf'),
            # 'it': os.path.join(data_dir, 'fonts/DejaVuSans.ttf'),
        }

        # Word-list file per language code. These paths are used as given,
        # i.e. relative to the current working directory, not to `data_dir`.
        word_list_files = {
            # 'en': 'data/english_words.txt',
            # 'fr': 'data/french_words.txt',
            'de': 'german_words.txt',
            # 'ru': 'data/russian_words.txt',
            # 'it': 'data/italian_words.txt',
        }
        self.word_lists = {}
        for lang, file_path in word_list_files.items():
            self.word_lists[lang] = self.load_word_list(file_path)
            if not self.word_lists[lang]:
                print(f"Warning: Word list for {lang} is empty or missing!")

    def load_word_list(self, file_path):
        """Load a list of words from a text file, one word per line.

        Surrounding whitespace is stripped and blank lines are dropped so
        that no empty "word" can end up in generated text.

        Args:
            file_path: Path of the UTF-8 encoded word file.

        Returns:
            The list of words, or an empty list when `file_path` is not an
            existing regular file.
        """
        if not os.path.isfile(file_path):
            return []
        # 'utf-8-sig' reads plain UTF-8 as well, but also swallows a leading
        # byte-order mark that would otherwise stick to the first word.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            stripped = (line.strip() for line in f)
            return [word for word in stripped if word]

    def sample_language(self):
        """Randomly select a language code for text generation.

        Languages whose word list is non-empty are preferred; if every list
        is empty the choice falls back to all registered languages.
        """
        usable = [lang for lang, words in self.word_lists.items() if words]
        return random.choice(usable or list(self.word_lists))

    def generate_multilingual_text(self, num_words=5):
        """Generate text in a randomly chosen language.

        Args:
            num_words: Maximum number of distinct words to draw. Fewer are
                used when the word list is shorter.

        Returns:
            A ``(text, lang)`` tuple where `text` is the drawn words joined
            by single spaces (empty if the word list is empty) and `lang` is
            the chosen language code.
        """
        lang = self.sample_language()
        vocabulary = self.word_lists[lang]
        count = max(0, min(len(vocabulary), num_words))
        return ' '.join(random.sample(vocabulary, count)), lang

    @staticmethod
    def _text_size(font, text):
        """Return the (width, height) needed to draw `text` at the origin."""
        if hasattr(font, 'getbbox'):
            # `getsize` was removed in Pillow 10. The right/bottom edges of
            # the bounding box are the extent of text drawn at (0, 0).
            _, _, right, bottom = font.getbbox(text)
            return int(right), int(bottom)
        return font.getsize(text)  # Pillow versions without getbbox

    def place_text(self, rgb, collision_mask, H, Hinv):
        """Place multilingual text on an image.

        Args:
            rgb: Image array; only ``rgb.shape[:2]`` is read here, reversed
                and passed to ``warpHomography`` as the output size.
            collision_mask: Array passed to the text renderer and updated
                IN PLACE with the rendered text pixels (set to 255).
            H: Passed to ``self.warpHomography`` together with the text mask.
            Hinv: Passed to ``self.homographyBB`` together with the boxes.

        Returns:
            ``(text, bb, collision_mask)`` on success, or ``None`` when no
            text is available or the renderer could not place it.
        """
        text, lang = self.generate_multilingual_text()
        if not text:
            # Empty word list: there is nothing to render.
            return None

        font_path = self.fonts[lang]
        try:
            font = ImageFont.truetype(font_path, self.FONT_SIZE)
        except IOError:
            print(f"Error: Could not load font {font_path}. Falling back to default.")
            font = ImageFont.load_default()

        render_res = self.text_renderer.render_sample(font, collision_mask, text)
        if render_res is None:
            return None

        text_mask, _loc, bb = render_res
        collision_mask += (255 * (text_mask > 0)).astype('uint8')
        # NOTE(review): `warpHomography` and `homographyBB` are not defined in
        # the visible part of this class, and the warped mask is never used
        # or returned -- confirm both against the rest of the project.
        text_mask = self.warpHomography(text_mask, H, rgb.shape[:2][::-1])
        bb = self.homographyBB(bb, Hinv)
        return text, bb, collision_mask

    def generate_dataset(self, output_dir, num_samples=100):
        """Generate a dataset of synthetic text images.

        For every sample a grayscale PNG with black text on a white
        background is written next to a UTF-8 ``.txt`` file holding the
        rendered text. Samples whose font cannot be loaded or whose text is
        empty are skipped.

        Args:
            output_dir: Target directory; created if it does not exist.
            num_samples: Number of samples to attempt.
        """
        os.makedirs(output_dir, exist_ok=True)

        fonts_by_path = {}  # font path -> loaded font, or None if unloadable
        generated = 0
        for i in range(num_samples):
            text, lang = self.generate_multilingual_text()
            font_path = self.fonts[lang]

            # Parse each font file once instead of once per sample.
            if font_path not in fonts_by_path:
                try:
                    fonts_by_path[font_path] = ImageFont.truetype(
                        font_path, self.FONT_SIZE)
                except IOError:
                    fonts_by_path[font_path] = None
            font = fonts_by_path[font_path]
            if font is None:
                print(f"Error: Could not load font {font_path}. Skipping this sample.")
                continue

            if not text:
                # An empty word list would produce a zero-width image.
                continue

            img = Image.new('L', self._text_size(font, text), color=255)
            ImageDraw.Draw(img).text((0, 0), text, font=font, fill=0)

            stem = os.path.join(output_dir, f'synthetic_text_{i}_{lang}')
            img.save(stem + '.png')
            with open(stem + '.txt', 'w', encoding='utf-8') as f:
                f.write(text)
            generated += 1

        # Report what was actually written, not what was requested.
        print(f"Generated {generated} synthetic text images in {output_dir}")
