In [5]:
from __future__ import annotations

from pathlib import Path
from typing import Dict, Iterable, List, Tuple

from PIL import Image, ImageDraw, ImageFont

In [6]:
def _fit_font_size(
    text: str,
    font_path: str,
    canvas_size: Tuple[int, int],
    margin: int,
    max_size: int,
    min_size: int,
) -> ImageFont.FreeTypeFont:
    """Binary-search the largest font size at which ``text`` fits the canvas.

    Args:
        text: String whose rendered bounding box is measured.
        font_path: Path of the TrueType font file handed to ``ImageFont.truetype``.
        canvas_size: ``(width, height)`` of the target canvas in pixels.
        margin: Padding in pixels kept free on every side of the canvas.
        max_size: Upper bound (inclusive) of the font sizes that are tried.
        min_size: Lower bound (inclusive) of the font sizes that are tried.

    Returns:
        The font loaded at the largest probed size whose bounding box fits
        inside the canvas minus margins. If no probed size fits, or if
        ``min_size > max_size``, the font loaded at ``min_size`` is returned.

    Note:
        The search assumes the bounding box grows with the font size.
    """
    width, height = canvas_size
    # Usable area once the margin is removed on both sides (never below 1 px).
    avail_w = max(1, width - 2 * margin)
    avail_h = max(1, height - 2 * margin)

    # Scratch surface that is only used for measuring, never for output.
    measurer = ImageDraw.Draw(Image.new("L", (width, height), 0))

    # Fallback result for the "nothing fits" and "empty range" cases.
    chosen = ImageFont.truetype(font_path, size=min_size)

    low, high = min_size, max_size
    while low <= high:
        probe = (low + high) // 2
        candidate = ImageFont.truetype(font_path, size=probe)

        left, top, right, bottom = measurer.textbbox((0, 0), text, font=candidate)
        overflows = (right - left) > avail_w or (bottom - top) > avail_h

        if overflows:
            # Too large: discard the upper half of the range.
            high = probe - 1
            continue

        # Fits: remember it and look for something larger.
        chosen = candidate
        low = probe + 1

    return chosen


def _safe_filename_from_text(text: str, naming: str = "codepoint") -> str:
    if naming == "text":
        bad = r'<>:"/\|?*'
        cleaned = "".join("_" if c in bad else c for c in text).strip()
        if not cleaned:
            cleaned = "empty"
        return cleaned + ".png"

    cps = "-".join([f"U+{ord(ch):04X}" for ch in text])
    return f"{cps}.png"


In [7]:
def _render_single(
    text: str,
    font_path: str,
    image_size: Tuple[int, int],
    margin: int,
    max_font_size: int,
    min_font_size: int,
) -> Image.Image:
    """Draw ``text`` centred on a new single-channel ("L" mode) image.

    The canvas is filled with 0 and the text is drawn with value 255, using
    the font size chosen by ``_fit_font_size`` for the given margin and
    size bounds.

    Args:
        text: String to draw.
        font_path: Path of the TrueType font file.
        image_size: ``(width, height)`` of the output image in pixels.
        margin: Padding in pixels forwarded to the font-fitting step.
        max_font_size: Largest font size the fitting step may choose.
        min_font_size: Smallest font size the fitting step may choose.

    Returns:
        The rendered ``PIL.Image.Image``.
    """
    width, height = image_size
    canvas = Image.new("L", (width, height), 0)
    pen = ImageDraw.Draw(canvas)

    fitted = _fit_font_size(
        text=text,
        font_path=font_path,
        canvas_size=(width, height),
        margin=margin,
        max_size=max_font_size,
        min_size=min_font_size,
    )

    # Bounding box of the glyphs when anchored at the origin; left/top may be
    # non-zero, so they are subtracted below to centre the ink itself.
    left, top, right, bottom = pen.textbbox((0, 0), text, font=fitted)
    origin = (
        (width - (right - left)) // 2 - left,
        (height - (bottom - top)) // 2 - top,
    )

    pen.text(origin, text, fill=255, font=fitted)
    return canvas


In [8]:
def render_text_images_by_category(
    categorized_texts: Dict[str, Iterable[str]],
    out_dir: str | Path,
    font_path: str | Path,
    image_size: Tuple[int, int] = (256, 256),
    margin: int = 18,
    max_font_size: int = 240,
    min_font_size: int = 10,
    naming: str = "codepoint",
    overwrite: bool = True,
    dedupe_within_category: bool = True,
) -> Dict[str, List[Path]]:
    """Render every text of every category to a PNG under ``out_dir/<category>/``.

    Args:
        categorized_texts: Mapping of category name to the texts to render.
            A value of ``None`` produces an empty result for that category
            (its directory is still created).
        out_dir: Root output directory; created if missing.
        font_path: TrueType font file used for rendering.
        image_size: ``(width, height)`` of each image in pixels.
        margin: Padding in pixels around the text.
        max_font_size: Largest font size that may be used.
        min_font_size: Smallest font size that may be used.
        naming: File naming mode forwarded to ``_safe_filename_from_text``.
        overwrite: When ``False``, files that already exist are skipped and
            are not included in the returned paths.
        dedupe_within_category: When ``True``, repeated texts inside one
            category are rendered once (first occurrence wins).

    Returns:
        Mapping of category name to the list of image paths written during
        this call, in rendering order.
    """
    root = Path(out_dir)
    root.mkdir(parents=True, exist_ok=True)
    font_file = str(Path(font_path))

    written: Dict[str, List[Path]] = {}

    for category, texts in categorized_texts.items():
        target_dir = root / category
        target_dir.mkdir(parents=True, exist_ok=True)

        if texts is None:
            written[category] = []
            continue

        # Normalise up front: drop None, stringify, drop empty strings.
        labels = [
            label
            for label in (str(item) for item in texts if item is not None)
            if label
        ]
        if dedupe_within_category:
            # dict.fromkeys keeps the first occurrence and the original order.
            labels = list(dict.fromkeys(labels))

        category_paths: List[Path] = []
        for label in labels:
            destination = target_dir / _safe_filename_from_text(label, naming=naming)

            already_there = destination.exists()
            if already_there and not overwrite:
                continue

            rendered = _render_single(
                text=label,
                font_path=font_file,
                image_size=image_size,
                margin=margin,
                max_font_size=max_font_size,
                min_font_size=min_font_size,
            )
            rendered.save(destination, format="PNG")
            category_paths.append(destination)

        written[category] = category_paths

    return written


In [24]:
# Sample syllables for every category, declared in one place.
# Key pattern: <vowel layout>_<nojong|jong>_<component the samples vary>.
categorized = {
    "horiz_nojong_chosung": ["가", "까", "나", "다", "따", "라", "마", "바", "빠", "사", "싸", "아", "자", "짜", "차", "카", "타", "파", "하"],
    "horiz_nojong_jungsung": ["아", "야", "애", "어", "여", "에", "얘", "예", "이"],
    "vert_nojong_chosung": ["고", "꼬", "노", "도", "또", "로", "모", "보", "뽀", "소", "쏘", "오", "조", "쪼", "초", "코", "토", "포", "호"],
    "vert_nojong_jungsung": ["오", "요", "우", "유", "으"],
    "mix_nojong_chosung": ["과", "꽈", "놔", "돠", "똬", "롸", "뫄", "봐", "뽜", "솨", "쏴", "와", "좌", "쫘", "촤", "콰", "톼", "퐈", "화"],
    "mix_nojong_jungsung": ["와", "외", "위", "왜", "워", "웨", "의"],

    "horiz_jong_chosung": ["강", "깡", "낭", "당", "땅", "랑", "망", "방", "빵", "상", "쌍", "앙", "장", "짱", "창", "캉", "탕", "팡", "항"],
    # The first entry used to be "악" (final ㄱ), which broke the pattern of this
    # list and of the other *_jong_jungsung lists, where every sample ends in
    # final ㅇ; it is now "앙". A stale U+C545.png from an earlier run may still
    # be present in this category's output folder.
    "horiz_jong_jungsung": ["앙", "양", "앵", "엉", "영", "엥", "얭", "옝", "잉"],
    "horiz_jong_jongsung": ["악", "앆", "앇", "안", "앉", "않", "앋", "알", "앍", "앎", "앏", "앐", "앑", "앒", "앓", "암", "압", "앖", "앗", "았", "앙", "앚", "앛", "앜", "앝", "앞", "앟"],
    "vert_jong_chosung": ["공", "꽁", "농", "동", "똥", "롱", "몽", "봉", "뽕", "송", "쏭", "옹", "종", "쫑", "총", "콩", "통", "퐁", "홍"],
    "vert_jong_jungsung": ["옹", "용", "웅", "융", "응"],
    "vert_jong_jongsung": ["옥", "옦", "옧", "온", "옩", "옪", "옫", "올", "옭", "옮", "옯", "옰", "옱", "옲", "옳", "옴", "옵", "옶", "옷", "옸", "옹", "옺", "옻", "옼", "옽", "옾", "옿"],
    "mix_jong_chosung": ["광", "꽝", "놩", "돵", "뙁", "뢍", "뫙", "봥", "뽱", "솽", "쐉", "왕", "좡", "쫭", "촹", "쾅", "퇑", "퐝", "황"],
    "mix_jong_jungsung": ["왕", "욍", "윙", "왱", "웡", "웽", "읭"],
    "mix_jong_jongsung": ["왁", "왂", "왃", "완", "왅", "왆", "왇", "왈", "왉", "왊", "왋", "왌", "왍", "왎", "왏", "왐", "왑", "왒", "왓", "왔", "왕", "왖", "왗", "왘", "왙", "왚", "왛"],
}

# NOTE(review): machine-specific absolute paths; adjust these two values when
# running the notebook on another machine.
font_path = r"D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\나눔손글씨 흰꼬리수리.ttf"
out_dir = r"D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리"

# Render one PNG per sample; files are named by code point and existing files
# are overwritten.
results = render_text_images_by_category(
    categorized_texts=categorized,
    out_dir=out_dir,
    font_path=font_path,
    image_size=(256, 256),
    margin=18,
    naming="codepoint",
    overwrite=True,
)

# Print a per-category summary: number of images written and their folder.
for k, v in results.items():
    print(f"{k}: {len(v)} images -> {Path(out_dir)/k}")


horiz_nojong_chosung: 19 images -> D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리\horiz_nojong_chosung
horiz_nojong_jungsung: 9 images -> D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리\horiz_nojong_jungsung
vert_nojong_chosung: 19 images -> D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리\vert_nojong_chosung
vert_nojong_jungsung: 5 images -> D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리\vert_nojong_jungsung
mix_nojong_chosung: 19 images -> D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리\mix_nojong_chosung
mix_nojong_jungsung: 7 images -> D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리\mix_nojong_jungsung
horiz_jong_chosung: 19 images -> D:\Study\학교강의\4학년2학기\캡스톤\Baram_Handwritting_Analysis\hangul_dataset\korean_sample_흰꼬리수리\horiz_jong_chosung
horiz