In [95]:
import os
import sys
from pathlib import Path
import cv2
import numpy as np


def setup_project_path(start=None, marker='craft'):
    """Find the project root by walking up the directory tree.

    The root is the nearest directory, starting at ``start`` and moving
    towards the filesystem root, that contains an entry named ``marker``.

    Args:
        start: Directory to begin the search from. Defaults to the current
            working directory.
        marker: Name of the file or directory that identifies the root.
            Defaults to ``'craft'``.

    Returns:
        pathlib.Path: The first directory (closest to ``start``) that
        contains ``marker``.

    Raises:
        FileNotFoundError: If neither ``start`` nor any of its ancestors
            contains ``marker``.
    """
    origin = Path.cwd() if start is None else Path(start).resolve()

    # Iterating over a finite ancestor list (instead of looping on
    # `current.parent`) guarantees termination: at the filesystem root,
    # `parent` returns the root itself, which would otherwise loop forever.
    for candidate in (origin, *origin.parents):
        if (candidate / marker).exists():
            return candidate

    raise FileNotFoundError(
        f"No '{marker}' entry found in {origin} or any of its parent directories"
    )
# Resolve the repository root and make it importable from this notebook.
project_root = setup_project_path()
# Remove any earlier copy before inserting so that re-running this cell does
# not accumulate duplicate sys.path entries; the root still ends up at index 0.
sys.path[:] = [entry for entry in sys.path if entry != str(project_root)]
sys.path.insert(0, str(project_root))

In [96]:
def imwrite_unicode(path, img):
    """Encode an image in memory and write the bytes with Python file I/O.

    The output format is selected from the extension of ``path`` and the
    encoding is done by ``cv2.imencode``; the file itself is written with
    the built-in ``open`` rather than by OpenCV.
    NOTE(review): judging by the name, this exists to support non-ASCII
    paths - confirm.

    Args:
        path: Destination file path; its extension picks the encoder.
        img: Image passed through to ``cv2.imencode``.

    Returns:
        bool: ``True`` when the image was encoded and written, ``False``
        when the encoder reported failure (nothing is written then).
    """
    _, suffix = os.path.splitext(path)
    encoded_ok, encoded = cv2.imencode(suffix, img)

    if encoded_ok:
        with open(path, "wb") as sink:
            sink.write(encoded.tobytes())
        return True

    return False

In [97]:
def imread_unicode(path, flags=cv2.IMREAD_COLOR):
    """Read an image by loading its bytes in Python and decoding in memory.

    The file is opened with the built-in ``open`` and the raw bytes are
    handed to ``cv2.imdecode``, so OpenCV never receives the path itself.
    NOTE(review): judging by the name, this exists to support non-ASCII
    paths - confirm.

    Args:
        path: Path of the image file to read.
        flags: Decode flags forwarded to ``cv2.imdecode``.

    Returns:
        Whatever ``cv2.imdecode`` returns for the file contents, or ``None``
        if any exception is raised while reading or decoding (the exception
        is printed, not re-raised).
    """
    try:
        with open(path, "rb") as src:
            raw = np.frombuffer(src.read(), np.uint8)
        return cv2.imdecode(raw, flags)
    except Exception as e:
        # Best-effort reader: report the problem and fall through to None.
        print("[imread_unicode ERROR]", e)
    return None


In [153]:
def load_centers(centers_file: str):
    """Parse a text file of ``x,y`` coordinates into a list of float pairs.

    Each non-blank line must hold exactly two comma-separated numbers.
    Blank lines are skipped, and surrounding whitespace is ignored.

    Args:
        centers_file: Path to the text file to read.

    Returns:
        list[tuple[float, float]]: One ``(x, y)`` tuple per non-blank line,
        in file order.

    Raises:
        ValueError: If a line does not have exactly two fields or a field
            is not a valid float. The message names the file and line.
    """
    centers = []
    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading BOM,
    # which would otherwise stay glued to the first number and break float().
    with open(centers_file, "r", encoding="utf-8-sig") as f:
        for line_no, line in enumerate(f, start=1):
            s = line.strip()
            if not s:
                continue

            fields = s.split(",")
            if len(fields) != 2:
                raise ValueError(
                    f"{centers_file}:{line_no}: expected 'x,y', got {s!r}"
                )

            try:
                centers.append((float(fields[0]), float(fields[1])))
            except ValueError as exc:
                raise ValueError(
                    f"{centers_file}:{line_no}: non-numeric coordinate in {s!r}"
                ) from exc
    return centers

In [149]:
def load_score(score_npy: str):
    """Load a score map from a ``.npy`` file as a 2-D float32 array.

    Args:
        score_npy: Path handed to ``np.load``.

    Returns:
        numpy.ndarray: The loaded data converted to ``float32``.

    Raises:
        ValueError: If the loaded array is not two-dimensional.
    """
    heatmap = np.load(score_npy)
    heatmap = heatmap.astype(np.float32)

    if heatmap.ndim == 2:
        return heatmap

    raise ValueError(f"score_text must be 2D, got shape={heatmap.shape}")


In [143]:
def min_score_on_segment(score, p1, p2, samples=120):
    H, W = score.shape
    xs = np.linspace(p1[0], p2[0], samples)
    ys = np.linspace(p1[1], p2[1], samples)

    xi = np.clip(xs.round().astype(int), 0, W - 1)
    yi = np.clip(ys.round().astype(int), 0, H - 1)

    vals = score[yi, xi]
    idx = int(np.argmin(vals))

    return int(xi[idx]), int(yi[idx]), float(vals[idx])


In [141]:
def compute_cut_positions(centers, score,
                          ignore_ratio=0.2,
                          samples=140):
    """Pick one x cut position between each pair of neighbouring centers.

    Centers are ordered by x. For every consecutive pair, the straight
    segment joining them is trimmed by ``ignore_ratio`` at both ends and
    ``min_score_on_segment`` is asked for the lowest-score sample on the
    remaining part; the x coordinate of that sample becomes the cut.

    Args:
        centers: Iterable of ``(x, y)`` pairs.
        score: Score map forwarded to ``min_score_on_segment``.
        ignore_ratio: Fraction of the segment skipped at each end.
        samples: Sample count forwarded to ``min_score_on_segment``.

    Returns:
        list: Cut x positions in ascending order; empty when fewer than two
        centers are given.
    """
    ordered = sorted(centers, key=lambda p: p[0])

    # Ignore both ends of the segment (avoids picking a valley inside a glyph).
    near_t = ignore_ratio
    far_t = 1.0 - ignore_ratio

    def _along(a, b, t):
        # Linear interpolation from a to b at parameter t.
        return a + (b - a) * t

    cut_xs = []
    for (xa, ya), (xb, yb) in zip(ordered, ordered[1:]):
        seg_start = (_along(xa, xb, near_t), _along(ya, yb, near_t))
        seg_end = (_along(xa, xb, far_t), _along(ya, yb, far_t))

        best_x, _, _ = min_score_on_segment(score, seg_start, seg_end, samples)
        cut_xs.append(best_x)

    cut_xs.sort()
    return cut_xs

In [139]:
def split_image_by_cuts(img, cuts, margin=2, min_width=8):
    """Slice an image into vertical strips at the given x positions.

    The strip boundaries are ``0``, every value in ``cuts``, and the image
    width. Each strip is widened by ``margin`` pixels on both sides
    (clamped to the image), and strips narrower than ``min_width`` are
    dropped.

    Args:
        img: Array whose second axis is the x axis.
        cuts: List of x positions, in the order the strips should appear.
        margin: Pixels added on each side of a strip.
        min_width: Minimum strip width (after the margin) to keep.

    Returns:
        list[tuple]: ``(x1, x2, crop)`` for every kept strip, where ``crop``
        is an independent copy of ``img[:, x1:x2]``.
    """
    _, width = img.shape[:2]
    edges = [0] + cuts + [width]

    pieces = []
    for left_edge, right_edge in zip(edges, edges[1:]):
        left = max(0, left_edge - margin)
        right = min(width, right_edge + margin)

        if right - left >= min_width:
            pieces.append((left, right, img[:, left:right].copy()))

    return pieces

In [137]:
def _center_file_stem(fname, suffix="_center.txt", prefix="res_"):
    """Return ``fname`` without its trailing ``suffix`` and leading ``prefix``."""
    stem = fname[:-len(suffix)] if fname.endswith(suffix) else fname
    if stem.startswith(prefix):
        stem = stem[len(prefix):]
    return stem


def _find_source_image(image_folder, name):
    """Return the first existing ``<image_folder>/<name><ext>`` path, else None."""
    for ext in (".png", ".jpg", ".jpeg", ".bmp", ".webp"):
        candidate = os.path.join(image_folder, name + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def split_chars_from_centers(
    image_folder,
    centers_folder,
    score_folder,
    out_folder,
):
    """Cut every image into per-character crops using its center file.

    For each ``*_center.txt`` file in ``centers_folder`` the base name is
    derived by stripping the ``_center.txt`` suffix and a leading ``res_``
    prefix. The matching image (``<name>.<ext>`` in ``image_folder``) and
    score map (``res_<name>_score_text.npy`` in ``score_folder``) are
    loaded, cut positions are computed between neighbouring centers, and
    each resulting strip is written to
    ``<out_folder>/<name>/char_<NN>.png``.

    Items with a missing image or score file, or an image that cannot be
    read, are skipped with a ``[WARN]`` line. A ``[WARN]`` line is also
    printed for each crop that could not be written.

    Args:
        image_folder: Directory holding the source images.
        centers_folder: Directory holding the ``*_center.txt`` files.
        score_folder: Directory holding the ``*_score_text.npy`` files.
        out_folder: Directory that receives one sub-folder per image;
            created if it does not exist.

    Returns:
        None. Progress is reported with ``print``.
    """
    os.makedirs(out_folder, exist_ok=True)

    # os.listdir order is arbitrary; sorting makes processing and log order
    # repeatable from run to run.
    for fname in sorted(os.listdir(centers_folder)):
        if not fname.endswith("_center.txt"):
            continue

        # Strip only the leading prefix / trailing suffix. A blanket
        # str.replace would also eat "res_" inside the name itself
        # (e.g. "res_wires_01" would become "wi01").
        name = _center_file_stem(fname)

        centers_file = os.path.join(centers_folder, fname)
        score_file = os.path.join(score_folder, f"res_{name}_score_text.npy")
        img_path = _find_source_image(image_folder, name)

        if img_path is None or not os.path.exists(score_file):
            print(f"[WARN] skip {name}")
            continue

        img = imread_unicode(img_path)
        if img is None:
            # imread_unicode returns None on read/decode failure; without
            # this guard the img.shape access below would raise.
            print(f"[WARN] skip {name}: cannot read image {img_path}")
            continue

        score = load_score(score_file)

        # Bring the score map to the image resolution so that center and
        # cut coordinates index both arrays consistently.
        if score.shape[:2] != img.shape[:2]:
            score = cv2.resize(score, (img.shape[1], img.shape[0]))

        centers = load_centers(centers_file)
        cuts = compute_cut_positions(centers, score)
        crops = split_image_by_cuts(img, cuts)

        char_dir = os.path.join(out_folder, name)
        os.makedirs(char_dir, exist_ok=True)

        saved = 0
        for i, (_, _, crop) in enumerate(crops):
            out = os.path.join(char_dir, f"char_{i:02d}.png")
            if imwrite_unicode(out, crop):
                saved += 1
            else:
                print(f"[WARN] {name}: failed to write {out}")

        # Report what actually reached disk, not just what was cropped.
        print(f"[OK] {name}: {saved} chars")

In [155]:
# Run the character splitter over the CRAFT folders under the project root:
# source images are read from craft/images, center lists and score maps from
# craft/results, and the crops are written to craft/results/chars_split.
split_chars_from_centers(
    image_folder=str(project_root/"craft"/"images"),
    centers_folder=str(project_root/"craft"/"results"/"center"),          
    score_folder=str(project_root/"craft"/"results"/"score_text"),       
    out_folder=str(project_root/"craft"/"results"/"chars_split"),       
)

[OK] test1: 4 chars
[OK] test2: 6 chars
[OK] test3: 5 chars
[OK] test4: 4 chars
[OK] test5: 7 chars
