Generador de dataset sintético de números para entrenamiento de OCR

In [None]:
import os, random
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont

# Seed both RNGs used below (stdlib `random` and NumPy) for repeatable runs.
random.seed(42)
np.random.seed(42)

# === Font configuration ===
# TrueType files the renderer chooses from (one random pick per rendered digit).
FONTS = [
    r"C:\Windows\Fonts\Arial\ariblk.ttf",
    r"C:\Windows\Fonts\Arial\arialbd.ttf",
    # Add more .ttf similar to real race numbers
]

# === Output layout ===
# Dataset root with one sub-folder per digit class ("0" .. "9"),
# created at import time so later writes never hit a missing directory.
OUT_DIR = r"..\..\Data\dataset\text_arial"
os.makedirs(OUT_DIR, exist_ok=True)
for d in range(10):
    os.makedirs(os.path.join(OUT_DIR, f"{d}"), exist_ok=True)

# === Dataset size ===
N_PER_DIGIT = 1000          # number of samples generated for each digit
BASE_SIZE = 64              # working canvas size; larger gives cleaner warps
OUT_SIZE = 28               # side of the saved tile (None keeps BASE_SIZE)

# === Augmentation knobs ===
ROT_DEG = 8                 # rotation is drawn from [-ROT_DEG, +ROT_DEG] degrees
SCALE_RANGE = (0.90, 1.05)  # (min, max) isotropic scale factor
TRANS_PX = 1                # max shift in pixels along each axis
PERSPECTIVE_JITTER = 0.08   # corner jitter as a fraction of size; 0..0.15 is typical
STROKE_JITTER = False       # enable stroke thickening/thinning via dilate/erode
BLUR_PROB = 0.35            # chance of a small blur followed by re-binarization
ERODE_DILATE_KS = [1, 2]    # structuring-element sizes tried by the stroke jitter


# ====== UTILS ======
def ensure_binary(img):
    """Return a strictly 0/255 ``uint8`` version of ``img``.

    Pixels greater than 127 become 255, everything else becomes 0. For
    ``uint8`` input this matches ``cv2.threshold(img, 127, 255,
    cv2.THRESH_BINARY)``.

    Args:
        img: Array-like image of any numeric or boolean dtype.

    Returns:
        A new ``uint8`` array of the same shape containing only 0 and 255.
    """
    arr = np.asarray(img)
    if arr.dtype == np.bool_:
        # A boolean mask has no intensity scale: True is foreground.
        mask = arr
    else:
        # Compare in the native dtype. Casting to uint8 first would wrap
        # out-of-range values (300 -> 44, -1 -> 255) and flip their class.
        mask = arr > 127
    return mask.astype(np.uint8) * 255

def render_filled_digit(digit: str, size=96, min_scale=0.5, max_scale=0.8):
    """Render a filled white digit on a black square and binarize it.

    A font is picked at random from ``FONTS`` and its pixel size is drawn
    uniformly from ``[min_scale, max_scale] * size``. The glyph is centred on
    the canvas and then shifted by a random offset of -2..2 pixels per axis.

    Args:
        digit: Text to draw (normally a single digit character).
        size: Side length of the square canvas in pixels.
        min_scale: Lower bound of the font size as a fraction of ``size``.
        max_scale: Upper bound of the font size as a fraction of ``size``.

    Returns:
        A ``(size, size)`` array passed through ``ensure_binary`` to remove
        anti-aliasing greys.
    """
    font_path = random.choice(FONTS)
    font_size = int(size * random.uniform(min_scale, max_scale))
    font = ImageFont.truetype(font_path, font_size)

    canvas = Image.new("L", (size, size), 0)  # black background
    draw = ImageDraw.Draw(canvas)

    left, top, right, bottom = draw.textbbox((0, 0), digit, font=font)
    text_w, text_h = right - left, bottom - top

    # textbbox is measured from the drawing anchor, so the glyph usually starts
    # at a non-zero (left, top) offset. Subtract it, otherwise the ink lands
    # below and to the right of the intended centre.
    x = (size - text_w) // 2 - left + np.random.randint(-2, 3)
    y = (size - text_h) // 2 - top + np.random.randint(-2, 3)

    draw.text((x, y), digit, fill=255, font=font)

    return ensure_binary(np.array(canvas))  # canvas holds 0..255 greys

def jitter_stroke(img_bin):
    """Thicken or thin the glyph stroke at random with a morphological op.

    Nothing happens when ``STROKE_JITTER`` is off, and when it is on the
    image is still returned untouched half of the time. Otherwise a square
    structuring element with a side taken from ``ERODE_DILATE_KS`` is used to
    either dilate or erode (equal odds), and the result is re-binarized.
    """
    # Short-circuit: the RNG is only consumed when the feature is enabled.
    if not (STROKE_JITTER and random.random() >= 0.5):
        return img_bin

    side = random.choice(ERODE_DILATE_KS)
    element = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side))
    morph = cv2.dilate if random.random() < 0.5 else cv2.erode
    return ensure_binary(morph(img_bin, element, iterations=1))

def random_affine_binary(img_bin, max_deg=ROT_DEG, scale_range=SCALE_RANGE, translate_px=TRANS_PX):
    """Apply a random rotation, scale and shift while keeping the image binary.

    The rotation angle is uniform in ``[-max_deg, max_deg]``, the scale is
    uniform in ``scale_range`` and each axis is shifted by an integer in
    ``[-translate_px, translate_px]``. Nearest-neighbour sampling with a black
    border avoids introducing grey levels.
    """
    rows, cols = img_bin.shape[:2]

    # Draw order (angle, scale, dx, dy) is kept so seeded runs are unchanged.
    angle = np.random.uniform(-max_deg, max_deg)
    zoom = np.random.uniform(*scale_range)
    shift_x = np.random.randint(-translate_px, translate_px + 1)
    shift_y = np.random.randint(-translate_px, translate_px + 1)

    matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, zoom)
    matrix[:, 2] += (shift_x, shift_y)  # last column is the translation

    warped = cv2.warpAffine(
        img_bin,
        matrix,
        (cols, rows),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0,
    )
    return ensure_binary(warped)

def random_perspective_binary(img_bin, max_jitter=PERSPECTIVE_JITTER):
    """Warp the image with a random perspective transform; output stays binary.

    Each corner of the image rectangle is displaced independently by a
    uniform offset in ``[-max_jitter, max_jitter]`` (as a fraction of the
    width / height). A non-positive ``max_jitter`` returns the input as is.
    """
    if max_jitter <= 0:
        return img_bin

    rows, cols = img_bin.shape[:2]

    def offset():
        # One fresh uniform draw per coordinate.
        return np.random.uniform(-max_jitter, max_jitter)

    # Corners in unit coordinates: top-left, top-right, bottom-right, bottom-left.
    unit_corners = ((0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0))
    corners_in = np.float32([[cols * ux, rows * uy] for ux, uy in unit_corners])
    # x is drawn before y for every corner, walking the corners in order.
    corners_out = np.float32(
        [[cols * (ux + offset()), rows * (uy + offset())] for ux, uy in unit_corners]
    )

    homography = cv2.getPerspectiveTransform(corners_in, corners_out)
    result = cv2.warpPerspective(
        img_bin,
        homography,
        (cols, rows),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0,
    )
    return ensure_binary(result)

def maybe_blur_and_rebinarize(img_bin):
    """With probability ``BLUR_PROB``, soften the edges and binarize again.

    A Gaussian blur with a 3x3 or 5x5 kernel (sigma 0.8) mimics slight
    defocus; Otsu thresholding then turns the grey edges back into 0/255.
    When the blur is skipped the input is returned unchanged.
    """
    if random.random() <= BLUR_PROB:
        ksize = random.choice([3, 5])  # small blur only
        soft = cv2.GaussianBlur(img_bin, (ksize, ksize), sigmaX=0.8)
        # Otsu picks the cut level itself, so the 0 passed here is ignored.
        _, sharp = cv2.threshold(soft, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        return ensure_binary(sharp)
    return img_bin

def get_bbox(img_bin):
    """Locate the foreground (pixels > 0) of a 2-D image.

    Returns:
        ``(y1, y2, x1, x2)`` with inclusive bounds of the tight bounding box,
        or ``None`` when the image has no foreground pixel.
    """
    rows, cols = np.nonzero(img_bin > 0)
    if rows.size == 0:
        return None
    return rows.min(), rows.max(), cols.min(), cols.max()

def recenter_and_fit(img_bin, out_size=(96,96), pad=4, keep_aspect=True):
    """Crop the foreground to its bounding box and fit it centred in a canvas.

    The crop is resized with nearest-neighbour sampling to fit the area left
    after removing ``pad`` pixels on every side, then pasted so that its
    bounding box is centred in that area. If the pasted foreground still
    touches a canvas edge, it is shrunk by about 6% and pasted once more.

    Args:
        img_bin: 2-D image; every pixel > 0 counts as foreground.
        out_size: ``(height, width)`` of the returned canvas.
        pad: Margin in pixels kept free on every side.
        keep_aspect: Preserve the crop's aspect ratio when True; otherwise
            stretch the crop over the whole padded area.

    Returns:
        A ``uint8`` array of shape ``out_size`` holding only 0 and 255. It is
        all zeros when ``img_bin`` has no foreground.
    """
    h_out, w_out = out_size
    bb = get_bbox(img_bin)
    if bb is None:
        # Nothing to place: return an empty canvas.
        return np.zeros(out_size, np.uint8)

    y1, y2, x1, x2 = bb
    crop = img_bin[y1:y2 + 1, x1:x2 + 1]

    # Drawable area after padding, never smaller than one pixel.
    avail_h = max(1, h_out - 2 * pad)
    avail_w = max(1, w_out - 2 * pad)

    if keep_aspect:
        scale = min(avail_h / crop.shape[0], avail_w / crop.shape[1])
        new_h = max(1, int(round(crop.shape[0] * scale)))
        new_w = max(1, int(round(crop.shape[1] * scale)))
    else:
        new_h, new_w = avail_h, avail_w

    def paste_centered(patch):
        # Put `patch` in the middle of the padded area of a fresh canvas.
        patch_h, patch_w = patch.shape[:2]
        target = np.zeros((h_out, w_out), np.uint8)
        top = pad + (avail_h - patch_h) // 2
        left = pad + (avail_w - patch_w) // 2
        target[top:top + patch_h, left:left + patch_w] = patch
        return target

    # Nearest-neighbour resize keeps the image binary.
    resized = cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
    resized = (resized > 0).astype(np.uint8) * 255
    canvas = paste_centered(resized)

    # Final guard: if foreground touches an edge, shrink slightly and refit once.
    touching = (
        canvas[0, :].any() or canvas[-1, :].any() or
        canvas[:, 0].any() or canvas[:, -1].any()
    )
    if touching and min(new_h, new_w) > 2:
        small_h, small_w = int(new_h * 0.94), int(new_w * 0.94)
        shrunk = cv2.resize(resized, (small_w, small_h), interpolation=cv2.INTER_NEAREST)
        canvas = paste_centered(shrunk)

    return (canvas > 0).astype(np.uint8) * 255

def synth_filled_digit_tile(digit: str, size=96):
    """Produce one augmented, binary ``size`` x ``size`` tile for ``digit``.

    Stages, in order: render, stroke jitter, affine warp, perspective warp,
    optional blur, then crop and re-centre with a 4-pixel margin. The result
    is binarized one last time before being returned.
    """
    augmentations = (
        jitter_stroke,
        random_affine_binary,
        random_perspective_binary,
        maybe_blur_and_rebinarize,
    )

    tile = render_filled_digit(digit, size=size)
    for stage in augmentations:
        tile = stage(tile)

    tile = recenter_and_fit(tile, out_size=(size, size), pad=4, keep_aspect=True)
    return ensure_binary(tile)

def generate_per_digit_dataset(n_per_digit=N_PER_DIGIT, base_size=BASE_SIZE, out_size=OUT_SIZE):
    """Generate and save ``n_per_digit`` synthetic tiles for each digit 0-9.

    Tiles are written to ``OUT_DIR/<digit>/<digit>_<index>.png``. Each tile is
    synthesized at ``base_size`` and, unless ``out_size`` is None, resized to
    ``out_size`` x ``out_size`` with nearest-neighbour sampling.

    Args:
        n_per_digit: Number of tiles to create per digit.
        base_size: Side of the canvas used while rendering and augmenting.
        out_size: Side of the saved tile, or None to keep ``base_size``.

    Raises:
        OSError: If OpenCV reports that a tile could not be written.
    """
    print("[INFO] Generating binary filled digits...")
    os.makedirs(OUT_DIR, exist_ok=True)

    for d in range(10):
        outdir = os.path.join(OUT_DIR, str(d))
        os.makedirs(outdir, exist_ok=True)

        for i in range(n_per_digit):
            tile = synth_filled_digit_tile(str(d), size=base_size)
            if out_size is not None:
                tile = cv2.resize(tile, (out_size, out_size), interpolation=cv2.INTER_NEAREST)
                tile = ensure_binary(tile)

            tile_path = os.path.join(outdir, f"{d}_{i:05d}.png")
            # imwrite signals failure through its return value rather than an
            # exception; without this check a whole run could save nothing.
            if not cv2.imwrite(tile_path, tile):
                raise OSError(f"Could not write tile to {tile_path}")
        print(f"[INFO] Digit {d} done.")
    print("[INFO] All digits generated.")


# ====== OPTIONAL: multi-digit plate (still in binary, filled) ======
def compose_plate_filled(number_str: str, digit_size=48, spacing=6, pad=8):
    """Compose a binary multi-digit plate from independently augmented tiles.

    Every character of ``number_str`` is synthesized as a
    ``digit_size`` x ``digit_size`` tile; tiles are laid out left to right
    with ``spacing`` blank columns between them, and the strip is surrounded
    by a black border of ``pad`` pixels when ``pad`` is positive.

    Args:
        number_str: Characters to render; must not be empty.
        digit_size: Side length of each character tile in pixels.
        spacing: Blank columns between neighbouring tiles.
        pad: Width of the black border added around the strip.

    Returns:
        A 2-D ``uint8`` plate image containing only 0 and 255.

    Raises:
        ValueError: If ``number_str`` is empty.
    """
    if not number_str:
        # Zero tiles would give a negative or zero plate width and fail later
        # with an unrelated-looking NumPy/OpenCV error.
        raise ValueError("number_str must contain at least one character")

    digits = [synth_filled_digit_tile(ch, size=digit_size) for ch in number_str]
    h = digit_size
    w = len(digits) * digit_size + (len(digits) - 1) * spacing
    plate = np.zeros((h, w), np.uint8)

    x = 0
    for dg in digits:
        dg = ensure_binary(dg)
        # Merge with max so existing foreground in the slot is never erased.
        plate[:, x:x + digit_size] = np.maximum(plate[:, x:x + digit_size], dg)
        x += digit_size + spacing

    if pad > 0:
        plate = cv2.copyMakeBorder(plate, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=0)

    return ensure_binary(plate)


# ====== MAIN ======
if __name__ == "__main__":
    # Uncomment to (re)generate the full per-digit dataset:
    #generate_per_digit_dataset()

    # Example: render one sample plate. File name and log message are both
    # derived from the same number so they cannot drift apart.
    sample_number = "896"
    sample_name = f"sample_plate_{sample_number}.png"
    sample = compose_plate_filled(sample_number, digit_size=64, spacing=8, pad=10)
    if cv2.imwrite(f"./{sample_name}", sample):
        print(f"[INFO] Sample plate saved: {sample_name}")
    else:
        # imwrite returns False instead of raising when the write fails.
        print(f"[ERROR] Could not write sample plate: {sample_name}")

[INFO] Sample plate saved: sample_plate_247.png
