# In [24]:
# ============================================================
# Synthetic SII-like TED -> PDF417 -> PNG/JPG
# CSV = (image, text) where text is the FULL payload string used for that image
# Columns ~ 20 (+/-): target 20, allow 18..22
# + Randomized "camera/scan" degradations BEFORE saving:
#   - gaussian blur (mild)
#   - color changes (brightness/contrast/saturation + slight hue shift)
#   - salt & pepper (mild)
#   - perspective "trapezoid" / keystone (mild)
#   - small rotation + shear
#   - yellowing (paper tint)
#   - optional downscale->upscale (softening)
#
# Jupyter:
#   %pip install pdf417gen pillow numpy
# ============================================================

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime, date, timedelta
from pathlib import Path
import base64
import hashlib
import random
import re
import string
import csv
from typing import Tuple, Optional, List

import pdf417gen
from PIL import Image, ImageFilter, ImageEnhance, ImageChops
import numpy as np


# ----------------------------
# 1) Utilities
# ----------------------------
def safe_filename(s: str, max_len: int = 150) -> str:
    """Turn an arbitrary string into a filesystem-friendly name.

    Surrounding whitespace is stripped, every run of characters other than
    word characters, ``-`` and ``.`` becomes a single underscore, and the
    result is cut to ``max_len`` characters. ``"barcode"`` is returned when
    nothing is left (empty/``None`` input or ``max_len`` of 0).
    """
    trimmed = s.strip() if s else ""
    cleaned = re.sub(r"[^\w\-.]+", "_", trimmed)
    clipped = cleaned[:max_len]
    if clipped:
        return clipped
    return "barcode"

def weighted_choice(items, weights):
    """Draw one element of ``items`` with probability proportional to ``weights``."""
    (picked,) = random.choices(items, weights=weights, k=1)
    return picked

def rand_alnum(n: int) -> str:
    """Return ``n`` random characters drawn from ``A-Z`` and ``0-9``."""
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(n)]
    return "".join(picked)

def rand_digits(n: int) -> str:
    """Return a string of ``n`` random decimal digits (leading zeros allowed)."""
    picked = [random.choice(string.digits) for _ in range(n)]
    return "".join(picked)


# ----------------------------
# 2) Accents + capitalization randomization
# ----------------------------
# Each plain vowel maps to its two spellings: [unaccented, acute-accented].
VOWEL_ACCENTS = {
    plain: [plain, accented]
    for plain, accented in zip("aeiouAEIOU", "áéíóúÁÉÍÓÚ")
}

def randomize_accents(text: str, prob: float = 0.15) -> str:
    """Randomly swap plain vowels of ``text`` for their accented form.

    Each plain vowel is selected with probability ``prob``; a selected vowel
    is then replaced by a uniform pick between its plain and accented
    spelling. All other characters pass through unchanged.
    """
    def maybe_accent(ch):
        options = VOWEL_ACCENTS.get(ch)
        # No random draw is consumed for characters that are not plain vowels.
        if options is None or not (random.random() < prob):
            return ch
        return random.choice(options)

    return "".join(maybe_accent(ch) for ch in text)

def randomize_case(text: str) -> str:
    """Re-case ``text`` using one randomly chosen mode.

    The mode is picked uniformly from lower, upper, title, mixed (each
    character independently upper- or lower-cased) and original (unchanged).
    """
    mode = random.choice(["lower", "upper", "title", "mixed", "original"])
    if mode == "mixed":
        flipped = (
            ch.upper() if random.random() < 0.5 else ch.lower()
            for ch in text
        )
        return "".join(flipped)

    whole_string_ops = {"lower": str.lower, "upper": str.upper, "title": str.title}
    transform = whole_string_ops.get(mode)
    if transform is None:
        # "original": leave the text as it is.
        return text
    return transform(text)

def spanish_noise(text: str, accent_prob: float = 0.15, apply_case: bool = True) -> str:
    """Apply accent noise and, optionally, random re-casing to ``text``.

    Falsy input (empty string or ``None``) is returned untouched. Accents are
    randomized first with ``accent_prob``; the casing step runs afterwards
    only when ``apply_case`` is true.
    """
    if text:
        noisy = randomize_accents(text, accent_prob)
        return randomize_case(noisy) if apply_case else noisy
    return text


# ----------------------------
# 3) RUT generation (valid DV)
# ----------------------------
def rut_dv(rut_number: int) -> str:
    """Compute the modulo-11 check digit for the numeric part of a RUT.

    Digits are weighted from the right with the repeating sequence
    2, 3, 4, 5, 6, 7. The result is ``"0"``-``"9"`` or ``"K"``; values that
    are zero or negative yield ``"0"`` because no digit is accumulated.
    """
    weighted_sum = 0
    position = 0
    remaining = rut_number
    while remaining > 0:
        remaining, digit = divmod(remaining, 10)
        # 2 + position % 6 walks through 2, 3, 4, 5, 6, 7 and starts over.
        weighted_sum += digit * (2 + position % 6)
        position += 1

    check = 11 - weighted_sum % 11
    special = {11: "0", 10: "K"}
    return special.get(check, str(check))

def random_rut(min_rut: int = 5_000_000, max_rut: int = 90_000_000) -> str:
    """Return a random RUT formatted ``<number>-<check digit>``.

    The number is uniform in ``[min_rut, max_rut]`` (both inclusive) and the
    check digit comes from ``rut_dv``.
    """
    number = random.randint(min_rut, max_rut)
    check_digit = rut_dv(number)
    return "{}-{}".format(number, check_digit)


# ----------------------------
# 4) Synthetic data pools
# ----------------------------
# Document-type codes and their sampling weights; the two lists are parallel
# and are passed together to weighted_choice().
TIPOS_DTE = ["33", "34", "39", "61", "56"]
TIPOS_DTE_W = [0.55, 0.10, 0.25, 0.06, 0.04]

# Vocabulary used by rand_text_words() for company names, item lines and notes.
WORDS = [
    "SERVICIO","INSUMO","EQUIPO","REPUESTO","MANTENCION","FILTRO","SENSOR","CABLE","BATERIA","VALVULA",
    "KIT","MODULO","PACK","TUBO","BOLSA","GUANTE","MASCARILLA","CATETER","JERINGA","SOLUCION","PRUEBA",
    "DIAGNOSTICO","CALIBRACION","INSTALACION","TRANSPORTE","ARRIENDO","MONITOREO","REPARACION","COMUNICACION",
    "SOPORTE","ACTUALIZACION","LICENCIA","SOFTWARE","HARDWARE","PROCEDIMIENTO","CLINICO","HOSPITALARIO"
]
# Company-name suffixes appended by rand_company().
SUFFIX = ["SpA","Ltda","S.A.","EIRL","Limitada"]
# Pools sampled by build_ted_xml() for the GIR, CMNA and REG fields.
GIROS = ["Servicios Medicos","Mantenimiento Equipos","Insumos Clinicos","Tecnologia Medica","Servicios TI","Logistica"]
COMUNAS = ["VALPARAISO","VINA DEL MAR","QUILPUE","VILLA ALEMANA","SAN ANTONIO","LOS ANDES","LA CALERA","LIMACHE"]
REGIONES = ["VALPARAISO","METROPOLITANA","BIOBIO","COQUIMBO","ARAUCANIA"]

def rand_text_words(min_w: int, max_w: int) -> str:
    """Return between ``min_w`` and ``max_w`` (inclusive) random WORDS joined by spaces."""
    count = random.randint(min_w, max_w)
    picked = [random.choice(WORDS) for _ in range(count)]
    return " ".join(picked)

def rand_company(max_len: int = 60) -> str:
    """Build a random company name, truncated to ``max_len`` characters.

    The name is 1-4 vocabulary words plus a legal suffix; 40% of the time a
    number in 1..999 is appended, and 25% of the time every space is replaced
    by a hyphen.
    """
    pieces = [rand_text_words(1, 4), random.choice(SUFFIX)]
    if random.random() < 0.40:
        pieces.append(str(random.randint(1, 999)))
    name = " ".join(pieces)
    if random.random() < 0.25:
        name = name.replace(" ", "-")
    return name[:max_len]

def rand_address(max_len: int = 80) -> str:
    """Build a random street address, truncated to ``max_len`` characters.

    Format: ``<street type> <street name> <number>``, followed 35% of the
    time by ``OF <office number>``.
    """
    street_type = random.choice(["AV", "CALLE", "PASAJE", "CAMINO", "RUTA"])
    street_name = random.choice(["LIBERTAD", "ESPAÑA", "ARGENTINA", "COLON", "PRAT", "CENTRAL", "NORTE", "SUR"])
    parts = [street_type, street_name, str(random.randint(1,9999))]
    if random.random() < 0.35:
        parts += ["OF", str(random.randint(1, 999))]
    return " ".join(parts)[:max_len]

def rand_amount_clp(min_amt: int = 500, max_amt: int = 50_000_000) -> int:
    """Draw a random amount skewed toward the low end of the range.

    A uniform draw is squared before being scaled into
    ``[min_amt, max_amt]``, the value is rounded down to a randomly chosen
    step (10, 100, 1000 or 10000), and the result never drops below
    ``min_amt``.
    """
    skew = random.random() ** 2.0
    raw = int(min_amt + skew * (max_amt - min_amt))
    granularity = weighted_choice([10, 100, 1000, 10000], [0.10, 0.35, 0.35, 0.20])
    rounded = raw - raw % granularity
    return rounded if rounded > min_amt else min_amt

def rand_issue_date(days_back: int = 365) -> date:
    """Return a random date between ``days_back`` days ago and today (inclusive)."""
    today = date.today()
    offset = timedelta(days=random.randint(0, days_back))
    return today - offset

def rand_tsted(days_back: int = 365) -> datetime:
    """Return a random naive local timestamp within the last ``days_back`` days."""
    current = datetime.now()
    window_seconds = days_back * 24 * 3600
    return current - timedelta(seconds=random.randint(0, window_seconds))

def rand_item_line(max_len: int = 180) -> str:
    """Build one random item line, truncated to ``max_len`` characters.

    Format: ``<3-12 words> <PREFIX>-<4-7 digits> <quantity><unit>``.
    """
    description = rand_text_words(3, 12)
    prefix = random.choice(['SKU','REF','COD','INT'])
    serial = rand_digits(random.randint(4,7))
    unit = random.choice(["UN","CJ","PK","HRS","DIA","MES"])
    quantity = random.randint(1, 25)
    line = f"{description} {prefix}-{serial} {quantity}{unit}"
    return line[:max_len]


# ----------------------------
# 5) Synthetic CAF + FRMT (bounded)
# ----------------------------
def fake_caf_xml() -> str:
    """Return a synthetic ``<CAF>`` XML fragment filled with random values.

    The ``<M>`` element holds 16 random alphanumerics followed by 120, 160,
    200 or 240 more; none of the content is cryptographically meaningful.
    """
    modulus = rand_alnum(16) + rand_alnum(random.choice([120, 160, 200, 240]))
    issuer = random_rut()
    doc_type = weighted_choice(TIPOS_DTE, TIPOS_DTE_W)
    range_from = random.randint(1,5000)
    range_to = random.randint(5001,9_999_999)
    auth_date = date.today().strftime("%Y-%m-%d")
    fragments = [
        '<CAF version="1.0"><DA>',
        f'<RE>{issuer}</RE><TD>{doc_type}</TD>',
        f'<RNG><D>{range_from}</D><H>{range_to}</H></RNG>',
        f'<FA>{auth_date}</FA>',
        f'<RSAPK><M>{modulus}</M><E>65537</E></RSAPK>',
        '</DA></CAF>',
    ]
    return "".join(fragments)

def fake_frmt(dd_xml: str) -> str:
    """Return a signature-like Base64 string derived from ``dd_xml``.

    The value is the SHA-256 digest followed by the SHA-1 digest of the UTF-8
    encoded input (52 bytes), Base64-encoded. It is a deterministic
    stand-in, not an RSA signature.
    """
    raw = dd_xml.encode("utf-8")
    blob = b"".join(hashlib.new(algorithm, raw).digest() for algorithm in ("sha256", "sha1"))
    return base64.b64encode(blob).decode("ascii")


# ----------------------------
# 6) TED builder (heavy-ish, varied)
# ----------------------------
def build_ted_xml(
    rut_emisor: str,
    tipo_dte: str,
    folio: int,
    fe: date,
    rut_receptor: str,
    days_back: int,
    accent_prob: float,
    items_min: int,
    items_max: int,
) -> str:
    """Assemble a synthetic ``<TED>`` XML string with randomized content.

    Args:
        rut_emisor: Issuer RUT written to ``<RE>``.
        tipo_dte: Document type code written to ``<TD>``.
        folio: Folio number written to ``<F>``.
        fe: Issue date written to ``<FE>`` as ``YYYY-MM-DD``.
        rut_receptor: Receiver RUT written to ``<RR>``.
        days_back: Look-back window (days) for the random timestamp and the
            reference date.
        accent_prob: Base accent-noise probability; some fields use a scaled
            fraction of it.
        items_min: Minimum number of item lines to generate.
        items_max: Maximum number of item lines to generate.

    Returns:
        ``<TED version="1.0">`` wrapping the ``<DD>`` block and a ``<FRMT>``
        value computed from that block by ``fake_frmt``.

    Note:
        All item lines are generated but only the first one is emitted (as
        ``<IT1>``, cut to 40 characters). When zero items are drawn
        (``items_min == 0``), ``<IT1>`` is left empty instead of raising
        ``IndexError``. Field values are inserted without XML escaping.
    """
    rsr  = spanish_noise(rand_company(60), accent_prob=accent_prob)[:40]
    giro = spanish_noise(random.choice(GIROS), accent_prob=accent_prob)[:40]
    dirr = spanish_noise(rand_address(80), accent_prob=accent_prob * 0.6)[:70]
    cmna = spanish_noise(random.choice(COMUNAS), accent_prob=accent_prob * 0.4)[:20]
    rgn  = spanish_noise(random.choice(REGIONES), accent_prob=accent_prob * 0.4)[:20]

    items = [
        spanish_noise(rand_item_line(180), accent_prob=accent_prob * 0.7)
        for _ in range(random.randint(items_min, items_max))
    ]
    # Guard the empty case: items_min may legitimately be 0.
    it1 = items[0][:40] if items else ""

    # Total first, then a net amount of 70-95% of it (rounded down to tens);
    # the remainder is reported as IVA.
    mnt  = rand_amount_clp()
    neto = int(mnt * random.uniform(0.70, 0.95)) // 10 * 10
    iva  = max(0, mnt - neto)

    tsted = rand_tsted(days_back).strftime("%Y-%m-%dT%H:%M:%S")
    fe_str = fe.strftime("%Y-%m-%d")

    obs = spanish_noise(rand_text_words(10, 22), accent_prob=accent_prob)[:220]
    ref_fe = rand_issue_date(days_back).strftime("%Y-%m-%d")

    caf = fake_caf_xml()

    dd = (
        f"<DD>"
        f"<RE>{rut_emisor}</RE><TD>{tipo_dte}</TD><F>{folio}</F><FE>{fe_str}</FE>"
        f"<RR>{rut_receptor}</RR><RSR>{rsr}</RSR>"
        f"<GIR>{giro}</GIR><DIRR>{dirr}</DIRR><CMNA>{cmna}</CMNA><REG>{rgn}</REG>"
        f"<MNT>{mnt}</MNT><NETO>{neto}</NETO><IVA>{iva}</IVA>"
        f"<IT1>{it1}</IT1>"
        f"<OBS>{obs}</OBS>"
        f"<REF><TPO>801</TPO><FOL>{random.randint(1,999999)}</FOL><FE>{ref_fe}</FE></REF>"
        f"{caf}"
        f"<TSTED>{tsted}</TSTED>"
        f"</DD>"
    )

    # The FRMT value is derived from the exact DD text built above.
    frmt = fake_frmt(dd)
    return f'<TED version="1.0">{dd}<FRMT algoritmo="SHA1withRSA">{frmt}</FRMT></TED>'


# ----------------------------
# 7) PDF417 policies: ~20 columns (+/-)
# ----------------------------
@dataclass
class ColPolicy:
    """Column-count and security-level limits used when encoding a PDF417 symbol."""
    # Declared lower bound on columns. encode_with_cols starts its search at
    # max(preferred_cols, min_cols), so this only has an effect when it is
    # larger than preferred_cols.
    min_cols: int = 18
    # First column count tried.
    preferred_cols: int = 20
    # Last column count tried (inclusive).
    max_cols: int = 22
    # Security level tried first.
    preferred_security: int = 4
    # Lowest security level the fallback search may go down to (inclusive).
    min_security: int = 2
    # When True, lower security levels are tried after every column count has
    # been rejected at preferred_security.
    allow_reduce_security: bool = True

@dataclass
class SplitPolicy:
    """Controls how an oversized payload is cut into several barcodes."""
    # When False, encode_payload re-raises "Data too long" instead of splitting.
    enabled: bool = True
    # Payload characters per part (the optional header is added on top of this).
    chunk_chars: int = 700
    # When True, each part is prefixed with "[PART i/total]".
    add_header: bool = True

@dataclass
class PadPolicy:
    """Controls padding of payloads that are too short for the chosen columns."""
    # When False, pad_to_min_rows returns the payload untouched.
    enabled: bool = True
    # Number of random alphanumerics inside each appended <PAD>...</PAD> element.
    pad_chunk: int = 220
    # Maximum number of encode attempts made by pad_to_min_rows before giving up.
    pad_max_rounds: int = 25


def _encode(payload: str, cols: int, sec: int):
    """Encode ``payload`` with pdf417gen using ``cols`` columns and security level ``sec``."""
    options = {"columns": cols, "security_level": sec}
    return pdf417gen.encode(payload, **options)

def encode_with_cols(payload: str, cp: ColPolicy):
    """Encode ``payload`` with the first (security, columns) pair that works.

    Column counts run from ``max(cp.preferred_cols, cp.min_cols)`` up to
    ``cp.max_cols``. All of them are tried at ``cp.preferred_security``
    first; if ``cp.allow_reduce_security`` is set, the same column range is
    then retried at each lower level down to ``cp.min_security``.

    Returns:
        ``(codes, cols, sec)`` for the first successful attempt.

    Raises:
        ValueError: Re-raised from the encoder for any error other than
            "Maximum is 90 rows" (which just moves on to the next attempt),
            or ``"Could not encode within constraints."`` when every attempt
            was rejected for exceeding 90 rows.
    """
    first_col = max(cp.preferred_cols, cp.min_cols)
    candidate_cols = range(first_col, cp.max_cols + 1)

    security_levels = [cp.preferred_security]
    if cp.allow_reduce_security:
        security_levels.extend(range(cp.preferred_security - 1, cp.min_security - 1, -1))

    for sec in security_levels:
        for cols in candidate_cols:
            try:
                codes = _encode(payload, cols, sec)
            except ValueError as err:
                # Only "too many rows" is recoverable by widening the symbol.
                if "Maximum is 90 rows" in str(err):
                    continue
                raise
            return codes, cols, sec

    raise ValueError("Could not encode within constraints.")

def pad_to_min_rows(payload: str, cp: ColPolicy, pp: PadPolicy) -> str:
    """Append ``<PAD>`` elements until ``payload`` encodes without a min-rows error.

    With padding disabled the payload is returned as is. Otherwise the
    payload is test-encoded up to ``pp.pad_max_rounds`` times; each time the
    encoder reports too few rows, one ``<PAD>`` element holding
    ``pp.pad_chunk`` random alphanumerics is appended.

    Raises:
        ValueError: Any other encoder error is re-raised unchanged; a
            dedicated error is raised when the rounds are exhausted.
    """
    if not pp.enabled:
        return payload

    candidate = payload
    rounds_left = pp.pad_max_rounds
    while rounds_left > 0:
        rounds_left -= 1
        try:
            encode_with_cols(candidate, cp)
        except ValueError as err:
            text = str(err)
            too_few_rows = "Minimum is" in text and "Try decreasing column count" in text
            if not too_few_rows:
                raise
            candidate += f"<PAD>{rand_alnum(pp.pad_chunk)}</PAD>"
        else:
            return candidate

    raise ValueError("Could not reach minimum rows with padding. Increase pad_chunk.")

def split_payload(payload: str, sp: SplitPolicy) -> List[str]:
    """Cut ``payload`` into consecutive pieces of ``sp.chunk_chars`` characters.

    When ``sp.add_header`` is set, each piece is prefixed with
    ``[PART i/total]`` (1-based). An empty payload yields an empty list.
    """
    size = sp.chunk_chars
    pieces = []
    for start in range(0, len(payload), size):
        pieces.append(payload[start:start + size])

    if sp.add_header:
        count = len(pieces)
        return [
            f"[PART {number}/{count}]" + piece
            for number, piece in enumerate(pieces, start=1)
        ]
    return pieces

def encode_payload(payload: str, cp: ColPolicy, sp: SplitPolicy, pp: PadPolicy):
    """Encode ``payload`` as one barcode, or as several when it is too long.

    The whole payload is tried first (padded if needed). If the encoder
    reports "Data too long" and splitting is enabled, the payload is cut with
    ``split_payload`` and each part is padded and encoded on its own. If a
    part is still too long, the whole operation is retried with the chunk
    size halved (never below 240 characters).

    Returns:
        A list of ``(payload_used, codes, cols, sec, part_index, part_total)``
        tuples, one per barcode; ``payload_used`` includes any header and
        padding actually encoded.

    Raises:
        ValueError: For any encoder error other than "Data too long"; for
            "Data too long" when splitting is disabled; and for "Data too
            long" when the chunk size cannot be reduced any further.
    """
    try:
        p1 = pad_to_min_rows(payload, cp, pp)
        codes, cols, sec = encode_with_cols(p1, cp)
        return [(p1, codes, cols, sec, 1, 1)]
    except ValueError as e:
        if "Data too long" not in str(e) or not sp.enabled:
            raise

    parts = split_payload(payload, sp)
    total = len(parts)
    out = []
    for i, part in enumerate(parts, start=1):
        try:
            # pad_to_min_rows test-encodes the part, so it must sit inside the
            # try: otherwise its "Data too long" error bypasses the fallback.
            part2 = pad_to_min_rows(part, cp, pp)
            codes, cols, sec = encode_with_cols(part2, cp)
        except ValueError as e:
            if "Data too long" not in str(e):
                raise
            new_chunk = max(240, sp.chunk_chars // 2)
            if new_chunk >= sp.chunk_chars:
                # The chunk size is already at its floor; retrying with the
                # same size would recurse forever.
                raise
            return encode_payload(payload, cp, SplitPolicy(True, new_chunk, sp.add_header), pp)
        out.append((part2, codes, cols, sec, i, total))
    return out


# ----------------------------
# 8) Image degradations (mild, randomized)
# ----------------------------
@dataclass
class NoiseConfig:
    """Probabilities and value ranges for the random image degradations.

    Each ``p_*`` field is the chance that the matching effect is applied to an
    image; each tuple is a ``(low, high)`` range sampled with
    ``random.uniform``. The shear and hue-shift probabilities are not
    configurable here: apply_random_degradations hard-codes them (0.35 and
    0.45 respectively).
    """
    # Master switch: when False, apply_random_degradations returns the image untouched.
    enabled: bool = True

    # probabilities (0..1)
    p_blur: float = 0.65
    p_color: float = 0.70
    p_saltpepper: float = 0.55
    p_gaussian: float = 0.45
    p_perspective: float = 0.55
    p_rotate: float = 0.55
    p_yellow: float = 0.55
    p_resample: float = 0.35

    # ranges (kept mild to preserve readability)
    blur_radius: Tuple[float, float] = (0.0, 1.2)
    brightness: Tuple[float, float] = (0.92, 1.08)
    contrast: Tuple[float, float] = (0.92, 1.10)
    saturation: Tuple[float, float] = (0.85, 1.15)
    hue_shift_deg: Tuple[float, float] = (-6.0, 6.0)

    saltpepper_amount: Tuple[float, float] = (0.0005, 0.008)  # fraction of pixels
    gaussian_sigma: Tuple[float, float] = (0.0, 8.0)          # noise std in 0..255 scale (mild)

    rotate_deg: Tuple[float, float] = (-2.0, 2.0)
    shear_x: Tuple[float, float] = (-0.04, 0.04)              # affine shear (mild)
    shear_y: Tuple[float, float] = (-0.02, 0.02)

    perspective_strength: Tuple[float, float] = (0.01, 0.07)   # keystone amount (fraction of width/height)

    # Blend factor toward the paper tone used by _apply_yellowing.
    yellow_strength: Tuple[float, float] = (0.03, 0.18)

    # Downscale-upscale
    resample_scale: Tuple[float, float] = (0.88, 0.98)        # 12% max downscale


def _clamp_u8(a: np.ndarray) -> np.ndarray:
    """Limit ``a`` to the range 0..255 and convert it to ``uint8``."""
    bounded = np.clip(a, 0, 255)
    return bounded.astype(np.uint8)

def _hue_shift_rgb(img: Image.Image, deg: float) -> Image.Image:
    """Rotate the hue of ``img`` by ``deg`` degrees and return an RGB image.

    The image is converted to HSV, the hue channel is shifted with
    wrap-around, and the result is converted back to RGB.
    """
    hsv = np.array(img.convert("HSV"), dtype=np.uint8)
    # The hue channel is one byte covering the full circle, so a complete turn
    # is 256 steps and values must wrap modulo 256. Scaling and wrapping by
    # 255 skews the shift and makes hue value 255 unreachable.
    shift = int((deg / 360.0) * 256) % 256
    hue = hsv[..., 0].astype(np.int16)  # widen so the addition cannot overflow
    hsv[..., 0] = ((hue + shift) % 256).astype(np.uint8)
    return Image.fromarray(hsv, mode="HSV").convert("RGB")

def _add_gaussian_noise(img: Image.Image, sigma: float) -> Image.Image:
    """Add zero-mean Gaussian pixel noise with standard deviation ``sigma``.

    ``sigma`` is expressed on the 0..255 scale; non-positive values return
    the image unchanged. The noisy result is clipped to 0..255 and returned
    as a new RGB image.
    """
    if sigma <= 0:
        return img
    pixels = np.array(img, dtype=np.float32)
    jitter = np.random.normal(0.0, sigma, size=pixels.shape).astype(np.float32)
    noisy = np.clip(pixels + jitter, 0, 255).astype(np.uint8)
    return Image.fromarray(noisy, mode="RGB")

def _add_salt_pepper(img: Image.Image, amount: float) -> Image.Image:
    """Set a random ``amount`` fraction of pixels to pure white or pure black.

    Pixel positions are drawn with replacement; each drawn position becomes
    salt (255) or pepper (0) with equal probability. The image is returned
    unchanged when ``amount`` is non-positive or rounds down to zero pixels.
    The input is expected to be a 3-channel image.
    """
    if amount <= 0:
        return img
    pixels = np.array(img, dtype=np.uint8)
    height, width, _ = pixels.shape
    count = int(height * width * amount)
    if count <= 0:
        return img

    rows = np.random.randint(0, height, size=count)
    cols = np.random.randint(0, width, size=count)
    is_salt = np.random.rand(count) < 0.5
    is_pepper = ~is_salt
    # Salt is written first, so pepper wins where a position was drawn twice.
    pixels[rows[is_salt], cols[is_salt]] = 255
    pixels[rows[is_pepper], cols[is_pepper]] = 0
    return Image.fromarray(pixels, mode="RGB")

def _apply_yellowing(img: Image.Image, strength: float) -> Image.Image:
    """Blend ``img`` toward a warm paper tone by ``strength``.

    Non-positive ``strength`` returns the image unchanged.
    """
    if strength <= 0:
        return img
    paper_tone = (255, 244, 214)  # mild yellow paper
    tint = Image.new("RGB", img.size, paper_tone)
    return Image.blend(img, tint, alpha=float(strength))

def _affine_shear(img: Image.Image, shx: float, shy: float) -> Image.Image:
    """Shear ``img`` around its centre, keeping the original size.

    Args:
        img: RGB image, as produced by apply_random_degradations.
        shx: Horizontal shear factor (x offset per unit of y).
        shy: Vertical shear factor (y offset per unit of x).

    Returns:
        A new image of the same size. Areas that fall outside the source are
        filled with white, matching the rotation step, so the shear does not
        introduce black wedges along the borders.
    """
    w, h = img.size
    # PIL affine: (a, b, c, d, e, f) mapping x' = a*x + b*y + c ; y' = d*x + e*y + f
    a = 1.0
    b = shx
    d = shy
    e = 1.0
    # Offsets chosen so the image centre maps onto itself.
    c = -b * h / 2
    f = -d * w / 2
    return img.transform(
        (w, h),
        Image.AFFINE,
        (a, b, c, d, e, f),
        resample=Image.BICUBIC,
        fillcolor=(255, 255, 255),
    )

def _perspective_coeffs(src, dst):
    """Solve the 8 perspective coefficients that map ``src`` points onto ``dst``.

    ``src`` and ``dst`` are sequences of four ``(x, y)`` points. The returned
    tuple ``(a, b, c, d, e, f, g, h)`` satisfies
    ``u = (a*x + b*y + c) / (g*x + h*y + 1)`` and
    ``v = (d*x + e*y + f) / (g*x + h*y + 1)`` for each pair
    ``(x, y) -> (u, v)``, solved in the least-squares sense.
    """
    rows = []
    targets = []
    for (x, y), (u, v) in zip(src, dst):
        # Two linear equations per point pair: one for u, one for v.
        rows.extend((
            [x, y, 1, 0, 0, 0, -u*x, -u*y],
            [0, 0, 0, x, y, 1, -v*x, -v*y],
        ))
        targets.extend((u, v))

    matrix = np.asarray(rows, dtype=np.float64)
    rhs = np.asarray(targets, dtype=np.float64)
    solution, *_ = np.linalg.lstsq(matrix, rhs, rcond=None)
    return tuple(solution.tolist())

def _trapezoid_perspective(img: Image.Image, strength: float) -> Image.Image:
    """Warp ``img`` into a mild random trapezoid (keystone), keeping its size.

    Args:
        img: RGB image, as produced by apply_random_degradations.
        strength: Keystone amount as a fraction of the width (and 0.7x that
            fraction of the height). Non-positive values return ``img``
            unchanged.

    Returns:
        A new image of the same size in which the source rectangle has been
        moved onto a slightly inset quadrilateral; the area outside that
        quadrilateral is filled with white.
    """
    if strength <= 0:
        return img
    w, h = img.size
    dx = w * strength
    dy = h * strength * 0.7

    # random trapezoid direction
    top_in = random.random() < 0.5
    left_in = random.random() < 0.5

    # Destination quad: start from the rectangle corners
    # (0,0) (w,0) (w,h) (0,h) and pull one corner of the top or the bottom
    # edge inward by dx.
    if top_in:
        tlx = dx if left_in else 0
        trx = w - (0 if left_in else dx)
        blx = 0
        brx = w
    else:
        tlx = 0
        trx = w
        blx = dx if left_in else 0
        brx = w - (0 if left_in else dx)

    # vertical skew: every corner moves inward by an independent random amount
    tly = dy * (random.random() * 0.8)
    try_ = dy * (random.random() * 0.8)
    bly = h - dy * (random.random() * 0.8)
    bry = h - dy * (random.random() * 0.8)

    src = [(0,0), (w,0), (w,h), (0,h)]
    dst = [(tlx, tly), (trx, try_), (brx, bry), (blx, bly)]
    # PIL applies perspective coefficients from OUTPUT pixel to INPUT pixel,
    # so the solver must be given destination -> source. Passing them the
    # other way round stretches the inner quad over the whole frame and crops
    # the image margins instead of warping the image into the quad.
    coeffs = _perspective_coeffs(dst, src)
    return img.transform(
        (w, h),
        Image.PERSPECTIVE,
        coeffs,
        resample=Image.BICUBIC,
        fillcolor=(255, 255, 255),
    )

def _down_up_sample(img: Image.Image, scale: float) -> Image.Image:
    """Soften ``img`` by shrinking it by ``scale`` and resizing it back.

    Both resizes use bilinear resampling and the shrunken size is at least
    1x1. A ``scale`` outside the open interval (0, 1) returns the image
    unchanged.
    """
    if 0 < scale < 1:
        width, height = img.size
        reduced_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        reduced = img.resize(reduced_size, resample=Image.BILINEAR)
        return reduced.resize((width, height), resample=Image.BILINEAR)
    return img

def apply_random_degradations(img: Image.Image, nc: NoiseConfig) -> Image.Image:
    """Apply a random subset of mild "camera/scan" degradations to ``img``.

    Effects are considered in a fixed order: rotation, shear, perspective,
    colour jitter (with an optional hue shift), yellowing, down/up
    resampling, Gaussian blur, Gaussian pixel noise, salt & pepper. Each one
    is applied with its own probability and a strength sampled from the
    matching range in ``nc``. With ``nc.enabled`` false the input is returned
    untouched; otherwise the result is an RGB image of the same size.
    """
    if not nc.enabled:
        return img

    def hit(probability):
        # One uniform draw decides whether an effect is applied.
        return random.random() < probability

    def draw(bounds):
        return random.uniform(*bounds)

    # Every effect below operates on RGB data.
    out = img.convert("RGB")

    # Geometric effects first. Rotation keeps the canvas size (expand=False)
    # and fills the uncovered corners with white.
    if hit(nc.p_rotate):
        angle = draw(nc.rotate_deg)
        out = out.rotate(angle, resample=Image.BICUBIC, expand=False, fillcolor=(255, 255, 255))

    if hit(0.35):
        shear_x = draw(nc.shear_x)
        shear_y = draw(nc.shear_y)
        out = _affine_shear(out, shear_x, shear_y)

    if hit(nc.p_perspective):
        out = _trapezoid_perspective(out, draw(nc.perspective_strength))

    # Colour jitter: brightness, contrast, saturation, then an optional hue shift.
    if hit(nc.p_color):
        jitter_steps = (
            (ImageEnhance.Brightness, nc.brightness),
            (ImageEnhance.Contrast, nc.contrast),
            (ImageEnhance.Color, nc.saturation),
        )
        for enhancer, bounds in jitter_steps:
            out = enhancer(out).enhance(draw(bounds))
        if hit(0.45):
            out = _hue_shift_rgb(out, draw(nc.hue_shift_deg))

    # Paper tint.
    if hit(nc.p_yellow):
        out = _apply_yellowing(out, draw(nc.yellow_strength))

    # Softening: resample down and back up, then blur.
    if hit(nc.p_resample):
        out = _down_up_sample(out, draw(nc.resample_scale))

    if hit(nc.p_blur):
        radius = draw(nc.blur_radius)
        if radius > 0:
            out = out.filter(ImageFilter.GaussianBlur(radius=radius))

    # Pixel-level noise goes last so it is not smoothed away.
    if hit(nc.p_gaussian):
        sigma = draw(nc.gaussian_sigma)
        if sigma > 0.1:
            out = _add_gaussian_noise(out, sigma=sigma)

    if hit(nc.p_saltpepper):
        amount = draw(nc.saltpepper_amount)
        if amount > 0:
            out = _add_salt_pepper(out, amount)

    return out


# ----------------------------
# 9) Rendering (with degradations)
# ----------------------------
@dataclass
class RenderConfig:
    """Rendering and file-output options for the barcode images."""
    # Output format: "JPG"/"JPEG" (case-insensitive) is saved as JPEG; any other
    # value is saved as PNG by save_image_with_noise.
    fmt: str = "PNG"
    # scale, ratio and padding are forwarded unchanged to pdf417gen.render_image.
    scale: int = 8
    ratio: int = 3
    padding: int = 22
    # JPEG quality; not used when saving PNG.
    quality: int = 95
    # When True, apply_random_degradations runs on each image before it is saved.
    add_noise: bool = True

def render_pdf417_image(codes, rcfg: RenderConfig) -> Image.Image:
    """Render encoded PDF417 ``codes`` to an image using the settings in ``rcfg``."""
    render_options = {
        "scale": rcfg.scale,
        "ratio": rcfg.ratio,
        "padding": rcfg.padding,
    }
    return pdf417gen.render_image(codes, **render_options)

def save_image_with_noise(img: Image.Image, out_path: Path, rcfg: RenderConfig, nc: NoiseConfig) -> None:
    """Optionally degrade ``img`` and write it to ``out_path``.

    The parent directory is created if needed. Degradations run only when
    ``rcfg.add_noise`` is set. ``rcfg.fmt`` of "JPG"/"JPEG" (any case) writes
    a JPEG with ``rcfg.quality``; every other value writes a PNG. Images in a
    mode the target format does not take here are converted to RGB first.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    picture = apply_random_degradations(img, nc) if rcfg.add_noise else img

    wants_jpeg = rcfg.fmt.upper() in ("JPG", "JPEG")
    # JPEG is written from RGB only; PNG also keeps RGBA as is.
    accepted_modes = ("RGB",) if wants_jpeg else ("RGB", "RGBA")
    if picture.mode not in accepted_modes:
        picture = picture.convert("RGB")

    if wants_jpeg:
        picture.save(out_path, format="JPEG", quality=rcfg.quality)
    else:
        picture.save(out_path, format="PNG")


# ----------------------------
# 10) Bulk generation + CSV(image,text)
# ----------------------------
@dataclass
class GenConfig:
    """Settings for one bulk generation run (see generate_many)."""
    # Number of TED payloads to generate; a payload that has to be split
    # produces several images and CSV rows.
    n: int = 200
    # Output directory for the images and the CSV file.
    out_dir: str = "out_pdf417_cols20_csv_noise"
    # CSV file name, created inside out_dir.
    csv_name: str = "mapping.csv"

    # Look-back window (days) for random issue dates and timestamps.
    days_back: int = 365
    # When True every document gets a fresh random issuer RUT; otherwise
    # fixed_emisor_rut is used for all of them.
    randomize_emisor: bool = True
    # Check digit corrected to 0: the modulo-11 rule implemented by rut_dv
    # gives 0 for 76123456, so the previous "-7" was not a valid RUT.
    fixed_emisor_rut: str = "76123456-0"

    # Folio assigned to the first document.
    start_folio: int = 1
    # Probability that the folio counter jumps ahead before a document.
    folio_jump_prob: float = 0.18
    # Largest jump; the jump size is drawn from 2..folio_jump_max.
    folio_jump_max: int = 50_000

    # Base accent-noise probability passed to build_ted_xml.
    accent_prob: float = 0.22
    # Inclusive bounds for the number of item lines generated per document.
    items_min: int = 8
    items_max: int = 16


def generate_many(
    cfg: GenConfig,
    cp: ColPolicy,
    sp: SplitPolicy,
    pp: PadPolicy,
    rcfg: RenderConfig,
    nc: NoiseConfig,
    seed: Optional[int] = None,
):
    """Generate ``cfg.n`` synthetic TED barcodes plus a CSV mapping file.

    For every document a TED payload is built, encoded (split into several
    barcodes when too long), rendered, optionally degraded and saved under
    ``cfg.out_dir``. The CSV ``cfg.csv_name`` gets one ``(image, text)`` row
    per saved image, where ``text`` is the exact string encoded in that image
    (including any part header or padding).

    Args:
        cfg: Run settings (count, output location, folio and text options).
        cp: Column/security policy for encoding.
        sp: Split policy for oversized payloads.
        pp: Padding policy for undersized payloads.
        rcfg: Rendering and file-format options.
        nc: Image degradation options.
        seed: Seed for both ``random`` and ``numpy.random``; ``None`` leaves
            both seeded from system entropy. Dates and timestamps are derived
            from the current clock, so a fixed seed does not pin them.

    Raises:
        ValueError: Propagated from the encoder when a payload cannot be
            encoded within the given policies.
    """
    # Seeding with None is the library default (system entropy), so the seed
    # can be forwarded directly.
    random.seed(seed)
    np.random.seed(seed)

    out_dir = Path(cfg.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    csv_path = out_dir / cfg.csv_name
    folio_state = cfg.start_folio

    # The extension must follow the same rule save_image_with_noise uses to
    # pick the file format: JPG/JPEG -> JPEG, anything else -> PNG.
    ext = "jpg" if rcfg.fmt.upper() in ("JPG", "JPEG") else "png"

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["image", "text"])  # EXACTLY: (image, text)

        for _ in range(cfg.n):
            rut_emisor = random_rut() if cfg.randomize_emisor else cfg.fixed_emisor_rut
            tipo_dte = weighted_choice(TIPOS_DTE, TIPOS_DTE_W)

            # Folios increase monotonically, with occasional random gaps.
            if random.random() < cfg.folio_jump_prob:
                folio_state += random.randint(2, cfg.folio_jump_max)
            folio = folio_state
            folio_state += 1

            fe = rand_issue_date(cfg.days_back)
            rut_receptor = random_rut()

            payload = build_ted_xml(
                rut_emisor=rut_emisor,
                tipo_dte=tipo_dte,
                folio=folio,
                fe=fe,
                rut_receptor=rut_receptor,
                days_back=cfg.days_back,
                accent_prob=cfg.accent_prob,
                items_min=cfg.items_min,
                items_max=cfg.items_max,
            )

            parts = encode_payload(payload, cp, sp, pp)

            base = f"TED_TD{tipo_dte}_F{folio}_{safe_filename(rut_emisor)}"

            for (payload_used, codes, cols, sec, pidx, ptotal) in parts:
                if ptotal == 1:
                    rel_img = Path(f"{base}_c{cols}_s{sec}.{ext}")
                else:
                    rel_img = Path(f"{base}_c{cols}_s{sec}_p{pidx:03d}of{ptotal:03d}.{ext}")

                img_path = out_dir / rel_img

                clean_img = render_pdf417_image(codes, rcfg)
                save_image_with_noise(clean_img, img_path, rcfg, nc)

                # CSV row: (image, FULL payload string encoded in THAT image)
                w.writerow([str(rel_img).replace("\\", "/"), payload_used])

    print(f"✅ Images in: {out_dir.resolve()}")
    print(f"✅ CSV (image,text) at: {csv_path.resolve()}")


# ----------------------------
# 11) RUN
# ----------------------------
# Small demo run: 10 documents written to out_pdf417_cols20_csv_noise/.
cfg = GenConfig(
    n=10,
    out_dir="out_pdf417_cols20_csv_noise",
    csv_name="mapping.csv",
    accent_prob=0.22,
    items_min=8,
    items_max=16,
)

# Target 20 columns, allow up to 22; security level 4, reducible down to 2.
cp = ColPolicy(
    min_cols=18,
    preferred_cols=20,
    max_cols=22,
    preferred_security=4,
    min_security=2,
    allow_reduce_security=True,
)

sp = SplitPolicy(enabled=True, chunk_chars=700, add_header=True)
pp = PadPolicy(enabled=True, pad_chunk=220, pad_max_rounds=25)

rcfg = RenderConfig(fmt="PNG", scale=8, ratio=3, padding=22, add_noise=True)

# Noise profile for this run, relative to the NoiseConfig defaults:
#   - stronger (roughly 2x): yellowing, blur, salt & pepper, gaussian noise
#   - lighter: perspective
# The remaining ranges repeat the defaults.

nc = NoiseConfig(
    enabled=True,

    # probabilities (raised for blur, salt & pepper, gaussian and yellowing;
    # lowered for perspective)
    p_blur=0.90,
    p_color=0.70,
    p_saltpepper=0.70,
    p_gaussian=0.75,
    p_perspective=0.45,
    p_rotate=0.55,
    p_yellow=0.80,
    p_resample=0.35,

    # 2x-ish strength
    blur_radius=(0.5, 3),                 # was (0.0, 1.2)
    gaussian_sigma=(1, 25.0),             # was (0.0, 8.0)
    saltpepper_amount=(0.01, 0.02),       # was (0.0005, 0.008)
    yellow_strength=(0.1, 0.36),           # was (0.03, 0.18)

    # perspective slightly lighter
    perspective_strength=(0.008, 0.012),     # was (0.01, 0.07)

    # same values as the NoiseConfig defaults
    brightness=(0.92, 1.08),
    contrast=(0.92, 1.10),
    saturation=(0.85, 1.15),
    hue_shift_deg=(-6.0, 6.0),
    rotate_deg=(-2.0, 2.0),
    shear_x=(-0.04, 0.04),
    shear_y=(-0.02, 0.02),
    resample_scale=(0.88, 0.98),
)


# No seed is passed, so every run produces a different data set.
generate_many(cfg, cp, sp, pp, rcfg, nc)


# [cell output] ✅ Images in: C:\Users\tasep\OneDrive\Documentos\Lyon TI\PDF417\out_pdf417_cols20_csv_noise
# [cell output] ✅ CSV (image,text) at: C:\Users\tasep\OneDrive\Documentos\Lyon TI\PDF417\out_pdf417_cols20_csv_noise\mapping.csv
