In [1]:
from pathlib import Path
import json
from typing import List, Tuple, Union, Iterable, Dict, Optional

In [2]:
def load_json_or_jsonl(path: Path) -> Tuple[list, bool]:
    """
    Load a JSON or JSONL file and report which of the two formats it used.

    The whole text is first parsed as a single JSON document. Only when that
    fails is it parsed as JSONL (one JSON value per non-blank line).

    Args:
        path: File to read; it is decoded as UTF-8.

    Returns:
        ``(data, es_jsonl)``:
        - ``data``: the parsed content. A top-level JSON object is wrapped in
          a one-element list; any other top-level value is returned as parsed.
        - ``es_jsonl``: True when the content was read line by line, or when
          a single top-level object came from a file whose suffix is
          ``.jsonl`` (a one-record JSONL file).

    Raises:
        json.JSONDecodeError: if the text is neither a JSON document nor
            valid JSONL.
    """
    txt = path.read_text(encoding="utf-8")
    try:
        data = json.loads(txt)
    except json.JSONDecodeError:
        # Not a single JSON document: fall back to one JSON value per line.
        # Split on "\n" only. str.splitlines() also breaks on U+2028, U+2029,
        # U+0085, VT and FF, which may appear unescaped inside JSON strings
        # (and are written raw when dumping with ensure_ascii=False).
        # read_text() has already normalised "\r\n" and "\r" to "\n".
        rows = []
        for line in txt.split("\n"):
            line = line.strip()
            if line:
                rows.append(json.loads(line))
        return rows, True

    if isinstance(data, dict):
        # A lone object in a *.jsonl file is a one-record JSONL file; report
        # it as such so it is not written back as a pretty-printed array.
        return [data], path.suffix.lower() == ".jsonl"
    return data, False

def dump_json_or_jsonl(path: Path, data: list, es_jsonl: bool) -> None:
    """
    Write ``data`` to ``path`` as UTF-8, overwriting any existing file.

    Args:
        path: Destination file.
        data: Rows to serialise.
        es_jsonl: When true, write one compact JSON value per line (JSONL);
            otherwise write a single JSON document indented by two spaces.

    Non-ASCII characters are written as-is rather than as ``\\uXXXX`` escapes.
    """
    with path.open("w", encoding="utf-8") as out:
        if not es_jsonl:
            # Single pretty-printed document.
            json.dump(data, out, ensure_ascii=False, indent=2)
            return
        # JSONL: every row on its own newline-terminated line.
        out.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in data
        )


In [3]:
def replace_ids_in_data(
    data: list,
    empresa_mapping: Optional[Dict[Union[str, int], str]] = None,
    usuario_mapping: Optional[Dict[Union[str, int], str]] = None,
    as_oid: bool = False,
    empresa_key: str = "idEmpresa",
    usuario_key: str = "idUsuario",
) -> Tuple[int, int]:
    """
    Replace the top-level company and/or user id fields of each record, in place.

    Args:
        data: Records to update. Items that are not dicts are skipped.
        empresa_mapping: ``{current_value: new_id_str}`` for the company field;
            ``None`` or empty disables company replacement.
        usuario_mapping: ``{current_value: new_id_str}`` for the user field;
            ``None`` or empty disables user replacement.
        as_oid: When true, the new value is written as
            ``{"$oid": "<new_id_str>"}`` instead of the bare string.
        empresa_key: Name of the company field (default ``"idEmpresa"``).
        usuario_key: Name of the user field (default ``"idUsuario"``).

    Returns:
        ``(company_replacements, user_replacements)``.

    A field is left untouched when it is absent, when its current value has no
    entry in the mapping, or when its current value is unhashable (for example
    a field that already holds a ``{"$oid": ...}`` dict).
    """
    targets = (
        (empresa_key, empresa_mapping or {}),
        (usuario_key, usuario_mapping or {}),
    )
    counts = [0, 0]

    for obj in data:
        if not isinstance(obj, dict):
            continue

        for slot, (key, mapping) in enumerate(targets):
            if not mapping or key not in obj:
                continue

            current = obj[key]
            try:
                known = current in mapping
            except TypeError:
                # Unhashable current value (dict/list): it cannot be a mapping
                # key, so there is nothing to replace.
                known = False
            if not known:
                continue

            new_id = mapping[current]
            # Build a fresh wrapper for every record so that records never
            # share (and accidentally co-mutate) the same {"$oid": ...} dict.
            obj[key] = {"$oid": new_id} if as_oid else new_id
            counts[slot] += 1

    return counts[0], counts[1]


In [4]:
def process_file_with_mappings(
    file_path: Union[str, Path],
    empresa_mapping: Optional[Dict[Union[str, int], str]] = None,
    usuario_mapping: Optional[Dict[Union[str, int], str]] = None,
    as_oid: bool = False,
    inplace: bool = False,
) -> Tuple[Path, Tuple[int, int]]:
    """
    Apply the company and user id mappings to one JSON/JSONL file.

    Args:
        file_path: File to read.
        empresa_mapping: Forwarded to ``replace_ids_in_data``.
        usuario_mapping: Forwarded to ``replace_ids_in_data``.
        as_oid: Forwarded to ``replace_ids_in_data``.
        inplace: When true, the input file is overwritten. Otherwise the
            result is written next to it as ``<stem>.updated.json`` or
            ``<stem>.updated.jsonl``; a file read as JSONL whose suffix is not
            ``.jsonl`` gets ``<full name>.updated.jsonl``.

    Returns:
        ``(output_path, (company_replacements, user_replacements))``.
    """
    source = Path(file_path)
    records, is_jsonl = load_json_or_jsonl(source)

    replaced_emp, replaced_usr = replace_ids_in_data(
        records,
        empresa_mapping=empresa_mapping,
        usuario_mapping=usuario_mapping,
        as_oid=as_oid,
    )

    # Pick the destination: same file, or a ".updated" sibling whose
    # extension follows the format that was detected while loading.
    if inplace:
        target = source
    elif not is_jsonl:
        target = source.with_name(source.stem + ".updated.json")
    elif source.suffix == ".jsonl":
        target = source.with_name(source.stem + ".updated.jsonl")
    else:
        target = source.with_name(source.name + ".updated.jsonl")

    dump_json_or_jsonl(target, records, is_jsonl)
    return target, (replaced_emp, replaced_usr)


In [5]:
def process_many_with_mappings(
    files: List[Union[str, Path]],
    empresa_mapping: Optional[Dict[Union[str, int], str]] = None,
    usuario_mapping: Optional[Dict[Union[str, int], str]] = None,
    as_oid: bool = False,
    inplace: bool = False,
) -> Tuple[int, int]:
    """
    Run ``process_file_with_mappings`` over several files and total the counts.

    One ``[OK]`` line is printed per file, followed by a final totals line.

    Args:
        files: Paths of the files to process, handled in the given order.
        empresa_mapping: Forwarded to ``process_file_with_mappings``.
        usuario_mapping: Forwarded to ``process_file_with_mappings``.
        as_oid: Forwarded to ``process_file_with_mappings``.
        inplace: Forwarded to ``process_file_with_mappings``.

    Returns:
        ``(total_company_replacements, total_user_replacements)``.
    """
    # The same options apply to every file, so bundle them once.
    options = {
        "empresa_mapping": empresa_mapping,
        "usuario_mapping": usuario_mapping,
        "as_oid": as_oid,
        "inplace": inplace,
    }

    sum_emp = 0
    sum_usr = 0
    for source in files:
        written, (n_emp, n_usr) = process_file_with_mappings(source, **options)
        print(f"[OK] {Path(source).name} -> {written.name} | empresa: {n_emp} · usuario: {n_usr}")
        sum_emp = sum_emp + n_emp
        sum_usr = sum_usr + n_usr

    print(f"Total reemplazos -> empresa: {sum_emp} · usuario: {sum_usr}")
    return sum_emp, sum_usr


In [6]:
# ==== COMPANY MAPPING ====
# New id string for each company.
ID_EMP01 = "68dac3e96c5866743b128d6b"
ID_EMP02 = "68dacb42b123b24a8c01b2ef"
ID_EMP03 = "68dacb5eb123b24a8c01b2f5"

# Every spelling under which each company may currently appear in the data.
group_emp01 = ["EMP001", "Empresa 01", "empresa 01", "EMPRESA 01", "01"]
group_emp02 = ["EMP002", "Empresa 02", "empresa 02", "EMPRESA 02", "02"]
group_emp03 = ["EMP003", "Empresa 03", "empresa 03", "EMPRESA 03", "03"]

# Flatten the alias groups into {alias -> new company id}.
empresa_mapping = {
    alias: company_id
    for company_id, aliases in (
        (ID_EMP01, group_emp01),
        (ID_EMP02, group_emp02),
        (ID_EMP03, group_emp03),
    )
    for alias in aliases
}

# ==== USER MAPPING ====
# {current user code -> new user id}, grouped by company.
usuario_mapping = {
    # EMP001
    "EMP001-U1": "68dac36f6c5866743b128d66",
    "EMP001-U2": "68dacffa917f846f1de559ca",
    "EMP001-U3": "68dad008917f846f1de559cf",
    "EMP001-U28": "68dad00f917f846f1de559d4",
    "EMP001-U29": "68dad015917f846f1de559d9",
    "EMP001-U30": "68dad01a917f846f1de559de",
    # EMP002
    "EMP002-U10": "68dad020917f846f1de559e3",
    "EMP002-U11": "68dad039917f846f1de559e8",
    "EMP002-U12": "68dad040917f846f1de559ed",
    "EMP002-U31": "68dad047917f846f1de559f2",
    "EMP002-U32": "68dad04d917f846f1de559f7",
    "EMP002-U33": "68dad055917f846f1de559fc",
    # EMP003
    "EMP003-U19": "68dad05b917f846f1de55a01",
    "EMP003-U20": "68dad074917f846f1de55a06",
    "EMP003-U21": "68dad07a917f846f1de55a0b",
    "EMP003-U34": "68dad080917f846f1de55a10",
    "EMP003-U35": "68dad086917f846f1de55a15",
    "EMP003-U36": "68dad08b917f846f1de55a1a",
}

# Input files, resolved relative to the current working directory.
files = [
    Path(file_name)
    for file_name in (
        "company_sustainability.json",
        "company_sustainability_month.json",
        "tickets peaje electrico.json",
        "tickets_ev_sinteticos.json",
        "tickets_peaje.json",
        "tickets_sinteticos.json",
    )
]

# Assign to `_` so the returned totals are not echoed as the cell's output.
_ = process_many_with_mappings(
    files,
    empresa_mapping=empresa_mapping,
    usuario_mapping=usuario_mapping,
    as_oid=False,    # write the new ids as plain strings, not {"$oid": ...}
    inplace=False,   # write ".updated" copies instead of overwriting inputs
)

[OK] company_sustainability.json -> company_sustainability.updated.json | empresa: 36 · usuario: 18
[OK] company_sustainability_month.json -> company_sustainability_month.updated.json | empresa: 252 · usuario: 126
[OK] tickets peaje electrico.json -> tickets peaje electrico.updated.json | empresa: 224 · usuario: 224
[OK] tickets_ev_sinteticos.json -> tickets_ev_sinteticos.updated.json | empresa: 450 · usuario: 450
[OK] tickets_peaje.json -> tickets_peaje.updated.json | empresa: 622 · usuario: 208
[OK] tickets_sinteticos.json -> tickets_sinteticos.updated.json | empresa: 1350 · usuario: 450
Total reemplazos -> empresa: 2934 · usuario: 1476
