In [1]:
!pip install pandas pyreadstat



In [2]:
import pandas as pd
import xml.etree.ElementTree as ET
import json

In [7]:
# Notebook configuration:
#   dicx_path   -> XML dictionary file that is parsed to build the field list
#   output_path -> JSON file the resulting field list is written to
dicx_path, output_path = "CEN24.dicX", "pre.fields.json"

In [8]:
# Maps the dictionary's <varType> values to the type names written to the
# output; anything not listed here falls back to "string".
type_map = {
    "integer": "integer",
    "numeric": "number",
    "string": "string",
    "date": "date",
}


def _variable_to_field(var):
    """Build one field descriptor from a ``<variable>`` XML element.

    Args:
        var: An ``xml.etree.ElementTree.Element`` for a single variable.

    Returns:
        A dict with keys ``name`` (upper-cased), ``title`` and ``type``.
        When the variable has direct ``<Category code="...">`` children, a
        ``constraints.enum`` list of those codes is added. Otherwise, when
        it has ``<valueLabels>``, ``constraints.enum`` lists the values and
        ``valueLabels`` maps each value to its label. Codes and values are
        kept as strings exactly as they appear in the XML.
    """
    name = var.findtext("name", default="").strip().upper()
    dtype = var.findtext("varType", default="string").strip().lower()
    # findtext() only applies its default when <label> is absent; an empty or
    # whitespace-only <label> must also fall back to the variable name.
    label = var.findtext("label", default="").strip() or name

    # Collect the value -> label mapping from <valueLabels>, if present.
    vlabels_mapping = {}
    value_labels_node = var.find("valueLabels")
    if value_labels_node is not None:
        for elem in value_labels_node.findall("valueLabelElement"):
            value_text = elem.findtext("value", default="").strip()
            label_text = elem.findtext("label", default="").strip()
            if value_text:  # skip entries without a usable value
                vlabels_mapping[value_text] = label_text

    # Category codes from direct <Category> children that carry a code attribute.
    categories = [
        cat.attrib["code"] for cat in var.findall("Category") if "code" in cat.attrib
    ]

    field = {"name": name, "title": label, "type": type_map.get(dtype, "string")}
    if categories:
        field["constraints"] = {"enum": categories}
    elif vlabels_mapping:
        # No categories, but value labels were found: use them for the enum.
        field["constraints"] = {"enum": list(vlabels_mapping.keys())}
        field["valueLabels"] = vlabels_mapping
    return field


tree = ET.parse(dicx_path)
root = tree.getroot()

# One descriptor per <variable> element found anywhere in the document.
fields = [_variable_to_field(var) for var in root.findall(".//variable")]

In [9]:
# Write the collected field descriptors as pretty-printed UTF-8 JSON,
# keeping non-ASCII characters unescaped.
with open(output_path, mode="w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(fields, ensure_ascii=False, indent=2))

print(f"✅ Fields generado en {output_path}")

✅ Fields generado en pre.fields.json
