In [1]:
import json
import numpy as np
import pandas as pd

from cedenar_anomalies.utils.paths import (
    data_processed_dir,
    data_raw_dir,
    reports_dir
)

In [2]:
# === Input files ===
API_JSON_PATH = data_raw_dir("api_result.json")
MAPPING_JSON_PATH = data_raw_dir("category_mappings_anomaly_ext.json")

# === Output file ===
OUTPUT_CSV_PATH = data_processed_dir("output_datos_filtrados.csv")

# === Required columns ===
# The order of this list is the column order of the exported CSV.
columns_to_keep = [
    "AREA",
    "item_288",
    "odt",
    "orden",
    "PLAN_COMERCIAL",
    "Descripcion",
    "reincidente",
    "Anomalia_conf",
    "ZONA",
    "año",
    "LATI_USU",
    "LONG_USU",
    "NIVEL",
    "item_68",
    "item_74",
    "item_237",
    "item_248",
    "item_597",
    "item_602",
    "item_108",
    "item_43",
    "item_603",
    "item_599",
    "item_35",
    "item_598",
    "item_33",
    "item_601",
    "item_24",
    "item_23",
]


In [3]:
# === Load data ===
def _read_json(path):
    """Open the UTF-8 encoded file at ``path`` and return its decoded JSON content."""
    with open(path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)


# API result records, iterated one by one in the processing step.
raw_data = _read_json(API_JSON_PATH)

# Category mappings, queried by field name (e.g. "PLAN_COMERCIAL").
mappings = _read_json(MAPPING_JSON_PATH)

In [None]:
# === Processing ===
# Turn every raw API record into a flat dict keyed by the column names used
# later when the DataFrame is built (see columns_to_keep).


def _split_coordinates(value):
    """Split a ``"lat,lon"`` string into a ``(lat, lon)`` pair of stripped strings.

    Returns ``(None, None)`` when ``value`` is not a string or does not contain
    exactly two comma-separated parts.
    """
    if isinstance(value, str):
        parts = value.split(",")
        if len(parts) == 2:
            return parts[0].strip(), parts[1].strip()
    return None, None


def _extract_year(value):
    """Return the year of a day-first date value as an int, or ``None``.

    ``None`` is returned when the value is missing, cannot be parsed, or
    parses to something other than a single timestamp (``NaT`` for an empty
    string, an index for list-like input), so the result is never NaN.
    """
    if value is None:
        return None
    try:
        parsed = pd.to_datetime(value, dayfirst=True)
    except Exception:
        # Deliberate best-effort: one malformed date must not abort the run.
        return None
    # NaT is not a Timestamp instance, so this also filters out null dates.
    return parsed.year if isinstance(parsed, pd.Timestamp) else None


def _lookup_plan(plan_mapping, code):
    """Translate a raw commercial-plan code through ``plan_mapping``.

    The mapping is loaded from JSON, so its keys are always strings; the code
    is therefore converted to ``str`` before the lookup. This lets numeric
    codes match and avoids a ``TypeError`` on unhashable values.
    Returns ``None`` when the code is ``None`` or has no mapping entry.
    """
    if code is None:
        return None
    return plan_mapping.get(str(code))


def _normalize_record(record, plan_mapping):
    """Build the flattened output dict for one raw API record.

    Parameters
    ----------
    record : dict
        Raw record; purely numeric keys are renamed to ``item_<key>``.
    plan_mapping : dict
        String-keyed lookup table used for the PLAN_COMERCIAL column.

    Returns
    -------
    dict
        The renamed fields plus the derived / custom columns.
    """
    # Rename numeric keys to the item_xxx format; keep the others untouched.
    new_record = {
        (f"item_{key}" if key.isdigit() else key): value
        for key, value in record.items()
    }

    # Latitude and longitude come from item_118 when it is a "lat,lon" string.
    latitude, longitude = _split_coordinates(new_record.get("item_118"))
    new_record["LATI_USU"] = latitude
    new_record["LONG_USU"] = longitude

    # Year is derived from the date stored in item_293, when parseable.
    new_record["año"] = _extract_year(new_record.get("item_293"))

    # Custom fields copied straight from the raw numeric keys.
    new_record["odt"] = record.get("288")
    new_record["orden"] = record.get("287")
    new_record["Descripcion"] = record.get("62")
    new_record["ZONA"] = record.get("1")
    new_record["AREA"] = record.get("169")  # no mapping applied for now

    # PLAN_COMERCIAL is the mapped value of raw key "14" ("" when absent).
    new_record["PLAN_COMERCIAL"] = _lookup_plan(plan_mapping, record.get("14", ""))

    # Fields that have no source yet.
    new_record["reincidente"] = None
    new_record["Anomalia_conf"] = None
    # TODO: NIVEL is not populated here; item_132 was a candidate source
    # in an earlier draft — confirm before enabling.

    return new_record


# Resolve the mapping table once instead of on every record; fall back to an
# empty table when the entry is missing or null in the mappings file.
plan_comercial_mapping = mappings.get("PLAN_COMERCIAL") or {}

data = [_normalize_record(record, plan_comercial_mapping) for record in raw_data]


In [None]:
# === Build the DataFrame and keep only the required columns ===
df = pd.DataFrame(data)

# Required columns that no record provided are created and filled with None.
missing_columns = [name for name in columns_to_keep if name not in df.columns]
for name in missing_columns:
    df[name] = None

# NOTE(review): pandas may upcast integer columns that contain missing values
# to float (e.g. a year written as 2023.0) — confirm this is acceptable
# for consumers of the exported file.
df_final = df.loc[:, columns_to_keep]


In [None]:
# === Write the CSV ===
# The row index is not written to the file.
df_final.to_csv(
    OUTPUT_CSV_PATH,
    encoding="utf-8",
    index=False,
)
print(f"CSV generado: {OUTPUT_CSV_PATH}")