In [1]:
from pathlib import Path
import os
import sys
import shutil
import importlib
import subprocess
import yaml
from time import perf_counter
import math

# Move to the project root: the nearest directory, starting at the current
# working directory and walking upwards, that contains a "configs" folder.
def find_project_root(start: Path, marker: str = "configs") -> Path:
    """Return the nearest directory at or above *start* that contains *marker*.

    Args:
        start: Directory where the upward search begins.
        marker: Name of the entry that identifies the project root.

    Returns:
        The first of *start* and its ancestors that contains *marker*. When
        none does, *start* itself is returned, so the caller never ends up
        working from the filesystem root just because the search ran out of
        parents.
    """
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    return start


root = find_project_root(Path.cwd().resolve())
if not (root / "configs").exists():
    # Nothing was found on the way up: stay where we are and say so.
    print("Aviso: carpeta 'configs' no encontrada; se mantiene el directorio actual.")
os.chdir(root)

# Make the project importable from the chosen root.
if str(root) not in sys.path:
    sys.path.insert(0, str(root))

# Performance helpers are optional: if anything in this step fails, the
# failure is reported and the run continues with a single worker.
for banner_line in ("=" * 60, "INICIALIZANDO", "=" * 60):
    print(banner_line)

try:
    from src.utils.performance import enable_pandas_performance, get_optimal_workers

    enable_pandas_performance()
    workers = get_optimal_workers()
    print(
        "? Optimizaciones habilitadas",
        f"? CPU cores: {os.cpu_count()}",
        f"? Workers: {workers}",
        sep="\n",
    )
except Exception as exc:
    print(f"? Optimizaciones no disponibles: {exc}")
    workers = 1

# Make sure geopandas is importable, installing it on demand.
try:
    import geopandas as gpd
except ImportError:
    # Only a missing package justifies a pip install; any other import
    # failure is left to propagate instead of triggering an install.
    print("Instalando geopandas...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "geopandas"])
    # A package installed after the interpreter started may not be noticed by
    # the import system until its finder caches are invalidated.
    importlib.invalidate_caches()
    import geopandas as gpd

# Reported once, whichever path produced the import.
print(f"? geopandas {gpd.__version__}")

# Reload modules
# Import the reporting module, then reload it so the module code is executed
# again in this session. The order matters: the `from ... import` below runs
# after the reload, so `run_provincia` is bound to the reloaded definition.
import src.reporting.render_report as render_report
importlib.reload(render_report)
from src.reporting.render_report import run_provincia
# NOTE: only render_report is reloaded here; src.etl.ingest is imported
# normally and is not reloaded by this block.
from src.etl.ingest import load_raw

# Load the province catalogue from configs/provincias.yaml. A missing file,
# an empty document or a missing/empty "provincias" key all yield an empty map.
provincias_cfg_path = Path("configs") / "provincias.yaml"
if provincias_cfg_path.exists():
    provincias_cfg = yaml.safe_load(provincias_cfg_path.read_text(encoding="utf-8")) or {}
else:
    provincias_cfg = {}

prov_map = provincias_cfg.get("provincias", {}) or {}

# Alphabetical order, with the "PROVINCIA" entry (when present) moved last.
all_provs = sorted(name for name in prov_map if name != "PROVINCIA")
if "PROVINCIA" in prov_map:
    available_provinces = [*all_provs, "PROVINCIA"]
else:
    available_provinces = all_provs

# An empty province list would otherwise surface below as an opaque
# "max() arg is an empty sequence"; raise the same exception type with a
# message that names the configuration file instead.
if not available_provinces:
    raise ValueError(f"No hay provincias configuradas en {provincias_cfg_path}")

print("=" * 60)
print("SELECCI?N DE PROVINCIA")
print("=" * 60)
print("Provincias disponibles:")

# Show the menu in 3 columns; numbering runs down each column.
n_cols = 3
n_items = len(available_provinces)
n_rows = math.ceil(n_items / n_cols)
col_width = max(len(p) for p in available_provinces) + 6  # "NN. " prefix + name + padding

for row in range(n_rows):
    line = ""
    # Stepping by n_rows visits this row's entry in each column in turn.
    for idx in range(row, n_items, n_rows):
        entry = f"{idx + 1:02d}. {available_provinces[idx]}"
        line += f"  {entry:<{col_width}}"
    print(line.rstrip())

# Province selection: accepts the 1-based menu number or the province name
# (the name is upper-cased before it is compared with the menu entries).
selected_province = None
while selected_province is None:
    choice = input("Provincia (n?mero o nombre): ").strip()
    if not choice:
        print("? Debes ingresar una provincia v?lida.")
        continue
    # isdecimal() rather than isdigit(): isdigit() is also true for characters
    # such as superscript digits, which int() cannot parse and which would
    # crash the prompt instead of asking again.
    if choice.isdecimal():
        idx = int(choice)
        if 1 <= idx <= len(available_provinces):
            selected_province = available_provinces[idx - 1]
    else:
        choice_norm = choice.upper()
        if choice_norm in available_provinces:
            selected_province = choice_norm
    if selected_province is None:
        print("? Provincia inv?lida, intenta otra vez.")

print(f"? Provincia seleccionada: {selected_province}")

# Year-interval selection
print("=" * 60)
print("INTERVALO DE A?OS")
print("=" * 60)

use_all_years = input("?Usar todo el intervalo 2000-2024? (s/N): ").strip().lower() == "s"

if not use_all_years:
    # Start year: keep asking until an integer within 2000-2024 is entered.
    while True:
        start_input = input("A?o inicio (2000-2024): ").strip()
        try:
            start_year = int(start_input)
        except ValueError:
            print("? Ingresa un a?o v?lido")
            continue
        if 2000 <= start_year <= 2024:
            break
        print("? A?o debe estar entre 2000 y 2024")

    # End year: same loop, but it may not precede the chosen start year.
    while True:
        end_input = input(f"A?o fin ({start_year}-2024): ").strip()
        try:
            end_year = int(end_input)
        except ValueError:
            print("? Ingresa un a?o v?lido")
            continue
        if start_year <= end_year <= 2024:
            break
        print(f"? A?o debe estar entre {start_year} y 2024")
else:
    start_year, end_year = 2000, 2024

print(f"? Intervalo: {start_year}-{end_year}")

# Public mode is not prompted for: it is always on, and the flag is later
# passed to run_provincia as `public_mode`.
public_mode = True
print("? Modo p?blico: activado (datos anonimizados)")

# Build a config override directory carrying the requested year window
def build_configs_override(start_year: int, end_year: int, use_all_years: bool) -> Path:
    """Write a copy of the project configs under ``notebooks/configs_override``.

    ``configs/global.yaml`` is read and written back to the override
    directory; unless *use_all_years* is true, its ``window_start_year`` and
    ``window_end_year`` entries are set to the given years first.
    ``configs/provincias.yaml`` is copied alongside it when it exists. All
    paths are relative to the current working directory.

    Args:
        start_year: First year of the window (used only when overriding).
        end_year: Last year of the window (used only when overriding).
        use_all_years: When true, the global config is copied without
            touching its window keys.

    Returns:
        The override directory path.
    """
    source_dir = Path("configs")
    override_dir = Path("notebooks") / "configs_override"
    override_dir.mkdir(parents=True, exist_ok=True)

    settings = yaml.safe_load((source_dir / "global.yaml").read_text(encoding="utf-8")) or {}
    if not use_all_years:
        settings["window_start_year"] = int(start_year)
        settings["window_end_year"] = int(end_year)
    # sort_keys=False keeps the key order of the loaded document.
    dumped = yaml.safe_dump(settings, sort_keys=False)
    (override_dir / "global.yaml").write_text(dumped, encoding="utf-8")

    provincias_src = source_dir / "provincias.yaml"
    if provincias_src.exists():
        provincias_text = provincias_src.read_text(encoding="utf-8")
        (override_dir / "provincias.yaml").write_text(provincias_text, encoding="utf-8")
    return override_dir

def resolve_raw_path(province: str) -> str | None:
    meta = prov_map.get(province.upper())
    if meta and meta.get("raw_path"):
        return meta.get("raw_path")
    fname = f"SRI_RUC_{province.upper()}.csv"
    return str(Path("data") / "raw" / fname)

# Resolve and validate the raw file first. The config override directory is
# created only at the end of this step, after every check has passed: its
# removal happens in the processing step's `finally`, which is never reached
# when one of the errors below is raised, so creating it earlier would leave
# notebooks/configs_override behind.
raw_path = resolve_raw_path(selected_province)
raw_path_obj = Path(raw_path) if raw_path else None
if not raw_path_obj or not raw_path_obj.exists():
    raise FileNotFoundError(f"No existe el raw para {selected_province}: {raw_path}")

# Load the cantons available in the raw file
raw = load_raw(str(raw_path_obj))
if "DESCRIPCION_CANTON_EST" not in raw.columns:
    raise ValueError("El raw no incluye DESCRIPCION_CANTON_EST")

# When the raw carries a province column, keep only the rows whose
# trimmed, upper-cased province matches the selected one.
raw_for_cantones = raw
if "DESCRIPCION_PROVINCIA_EST" in raw.columns:
    prov_series = (
        raw["DESCRIPCION_PROVINCIA_EST"]
        .astype("string")
        .fillna("")
        .str.strip()
        .str.upper()
    )
    raw_for_cantones = raw.loc[prov_series == selected_province.upper()].copy()

# Distinct, non-empty canton names in alphabetical order.
cantones = (
    raw_for_cantones["DESCRIPCION_CANTON_EST"]
    .astype("string")
    .fillna("")
    .str.strip()
)
cantones = sorted(c for c in cantones.unique().tolist() if c)
if not cantones:
    raise ValueError(f"No hay cantones disponibles para {selected_province} en {raw_path_obj}")

# Every input is valid: now create the config override directory.
cfg_dir = build_configs_override(start_year, end_year, use_all_years)

print("=" * 60)
print("SELECCI?N DE CANTONES")
print("=" * 60)
print(f"Cantones disponibles en {selected_province}:")

# Two-column menu; numbering runs down each column. An empty list prints
# no rows at all.
n_cols = 2
n_items = len(cantones)
if cantones:
    n_rows = math.ceil(n_items / n_cols)
    col_width = max(map(len, cantones)) + 6
else:
    n_rows, col_width = 0, 10

for row in range(n_rows):
    cells = []
    # Stepping by n_rows visits this row's entry in each column in turn.
    for idx in range(row, n_items, n_rows):
        label = f"{idx + 1:02d}. {cantones[idx]}"
        cells.append(f"  {label:<{col_width}}")
    print("".join(cells).rstrip())

# Canton selection: comma-separated 1-based menu numbers, or a keyword
# ("todos", "all", "*") to take every canton.
selected_cantones = None
while selected_cantones is None:
    choice = input("Cantones (n?meros separados por coma o 'todos'): ").strip().lower()
    if not choice:
        print("? Debes ingresar al menos un cant?n.")
        continue
    if choice in {"todos", "all", "*"}:
        selected_cantones = cantones
        break
    try:
        # Each token is parsed on its own (int() ignores surrounding spaces),
        # so "1 2" is rejected as malformed instead of being read as 12.
        idxs = [int(token) for token in choice.split(",") if token.strip()]
    except ValueError:
        print("? Formato inv?lido. Usa n?meros separados por coma o 'todos'.")
        continue
    if not idxs:
        # Input made only of separators (e.g. ",") names no canton at all;
        # ask again instead of accepting an empty selection.
        print("? Debes ingresar al menos un cant?n.")
        continue
    if any(not 1 <= i <= len(cantones) for i in idxs):
        print("? Hay ?ndices fuera de rango. Intenta otra vez.")
        continue
    # dict.fromkeys drops repeated numbers while keeping the typed order, so
    # the same canton is not selected twice.
    selected_cantones = [cantones[i - 1] for i in dict.fromkeys(idxs)]

print(f"? Cantones seleccionados: {len(selected_cantones)}")

# Per-canton processing
print("=" * 60)
print("PROCESANDO CANTONES")
print("=" * 60)

start_time = perf_counter()
outputs = []
total_cantones = len(selected_cantones)
# Arguments that are the same for every canton.
shared_kwargs = {
    "configs_dir": str(cfg_dir),
    "raw_dir": "data/raw",
    "raw_path": str(raw_path_obj),
    "public_mode": public_mode,
}
try:
    # The whole loop sits in one try: the first failing canton stops the
    # run and is reported by the handler below.
    for idx, canton in enumerate(selected_cantones, start=1):
        print(f"--- [{idx}/{total_cantones}] {canton} ---")
        out_base = run_provincia(selected_province, canton=canton, **shared_kwargs)
        outputs.append(out_base)
    elapsed = perf_counter() - start_time

    print("=" * 60, "COMPLETADO", "=" * 60, sep="\n")
    print(f" Provincia: {selected_province}")
    print(f" Cantones: {total_cantones}")
    print(f" Tiempo: {elapsed:.2f}s ({elapsed/60:.2f} min)")
    if outputs:
        print(f" Output base: {outputs[0].parents[0]}")

except Exception as exc:
    # The error is printed, not re-raised, so the final message still runs.
    elapsed = perf_counter() - start_time
    print("=" * 60, "ERROR", "=" * 60, sep="\n")
    print(f" {selected_province}: {exc}")
    print(f"Tiempo: {elapsed:.2f}s")

finally:
    # Always remove the override directory; removal errors are ignored.
    shutil.rmtree(cfg_dir, ignore_errors=True)
    print(" Cleanup: configs_override eliminado")

print("Proceso finalizado!")


INICIALIZANDO
? Optimizaciones habilitadas
? CPU cores: 12
? Workers: 8
? geopandas 1.1.2
SELECCI?N DE PROVINCIA
Provincias disponibles:
  01. AZUAY                             10. GUAYAS                            19. PICHINCHA
  02. BOLIVAR                           11. IMBABURA                          20. SANTA ELENA
  03. CARCHI                            12. LOJA                              21. SANTO DOMINGO DE LOS TSACHILAS
  04. CAÑAR                             13. LOS RIOS                          22. SUCUMBIOS
  05. CHIMBORAZO                        14. MANABI                            23. TUNGURAHUA
  06. COTOPAXI                          15. MORONA SANTIAGO                   24. ZAMORA CHINCHIPE
  07. EL ORO                            16. NAPO                              25. PROVINCIA
  08. ESMERALDAS                        17. ORELLANA
  09. GALAPAGOS                         18. PASTAZA
? Provincia seleccionada: BOLIVAR
INTERVALO DE A?OS
? Intervalo: 2000-2024
? Modo 