# Corredor

In [14]:
# scripts/convert.ipynb — Export mejorado con segmentación de dirección
# - Limpia propiedades irrelevantes
# - Colorea por corredor (azul/rojo/morado/amarillo, etc.)
# - Segmenta ida/vuelta:
#     • route_master: usa roles forward/backward de sus relations miembro
#     • route: separa ways por rol forward/backward si existen
#     • fallback: si hay dos relations con mismos extremos (from/to desordenados), etiqueta una como ida y la otra como vuelta
# - Escribe en data/raw/converted/corredores.json y corredores.geojson

import json, re
from pathlib import Path
from collections import defaultdict

# ---------- Paths ----------
def resolve_root():
    cwd = Path.cwd()
    if (cwd / "scripts").exists() and (cwd / "data").exists(): return cwd
    if cwd.name == "scripts" and (cwd.parent / "data").exists(): return cwd.parent
    for p in cwd.parents:
        if (p / "data").exists(): return p
    return cwd

ROOT = resolve_root()
NAME = "corredores.json"

# Busca input (Overpass preferido; si solo tienes GeoJSON, funcionará con fallback)
CANDIDATES = [
    ROOT / "data" / "raw" / "osm" / NAME,
    ROOT / "data" / "raw" / NAME,
]
IN_PATH = next((p for p in CANDIDATES if p.exists()), None)
if IN_PATH is None:
    matches = list((ROOT / "data").rglob(NAME)) if (ROOT / "data").exists() else []
    IN_PATH = matches[0] if matches else None
if IN_PATH is None:
    raise FileNotFoundError("No encontré corredores.json dentro de /data.")

OUT_DIR = ROOT / "data" / "raw" / "converted"
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_JSON = OUT_DIR / NAME
OUT_GEOJSON = OUT_DIR / "corredores.geojson"

# ---------- Helpers ----------
def is_geojson(x): return isinstance(x, dict) and x.get("type") == "FeatureCollection"
def is_overpass(x): return isinstance(x, dict) and isinstance(x.get("elements"), list)

DROP_KEYS = {"maxspeed","max_speed","source","created_by","note","fixme","FIXME",
             "is_in","import_uuid","check_date","survey:date","opening_hours",
             "start_date","end_date","change:date","website","phone","email",
             "wikidata","wikipedia","short_name","alt_name","old_name","operator:wikidata"}
DROP_PREFIXES = ("addr:","tiger:","gnis:","seamark:","source:","old_","contact:","mapillary")
KEEP_KEYS = {"name","ref","type","route","network","operator","from","to","description","colour","color"}

def clean_props(tags):
    if not tags: return {}
    out={}
    for k,v in tags.items():
        if k in KEEP_KEYS: out[k]=v; continue
        if k in DROP_KEYS: continue
        if any(k.startswith(p) for p in DROP_PREFIXES): continue
        # descartar el resto para mantener liviano
    return out

COLOR_MAP = {
    "azul":"#0074D9","rojo":"#FF4136","morado":"#6A3D9A","amarillo":"#FFDC00",
    "verde":"#2ECC40","celeste":"#7FDBFF","naranja":"#FF851B","plata":"#AAAAAA",
    "plateado":"#AAAAAA","gris":"#888888","negro":"#111111","rosa":"#F012BE","blanco":"#DDDDDD"
}
COLOR_WORD_RE = re.compile("|".join(sorted(COLOR_MAP.keys(), key=len, reverse=True)), re.IGNORECASE)

def infer_color_from_text(*texts):
    for t in texts:
        if not t: continue
        if isinstance(t,str) and re.fullmatch(r"#?[0-9A-Fa-f]{6}", t.strip()):
            v=t.strip(); return v if v.startswith("#") else f"#{v}"
        m = COLOR_WORD_RE.search(str(t))
        if m: return COLOR_MAP.get(m.group(0).lower())
    return None

def route_color(tags):
    return (infer_color_from_text(tags.get("colour") or tags.get("color")) or
            infer_color_from_text(tags.get("network")) or
            infer_color_from_text(tags.get("name")) or
            infer_color_from_text(tags.get("operator")) or "#444444")

def lonlat(n): return [n["lon"], n["lat"]]

def coords_from_way(way, nodes_by_id):
    if "geometry" in way and way["geometry"]:
        return [[pt["lon"], pt["lat"]] for pt in way["geometry"]]
    coords=[]
    for nid in way.get("nodes", []):
        n = nodes_by_id.get(nid)
        if n: coords.append([n["lon"], n["lat"]])
    return coords

def build_indexes(elements):
    nodes, ways, relations, masters = {}, {}, [], []
    for el in elements:
        t=el.get("type")
        if t=="node": nodes[el["id"]]=el
        elif t=="way": ways[el["id"]]=el
        elif t=="relation":
            if (el.get("tags") or {}).get("type")=="route_master": masters.append(el)
            else: relations.append(el)
    return nodes, ways, relations, masters

# ---------- Segmentación dirección ----------
def split_relation_ways_by_role(rel, ways_by_id, nodes_by_id):
    """Devuelve dict {'forward': [coords], 'backward': [coords], 'both': [coords]} según rol en members."""
    out = {"forward":[], "backward":[], "both":[]}
    for m in rel.get("members", []):
        if m.get("type")!="way": continue
        w = ways_by_id.get(m.get("ref"))
        if not w: continue
        coords = coords_from_way(w, nodes_by_id)
        if not coords: continue
        role = (m.get("role") or "").lower()
        if role == "forward": out["forward"].append(coords)
        elif role == "backward": out["backward"].append(coords)
        else: out["both"].append(coords)
    return out

def features_from_route_relation(rel, ways_by_id, nodes_by_id, label_direction=None):
    """Crea features para una relation de ruta. Si hay roles forward/backward, crea 2 features.
       label_direction: fuerza 'ida'/'vuelta' cuando se llama desde un route_master."""
    tags = rel.get("tags", {}) or {}
    base = clean_props(tags)
    color = route_color(tags)
    split = split_relation_ways_by_role(rel, ways_by_id, nodes_by_id)

    feats=[]
    def mk_feat(lines, direction=None):
        if not lines: return
        props = {
            **base,
            "_osm_type":"relation",
            "_osm_id": rel["id"],
            "kind":"route",
            "stroke": color,
            "stroke-width": 4,
            "stroke-opacity":1.0
        }
        if direction:
            props["direction"]=direction
            if direction=="vuelta":
                props["stroke-dasharray"]="4,2"
        feats.append({"type":"Feature","geometry":{"type":"MultiLineString","coordinates":lines},"properties":props})

    if split["forward"] or split["backward"]:
        # Hay roles explícitos
        mk_feat(split["forward"] + split["both"], direction=label_direction or "ida")
        mk_feat(split["backward"] + split["both"], direction="vuelta" if (label_direction or True) else None)
    else:
        # Sin roles -> una sola feature (dirección desconocida o única)
        mk_feat(split["both"], direction=label_direction)

    # Paradas (si están en members)
    for m in rel.get("members", []):
        if m.get("type") != "node": continue
        role=(m.get("role") or "").lower()
        node=nodes_by_id.get(m.get("ref"))
        if not node: continue
        if role not in ("stop","stop_entry_only","stop_exit_only","platform","platform_entry_only","platform_exit_only"):
            continue
        n_tags=node.get("tags",{}) or {}
        n_clean=clean_props(n_tags)
        feats.append({
            "type":"Feature",
            "geometry":{"type":"Point","coordinates":lonlat(node)},
            "properties":{
                **n_clean,
                "role": role,
                "kind":"stop",
                "route_name": base.get("name"),
                "route_ref": base.get("ref"),
                "network": base.get("network"),
                "marker-color": color,
                "marker-symbol":"bus",
                "_osm_type":"node",
                "_osm_id": node["id"]
            }
        })
    return feats

def convert_overpass_with_directions(data):
    elements=data.get("elements",[])
    nodes, ways, relations, masters = build_indexes(elements)
    features=[]

    # 1) route_master: crear dos features (ida/vuelta) si los hijos traen roles
    master_children = set()
    for rm in masters:
        roles = defaultdict(list)  # role -> list[relation]
        for m in rm.get("members", []):
            if m.get("type")!="relation": continue
            child = next((r for r in relations if r["id"]==m.get("ref")), None)
            if not child: continue
            master_children.add(child["id"])
            roles[(m.get("role") or "").lower()].append(child)
        # forward/backward conocidos
        for child in roles.get("forward", []):
            features += features_from_route_relation(child, ways, nodes, label_direction="ida")
        for child in roles.get("backward", []):
            features += features_from_route_relation(child, ways, nodes, label_direction="vuelta")
        # otros (sin rol): agrégalos sin dirección
        for child in roles.get("", []):
            features += features_from_route_relation(child, ways, nodes, label_direction=None)

    # 2) relations sueltas (no incluidas en un master)
    loose = [r for r in relations if r["id"] not in master_children]
    # Para etiquetar ida/vuelta en loose, intenta emparejar por extremos
    by_key = defaultdict(list)  # key=frozenset({from,to,network,ref})
    for r in loose:
        t=r.get("tags",{}) or {}
        key=frozenset({t.get("from"), t.get("to"), t.get("network"), t.get("ref")})
        by_key[key].append(r)

    for key, group in by_key.items():
        if len(group)==2:
            # dos variantes -> marca ida/vuelta de forma consistente
            a,b = group
            features += features_from_route_relation(a, ways, nodes, label_direction="ida")
            features += features_from_route_relation(b, ways, nodes, label_direction="vuelta")
        else:
            # 1 o más de 2 (ramales) -> sin etiqueta rígida
            for r in group:
                features += features_from_route_relation(r, ways, nodes, label_direction=None)

    return {"type":"FeatureCollection","features":features}

# ---------- Ejecutar ----------
with IN_PATH.open("r", encoding="utf-8") as f:
    data = json.load(f)

if is_overpass(data):
    geojson = convert_overpass_with_directions(data)
elif is_geojson(data):
    # Si ya es GeoJSON (como tu archivo convertido), no tenemos roles → solo limpiamos props y dejamos color.
    feats=[]
    for ft in data.get("features", []):
        props=ft.get("properties",{}) or {}
        base=clean_props(props)
        color=route_color(props)
        kind=props.get("kind") or ("route" if ft.get("geometry",{}).get("type") in ("LineString","MultiLineString") else "stop" if ft.get("geometry",{}).get("type")=="Point" else "feature")
        style = {"stroke":color,"stroke-width":4,"stroke-opacity":1.0} if kind=="route" else {"marker-color":color,"marker-symbol":"bus"} if kind=="stop" else {}
        feats.append({"type":"Feature","geometry":ft.get("geometry"),"properties":{**base,**style}})
    geojson={"type":"FeatureCollection","features":feats}
else:
    raise ValueError("El input no es Overpass JSON ni GeoJSON.")

for out in (OUT_JSON, OUT_GEOJSON):
    with out.open("w", encoding="utf-8") as f: json.dump(geojson, f, ensure_ascii=False, indent=2)
    print("✔ Guardado:", out)

print("Rutas:", sum(1 for f in geojson["features"] if f["properties"].get("kind")=="route"))
print("Paradas:", sum(1 for f in geojson["features"] if f["properties"].get("kind")=="stop"))
print("Con etiqueta 'direction':", sum(1 for f in geojson["features"] if f["properties"].get("direction") in ("ida","vuelta")))


✔ Guardado: d:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\converted\corredores.json
✔ Guardado: d:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\converted\corredores.geojson
Rutas: 22
Paradas: 1474
Con etiqueta 'direction': 0


# Alimentadores

In [19]:
# scripts/convert.ipynb — Alimentadores con inventario e inclusión amplia
# - Incluye TODAS las relations route=bus del JSON.
# - Normaliza refs AN/AS (con o sin guión, mayúsc/minúsc) a AN-XX / AS-XX.
# - Colores: ORANGE para tu lista; TODO LO DEMÁS → YELLOW (#FFCD00).
# - Imprime todas las rutas detectadas y guarda un CSV con el inventario.

import csv, json, re
from pathlib import Path
from collections import defaultdict

# ---------- Paths ----------
def resolve_project_root() -> Path:
    cwd = Path.cwd()
    if cwd.name == "scripts" and (cwd.parent / "data").exists(): return cwd.parent
    if (cwd / "scripts").exists() and (cwd / "data").exists():   return cwd
    for p in cwd.parents:
        if (p / "data").exists(): return p
    return cwd

ROOT = resolve_project_root()
IN_PATH  = ROOT / "data" / "raw" / "osm" / "alimentadores.json"
OUT_DIR  = ROOT / "data" / "raw" / "converted"
OUT_JSON = OUT_DIR / "alimentadores.json"
OUT_GEO  = OUT_DIR / "alimentadores.geojson"
OUT_CSV  = OUT_DIR / "alimentadores_inventory.csv"
OUT_DIR.mkdir(parents=True, exist_ok=True)

if not IN_PATH.exists():
    raise FileNotFoundError(f"No se encontró el input: {IN_PATH}")

# ---------- Config colores ----------
ORANGE = "#FF4500"
YELLOW = "#FFCD00"
ORANGE_REFS = {
    "AN-01","AN-02","AN-05","AN-06","AN-07","AN-08",
    "AN-12","AN-13","AN-14","AN-15","AN-16","AN-17","AN-18"
}
COLOR_OVERRIDES = {ref: ORANGE for ref in ORANGE_REFS}

# ---------- Utilidades ----------
def is_overpass(obj): return isinstance(obj, dict) and isinstance(obj.get("elements"), list)
def is_geojson(obj):  return isinstance(obj, dict) and obj.get("type")=="FeatureCollection"

KEEP_KEYS = {"ref","name","from","to","network","operator","route","description"}
DROP_KEYS = {"maxspeed","max_speed","source","created_by","opening_hours","phone","email",
             "website","wikidata","wikipedia","short_name","alt_name","old_name",
             "check_date","survey:date","start_date","end_date"}
DROP_PREFIX = ("addr:","contact:","gnis:","tiger:","seamark:","source:")

def clean_props(tags: dict) -> dict:
    if not tags: return {}
    out={}
    for k,v in tags.items():
        if k in KEEP_KEYS: out[k]=v; continue
        if k in DROP_KEYS: continue
        if any(k.startswith(p) for p in DROP_PREFIX): continue
    return out

# Normaliza AN/AS con o sin guión: "an10", "AS 10", "AS-10" -> ("AS-10", True).
REF_AN_AS_RE = re.compile(r'^(?P<prefix>an|as)[\s\-]?(\d{1,2})$', re.IGNORECASE)
def normalize_ref(raw_ref: str|None, name: str|None) -> tuple[str|None, bool]:
    if not raw_ref:
        return (None, False)
    m = REF_AN_AS_RE.match(raw_ref.strip())
    if m:
        pref = m.group('prefix').upper()
        num  = m.group(2).zfill(2)
        return (f"{pref}-{num}", True)
    return (raw_ref.strip(), False)

def color_for(tags: dict) -> str:
    ref_raw = (tags.get("ref") or "").strip()
    name = (tags.get("name") or "")
    ref_norm, is_an_as = normalize_ref(ref_raw, name)
    # 1) overrides exactos
    if ref_norm in COLOR_OVERRIDES: 
        return COLOR_OVERRIDES[ref_norm]
    # 2) cualquier otra AN/AS (normalizada o no) -> amarillo
    if is_an_as:
        return YELLOW
    # 3) cualquier otra ruta extra (p. ej. "Gamarra" sin AN/AS) -> amarillo por defecto
    return YELLOW

def lonlat(n): return [n["lon"], n["lat"]]

def build_indexes(elements):
    nodes, ways, relations = {}, {}, []
    for el in elements:
        t = el.get("type")
        if t == "node": nodes[el["id"]] = el
        elif t == "way": ways[el["id"]] = el
        elif t == "relation": relations.append(el)
    return nodes, ways, relations

def coords_from_way(way, nodes_by_id):
    if "geometry" in way and way["geometry"]:
        return [[pt["lon"], pt["lat"]] for pt in way["geometry"]]
    coords=[]
    for nid in way.get("nodes", []):
        n = nodes_by_id.get(nid)
        if n: coords.append([n["lon"], n["lat"]])
    return coords

def split_ways_by_role(rel, ways_by_id, nodes_by_id):
    parts = {"forward": [], "backward": [], "both": []}
    for m in rel.get("members", []):
        if m.get("type") != "way": continue
        w = ways_by_id.get(m.get("ref"))
        if not w: continue
        coords = coords_from_way(w, nodes_by_id)
        if not coords: continue
        role = (m.get("role") or "").lower()
        if role == "forward": parts["forward"].append(coords)
        elif role == "backward": parts["backward"].append(coords)
        else: parts["both"].append(coords)
    return parts

STOP_ROLES = {"stop","platform","stop_entry_only","platform_entry_only","stop_exit_only","platform_exit_only"}

def stops_from_relation(rel, nodes_by_id, base_color, base_props):
    feats=[]
    seen=set()
    for m in rel.get("members", []):
        if m.get("type") != "node": continue
        role = (m.get("role") or "").lower()
        if role not in STOP_ROLES: continue
        node = nodes_by_id.get(m.get("ref"))
        if not node or node["id"] in seen: continue
        seen.add(node["id"])
        feats.append({
            "type":"Feature",
            "geometry":{"type":"Point","coordinates": lonlat(node)},
            "properties": {
                **base_props,
                "kind":"stop",
                "stop_role": role,
                "marker-color": base_color,
                "marker-symbol":"bus",
                "_osm_type":"node",
                "_osm_id": node["id"],
            }
        })
    return feats

def route_feats_from_relation(rel, ways_by_id, nodes_by_id):
    tags = rel.get("tags", {}) or {}
    base = clean_props(tags)
    color = color_for(tags)
    ref_norm, _ = normalize_ref(tags.get("ref"), tags.get("name"))
    if ref_norm: base["ref_norm"] = ref_norm
    base.update({"_osm_type":"relation","_osm_id": rel["id"]})
    parts = split_ways_by_role(rel, ways_by_id, nodes_by_id)
    feats=[]

    def add_route(lines, direction=None, dashed=False):
        if not lines: return
        props = {
            **base,
            "kind":"route",
            "label": (base.get("ref_norm") or base.get("ref") or "") + (f" · {base.get('name')}" if base.get("name") else ""),
            "stroke": color,
            "stroke-width": 4,
            "stroke-opacity": 1.0
        }
        if direction: props["direction"] = direction
        if dashed: props["stroke-dasharray"] = "4,2"
        feats.append({
            "type":"Feature",
            "geometry":{"type":"MultiLineString","coordinates": lines},
            "properties": props
        })

    if parts["forward"] or parts["backward"]:
        add_route(parts["forward"] + parts["both"], direction="ida", dashed=False)
        add_route(parts["backward"] + parts["both"], direction="vuelta", dashed=True)
    else:
        add_route(parts["both"], direction=None, dashed=False)

    feats += stops_from_relation(rel, nodes_by_id, color, base)
    return feats

def convert_overpass_alimentadores(data):
    elements = data.get("elements", [])
    nodes, ways, relations = build_indexes(elements)

    # INCLUYE todas las relations route=bus (no sólo AN/AS).
    groups = defaultdict(list)
    for r in relations:
        t = r.get("tags", {}) or {}
        if t.get("route") != "bus": 
            continue
        key = (t.get("network"), t.get("ref"), r["id"])
        groups[key].append(r)

    features=[]
    inventory_rows=[]
    print("\n=== RUTAS DETECTADAS ===")
    print("id; ref_raw; ref_norm; name; network")
    for key, rels in groups.items():
        for r in rels:
            t = r.get("tags", {}) or {}
            ref_raw = (t.get("ref") or "").strip()
            name    = (t.get("name") or "").strip()
            network = (t.get("network") or "").strip()
            ref_norm, _ = normalize_ref(ref_raw, name)
            print(f"{r['id']}; {ref_raw or '-'}; {ref_norm or '-'}; {name or '-'}; {network or '-'}")
            inventory_rows.append([r["id"], ref_raw, ref_norm or "", name, network])

            # features para este relation
            features += route_feats_from_relation(r, ways, nodes)

    # Guardar inventario CSV
    with OUT_CSV.open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["relation_id","ref_raw","ref_norm","name","network"])
        w.writerows(inventory_rows)
    print(f"\n✔ Inventario guardado en: {OUT_CSV.relative_to(ROOT)} (total {len(inventory_rows)} relations)")

    return {"type":"FeatureCollection","features":features}

# ---------- Ejecutar ----------
data = json.loads(IN_PATH.read_text(encoding="utf-8"))

if not is_overpass(data) and not is_geojson(data):
    raise ValueError("El archivo no es Overpass JSON ni GeoJSON.")

if is_overpass(data):
    geojson = convert_overpass_alimentadores(data)
else:
    # Si ya fuera GeoJSON, recolor + limpieza (sin excluir rutas)
    feats=[]
    for ft in data.get("features", []):
        props = ft.get("properties", {}) or {}
        base = clean_props(props)
        color = color_for(props)
        ref_norm, _ = normalize_ref(props.get("ref"), props.get("name"))
        if ref_norm: base["ref_norm"] = ref_norm
        geom_type = ft.get("geometry",{}).get("type")
        kind = props.get("kind") or ("route" if geom_type in ("LineString","MultiLineString") else "stop" if geom_type=="Point" else "feature")
        style = {"stroke":color,"stroke-width":4,"stroke-opacity":1.0} if kind=="route" else {"marker-color":color,"marker-symbol":"bus"} if kind=="stop" else {}
        feats.append({"type":"Feature","geometry": ft.get("geometry"),"properties": {**base, **style}})
    geojson = {"type":"FeatureCollection","features":feats}

# ---------- Guardar ----------
for out in (OUT_JSON, OUT_GEO):
    out.write_text(json.dumps(geojson, ensure_ascii=False, indent=2), encoding="utf-8")
    print("✔ Guardado:", out)

routes = [f for f in geojson["features"] if f["properties"].get("kind")=="route"]
stops  = [f for f in geojson["features"] if f["properties"].get("kind")=="stop"]
refs   = sorted({(f["properties"].get("ref_norm") or f["properties"].get("ref") or "").upper() 
                 for f in routes if f["properties"].get("ref") or f["properties"].get("ref_norm")})
print("\nResumen → Rutas:", len(routes), "| Paradas:", len(stops))
print("Refs (muestra):", refs[:25], "… total:", len(refs))



=== RUTAS DETECTADAS ===
id; ref_raw; ref_norm; name; network
2071369; AS-04; AS-04; Alimentadora Sur Villa el Salvador; Metropolitano
2071369; AS-04; AS-04; Alimentadora Sur Villa el Salvador; Metropolitano
2071484; AS-02; AS-02; Alimentadora Sur Alameda Sur; Metropolitano
2071484; AS-02; AS-02; Alimentadora Sur Alameda Sur; Metropolitano
2113411; AS-07; AS-07; Alimentadora Sur América (Ida); Metropolitano
2113411; AS-07; AS-07; Alimentadora Sur América (Ida); Metropolitano
2217685; AN-01; AN-01; Alimentadora Norte Tahuantinsuyo; Metropolitano
2217685; AN-01; AN-01; Alimentadora Norte Tahuantinsuyo; Metropolitano
3658015; AN-02; AN-02; Alimentadora Norte Tungasuca; Metropolitano
3658015; AN-02; AN-02; Alimentadora Norte Tungasuca; Metropolitano
3658052; AN-03; AN-03; Alimentadora Norte Trapiche; Metropolitano
3658052; AN-03; AN-03; Alimentadora Norte Trapiche; Metropolitano
3708308; AN-10; AN-10; Alimentadora Norte Santo Domingo; Metropolitano
3708308; AN-10; AN-10; Alimentadora Nort

# Metro

## Generación json

In [3]:
# scripts/metro_build_and_clean.py
# -*- coding: utf-8 -*-
"""
Pipeline único:
1) Lee Overpass JSON desde data/raw/osm/metro.json
2) Convierte a GeoJSON (rutas + paradas), robusto a geometría embebida en miembros
3) Limpia las rutas eliminando "rectángulos" de estación y picos locales
4) Escribe data/raw/converted/metro.geojson y metro.json

Requisitos:
  pip install shapely pyproj
"""
from __future__ import annotations
import json, re
from pathlib import Path
from collections import defaultdict
from typing import List, Tuple
from math import hypot

from shapely.geometry import shape, mapping, LineString, MultiLineString
from shapely.ops import transform
from pyproj import Transformer

# ===================== Resolución de rutas =====================
def resolve_project_root() -> Path:
    cwd = Path.cwd()
    if cwd.name == "scripts" and (cwd.parent / "data").exists(): return cwd.parent
    if (cwd / "scripts").exists() and (cwd / "data").exists(): return cwd
    for p in cwd.parents:
        if (p / "data").exists(): return p
    return cwd

ROOT      = resolve_project_root()
IN_PATH   = ROOT / "data" / "raw" / "osm" / "metro.json"
OUT_DIR   = ROOT / "data" / "raw" / "converted" / "metro"
OUT_JSON  = OUT_DIR / "metro.json"
OUT_GEO   = OUT_DIR / "metro.geojson"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ===================== Conversión Overpass → GeoJSON =====================
def is_overpass(obj): return isinstance(obj, dict) and isinstance(obj.get("elements"), list)
def lonlat(n): return [n["lon"], n["lat"]]

KEEP_KEYS = {"ref","name","from","to","network","operator","route","description"}
DROP_KEYS = {"maxspeed","max_speed","source","created_by","opening_hours","phone","email",
             "website","wikidata","wikipedia","short_name","alt_name","old_name",
             "check_date","survey:date","start_date","end_date"}
DROP_PREFIX = ("addr:","contact:","gnis:","tiger:","seamark:","source:")
def clean_props(tags: dict) -> dict:
    out={}
    for k,v in (tags or {}).items():
        if k in KEEP_KEYS: out[k]=v
        elif (k in DROP_KEYS) or any(k.startswith(p) for p in DROP_PREFIX): pass
    return out

LINE_COLOR = {
    "L1": "#4AA23E",  # verde
    "L2": "#FFB81C",  # amarillo
    "L3": "#00C1D4",  # celeste
    "L4": "#D62828",  # rojo
    "L5": "#F07BB6",  # rosado
    "L6": "#8C80D8",  # lila
}
HEX_RE = re.compile(r"#?[0-9A-Fa-f]{6}$")
def color_for(tags):
    ref = (tags or {}).get("ref","").strip().upper()
    if ref in LINE_COLOR: return LINE_COLOR[ref]
    c = (tags or {}).get("colour") or (tags or {}).get("color") or ""
    if HEX_RE.fullmatch(c): return c if c.startswith("#") else f"#{c}"
    return "#888888"

def build_indexes(elements):
    nodes, ways, relations = {}, {}, []
    for el in elements:
        t = el.get("type")
        if t == "node": nodes[el["id"]] = el
        elif t == "way": ways[el["id"]] = el
        elif t == "relation": relations.append(el)
    return nodes, ways, relations

def coords_from_way(way, nodes_by_id):
    if not way: return []
    if "geometry" in way and way["geometry"]:
        return [[pt["lon"], pt["lat"]] for pt in way["geometry"]]
    coords=[]
    for nid in way.get("nodes", []):
        n = nodes_by_id.get(nid)
        if n: coords.append([n["lon"], n["lat"]])
    return coords

# Lee geometría del propio miembro si no hay way global
def coords_from_member(m, ways_by_id, nodes_by_id):
    if m.get("type") != "way": return []
    ref = m.get("ref")
    # 1) Si el miembro trae geometry embebida (Overpass "_fullGeom…")
    if "geometry" in m and m["geometry"]:
        return [[pt["lon"], pt["lat"]] for pt in m["geometry"]]
    # 2) Si el ref es numérico y existe el way top-level
    if isinstance(ref, int) and ref in ways_by_id:
        return coords_from_way(ways_by_id[ref], nodes_by_id)
    # 3) Si el ref es string "_fullGeom…" e incluye id al final
    if isinstance(ref, str):
        m_id = re.search(r"(\d+)$", ref)
        if m_id:
            wid = int(m_id.group(1))
            if wid in ways_by_id:
                return coords_from_way(ways_by_id[wid], nodes_by_id)
    return []

STOP_ROLES = {"stop","platform","stop_entry_only","platform_entry_only","stop_exit_only","platform_exit_only"}

def stops_from_relation(rel, nodes_by_id, color, base_props):
    feats=[]
    for m in rel.get("members", []):
        if m.get("type") != "node":
            continue
        role = (m.get("role") or "").lower()
        if role not in STOP_ROLES:
            continue
        node = nodes_by_id.get(m.get("ref"))
        if not node:
            continue
        ntags = (node.get("tags") or {})
        stop_name = (
            ntags.get("name")
            or ntags.get("official_name")
            or ntags.get("alt_name")
            or ntags.get("short_name")
            or base_props.get("name", "")  # fallback
        )
        stop_ref = ntags.get("ref") or ntags.get("local_ref") or ntags.get("uic_ref")
        props = {
            **base_props,                 # meta de la línea
            "kind": "station",
            "name": stop_name,            # ← ahora el nombre real de la parada
            "stop_ref": stop_ref,         # código opcional de parada si existe
            "_osm_type": "node",
            "_osm_id": node["id"],
            "marker-color": color,
            "marker-symbol": "rail-metro",
        }
        feats.append({
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [node["lon"], node["lat"]]},
            "properties": props
        })
    return feats


def route_feats_from_relation(rel, ways_by_id, nodes_by_id):
    tags  = rel.get("tags", {}) or {}
    if tags.get("route") not in ("subway","light_rail"): return []
    base  = clean_props(tags)
    color = color_for(tags)
    base.update({"_osm_type":"relation","_osm_id": rel["id"]})

    forward, backward, both = [], [], []
    for m in rel.get("members", []):
        if m.get("type") != "way": continue
        line = coords_from_member(m, ways_by_id, nodes_by_id)
        if not line: continue
        role = (m.get("role") or "").lower()
        if role == "forward": forward.append(line)
        elif role == "backward": backward.append(line)
        else: both.append(line)

    feats=[]
    def add(lines, direction=None, dashed=False):
        if not lines: return
        props = {"kind":"route","stroke":color,"stroke-width":5,"stroke-opacity":1.0}
        if direction: props["direction"]=direction
        if dashed: props["stroke-dasharray"]="4,2"
        feats.append({"type":"Feature","geometry":{"type":"MultiLineString","coordinates":lines},"properties":{**base, **props}})

    if forward or backward:
        add(forward + both, direction="ida")
        add(backward + both, direction="vuelta", dashed=True)
    else:
        add(both)  # una sola

    feats += stops_from_relation(rel, nodes_by_id, color, base)
    return feats

def convert_overpass_metro(data):
    nodes, ways, relations = build_indexes(data.get("elements", []))
    groups = defaultdict(list)
    for r in relations:
        t = r.get("tags",{}) or {}
        if t.get("route") in ("subway","light_rail"):
            groups[(t.get("network"), t.get("ref","?").strip().upper())].append(r)

    features=[]
    for (_net, _ref), rels in groups.items():
        if len(rels)==2:
            f1 = route_feats_from_relation(rels[0], ways, nodes); features += f1
            f2 = route_feats_from_relation(rels[1], ways, nodes)
            for ft in f2:
                if ft["properties"].get("kind")=="route" and "direction" not in ft["properties"]:
                    ft["properties"]["direction"]="vuelta"; ft["properties"]["stroke-dasharray"]="4,2"
            features += f2
        else:
            for r in rels: features += route_feats_from_relation(r, ways, nodes)
    return {"type":"FeatureCollection","features":features}

# ===================== Limpieza geométrica (metros) =====================
# Detecta "rectángulos de estación"
CLOSED_CHORD_TOL_M = 30.0      # distancia inicio-fin < 30 m -> casi cerrado
BBOX_DIAG_TOL_M    = 200.0     # diagonal de la caja < 200 m -> pequeño
MAX_LOOP_POINTS    = 24        # # vértices por tramo pequeño
# Limpieza de ruiditos
SPIKE_TOL_M        = 10.0      # quitar picos locales
DEDUP_TOL_M        = 0.20      # colapsar puntos pegados
# Proyección métrica (Lima)
TARGET_EPSG        = "EPSG:32718"

def _build_transformers():
    fwd = Transformer.from_crs("EPSG:4326", TARGET_EPSG, always_xy=True).transform
    inv = Transformer.from_crs(TARGET_EPSG, "EPSG:4326", always_xy=True).transform
    return fwd, inv

def _euclid(p, q) -> float:
    dx = p[0] - q[0]; dy = p[1] - q[1]
    return (dx*dx + dy*dy) ** 0.5

def dedup_coords(coords: List[Tuple[float, float]], tol: float) -> List[Tuple[float, float]]:
    if not coords: return coords
    out = [coords[0]]
    for c in coords[1:]:
        if _euclid(out[-1], c) >= tol:
            out.append(c)
    if len(out) == 1 and len(coords) > 1:
        out.append(coords[-1])
    return out

def despike_coords(coords: List[Tuple[float, float]], spike_tol: float, dedup_tol: float) -> List[Tuple[float, float]]:
    if len(coords) < 3: return coords
    changed = True; cur = coords[:]
    while changed:
        changed = False
        cur = dedup_coords(cur, dedup_tol)
        if len(cur) < 3: break
        keep = [cur[0]]; i = 1
        while i < len(cur) - 1:
            prev, mid, nxt = cur[i-1], cur[i], cur[i+1]
            if _euclid(prev, nxt) < spike_tol:
                changed = True; i += 1
            else:
                keep.append(mid); i += 1
        keep.append(cur[-1]); cur = keep
    return cur

def is_station_rectangle(ls_m: LineString) -> bool:
    """Tramo pequeño, casi cerrado y con caja compacta (rectangulito alrededor de la estación)."""
    if ls_m.is_empty or len(ls_m.coords) < 4:
        return False
    start = ls_m.coords[0]; end = ls_m.coords[-1]
    if _euclid(start, end) > CLOSED_CHORD_TOL_M:
        return False
    if len(ls_m.coords) > MAX_LOOP_POINTS:
        return False
    minx, miny, maxx, maxy = ls_m.bounds
    diag = hypot(maxx - minx, maxy - miny)
    return diag <= BBOX_DIAG_TOL_M

def clean_linestring(ls_ll: LineString, fwd, inv) -> LineString | None:
    ls_m = transform(fwd, ls_ll)
    coords_m = despike_coords(list(ls_m.coords), SPIKE_TOL_M, DEDUP_TOL_M)
    if len(coords_m) < 2:
        return None
    ls_m2 = LineString(coords_m)
    ls_ll_out = transform(inv, ls_m2)
    return ls_ll_out if ls_ll_out.is_valid and not ls_ll_out.is_empty and len(ls_ll_out.coords) >= 2 else None

def clean_multilinestring(mls_ll: MultiLineString, fwd, inv) -> Tuple[MultiLineString | LineString | None, int]:
    kept_parts = []
    removed_loops = 0
    for part in mls_ll.geoms:
        part_m = transform(fwd, part)
        if is_station_rectangle(part_m):
            removed_loops += 1
            continue
        cleaned = clean_linestring(part, fwd, inv)
        if isinstance(cleaned, LineString) and len(cleaned.coords) >= 2:
            kept_parts.append(cleaned)
    if not kept_parts:
        return None, removed_loops
    if len(kept_parts) == 1:
        return kept_parts[0], removed_loops
    return MultiLineString([list(ls.coords) for ls in kept_parts]), removed_loops

def clean_feature_geometry(geom: dict, fwd, inv) -> Tuple[dict | None, int]:
    g = shape(geom)
    if g.geom_type == "LineString":
        out = clean_linestring(g, fwd, inv)
        return (mapping(out), 0) if out is not None else (None, 0)
    elif g.geom_type == "MultiLineString":
        out, removed = clean_multilinestring(g, fwd, inv)
        return (mapping(out), removed) if out is not None else (None, removed)
    else:
        return (geom, 0)  # points/otros: no tocar

# ===================== Ejecutar pipeline =====================
def main():
    data = json.loads(IN_PATH.read_text(encoding="utf-8"))
    if not is_overpass(data):
        raise ValueError("metro.json no es un Overpass JSON válido (falta 'elements').")

    # 1) Convertir Overpass → GeoJSON
    fc = convert_overpass_metro(data)

    # 2) Limpiar rutas (dejar estaciones tal cual)
    fwd, inv = _build_transformers()
    cleaned_features = []
    removed_small_rects = 0

    for feat in fc.get("features", []):
        geom = feat.get("geometry")
        props = feat.get("properties", {}) or {}

        if geom and geom.get("type") in {"LineString", "MultiLineString"} and props.get("kind") == "route":
            new_geom, removed = clean_feature_geometry(geom, fwd, inv)
            removed_small_rects += removed
            if new_geom is None:  # si todo el tramo se eliminó, se descarta
                continue
            new_feat = {"type": "Feature", "geometry": new_geom, "properties": props}
        else:
            # estaciones u otros: copiar directo
            new_feat = feat

        cleaned_features.append(new_feat)

    fc_clean = {"type": "FeatureCollection", "features": cleaned_features}

    # 3) Guardar
    OUT_JSON.write_text(json.dumps(fc_clean, ensure_ascii=False, indent=2), encoding="utf-8")
    OUT_GEO.write_text(json.dumps(fc_clean, ensure_ascii=False, indent=2), encoding="utf-8")

    # 4) Resumen
    n_routes = sum(1 for f in fc_clean["features"] if f["properties"].get("kind")=="route")
    n_stops  = sum(1 for f in fc_clean["features"] if f["properties"].get("kind")=="station")
    print("✔ Guardado:", OUT_JSON)
    print("✔ Guardado:", OUT_GEO)
    print(f"Rutas: {n_routes} | Estaciones: {n_stops}")
    print(f"Rectángulos de estación eliminados: {removed_small_rects}")

if __name__ == "__main__":
    main()


✔ Guardado: d:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\converted\metro\metro.json
✔ Guardado: d:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\converted\metro\metro.geojson
Rutas: 4 | Estaciones: 62
Rectángulos de estación eliminados: 50


## Punto medio

In [4]:
# scripts/metro.py
# -*- coding: utf-8 -*-
from __future__ import annotations
import json, re
from pathlib import Path
from collections import defaultdict
from typing import Dict, List, Tuple, Iterable

from shapely.geometry import shape, mapping, LineString, MultiLineString, Point
from shapely.ops import linemerge, unary_union, nearest_points
from shapely.ops import transform as shp_transform
from pyproj import Transformer

# ---------------- rutas de proyecto ----------------
def resolve_project_root() -> Path:
    cwd = Path.cwd()
    if cwd.name == "scripts" and (cwd.parent / "data").exists(): return cwd.parent
    if (cwd / "scripts").exists() and (cwd / "data").exists(): return cwd
    for p in cwd.parents:
        if (p / "data").exists(): return p
    return cwd

ROOT = resolve_project_root()
IN_PATH  = ROOT / "data" / "raw" / "osm" / "metro.json"
OUT_DIR  = ROOT / "data" / "raw" / "converted" / "metro"
OUT_GEO  = OUT_DIR / "metro.geojson"
OUT_JSON = OUT_DIR / "metro.json"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ---------------- utilidades básicas ----------------
def is_overpass(obj): return isinstance(obj, dict) and isinstance(obj.get("elements"), list)
def lonlat(n): return [n["lon"], n["lat"]]

KEEP_KEYS = {"ref","name","from","to","network","operator","route","description"}
DROP_KEYS = {"maxspeed","max_speed","source","created_by","opening_hours","phone","email",
             "website","wikidata","wikipedia","short_name","alt_name","old_name",
             "check_date","survey:date","start_date","end_date"}
DROP_PREFIX = ("addr:","contact:","gnis:","tiger:","seamark:","source:")
def clean_props(tags: dict) -> dict:
    out={}
    for k,v in (tags or {}).items():
        if k in KEEP_KEYS: out[k]=v
        elif (k in DROP_KEYS) or any(k.startswith(p) for p in DROP_PREFIX): pass
    return out

LINE_COLOR = {
    "L1": "#4AA23E", "L2": "#FFB81C", "L3": "#00C1D4",
    "L4": "#D62828", "L5": "#F07BB6", "L6": "#8C80D8",
}
HEX_RE = re.compile(r"#?[0-9A-Fa-f]{6}$")
def color_for(tags):
    ref = (tags or {}).get("ref","").strip().upper()
    if ref in LINE_COLOR: return LINE_COLOR[ref]
    c = (tags or {}).get("colour") or (tags or {}).get("color") or ""
    if HEX_RE.fullmatch(c): return c if c.startswith("#") else f"#{c}"
    return "#888888"

def build_indexes(elements):
    nodes, ways, relations = {}, {}, []
    for el in elements:
        t = el.get("type")
        if t == "node": nodes[el["id"]] = el
        elif t == "way": ways[el["id"]] = el
        elif t == "relation": relations.append(el)
    return nodes, ways, relations

def coords_from_way(way, nodes_by_id):
    if not way: return []
    if "geometry" in way and way["geometry"]:
        return [[pt["lon"], pt["lat"]] for pt in way["geometry"]]
    coords=[]
    for nid in way.get("nodes", []):
        n = nodes_by_id.get(nid)
        if n: coords.append([n["lon"], n["lat"]])
    return coords

# lee geometría del propio miembro si la trae
def coords_from_member(m, ways_by_id, nodes_by_id):
    if m.get("type") != "way": return []
    ref = m.get("ref")
    if "geometry" in m and m["geometry"]:
        return [[pt["lon"], pt["lat"]] for pt in m["geometry"]]
    if isinstance(ref, int) and ref in ways_by_id:
        return coords_from_way(ways_by_id[ref], nodes_by_id)
    if isinstance(ref, str):
        m_id = re.search(r"(\d+)$", ref)
        if m_id:
            wid = int(m_id.group(1))
            if wid in ways_by_id:
                return coords_from_way(ways_by_id[wid], nodes_by_id)
    return []

STOP_ROLES = {"stop","platform","stop_entry_only","platform_entry_only","stop_exit_only","platform_exit_only"}

# normalización y elección de nombre de estación
EST_RE = re.compile(r'^(estaci[oó]n)\s+', re.I)
NAME_DIR_RE = re.compile(r'\b(norte|sur|este|oeste|ida|vuelta)\b', re.I)
AB_RE = re.compile(r'\b([ab])\b', re.I)

def normalize_stop_name(s: str) -> str:
    s = (s or "").strip()
    s = EST_RE.sub("", s)
    s = NAME_DIR_RE.sub("", s)
    s = AB_RE.sub("", s)
    return re.sub(r'\s+', ' ', s).strip(' -–—').strip()

def best_node_name(ntags: dict, fallback: str) -> str:
    cand = (
        ntags.get("name")
        or ntags.get("official_name")
        or ntags.get("alt_name")
        or ntags.get("short_name")
        or fallback
        or ""
    )
    return cand

def merge_stop_names(n1: str, n2: str) -> str:
    if not n1: return n2
    if not n2: return n1
    n1n, n2n = normalize_stop_name(n1), normalize_stop_name(n2)
    if n1n.lower() == n2n.lower(): return n1n
    # si uno contiene al otro, usa el más largo
    if n1n.lower() in n2n.lower(): return n2n
    if n2n.lower() in n1n.lower(): return n1n
    # prefijo común por palabras
    w1, w2 = n1n.split(), n2n.split()
    common=[]
    for a,b in zip(w1,w2):
        if a.lower()==b.lower(): common.append(a)
        else: break
    cand=' '.join(common).strip()
    return cand if len(cand)>=4 else (n1 if len(n1)>=len(n2) else n2)

def stops_from_relation(rel, nodes_by_id, color, base_props):
    feats=[]
    for m in rel.get("members", []):
        if m.get("type") != "node": 
            continue
        role = (m.get("role") or "").lower()
        if role not in STOP_ROLES:
            continue
        node = nodes_by_id.get(m.get("ref"))
        if not node:
            continue
        ntags = node.get("tags") or {}
        stop_name = best_node_name(ntags, base_props.get("name",""))
        props = {
            **base_props,
            "kind":"station",
            "name": stop_name,                   # ← nombre real
            "stop_ref": ntags.get("ref") or ntags.get("local_ref") or ntags.get("uic_ref"),
            "_osm_type":"node","_osm_id": node["id"],
            "marker-color": color, "marker-symbol":"rail-metro",
        }
        feats.append({
            "type":"Feature",
            "geometry":{"type":"Point","coordinates": lonlat(node)},
            "properties": props
        })
    return feats

def route_feats_from_relation(rel, ways_by_id, nodes_by_id):
    tags  = rel.get("tags", {}) or {}
    if tags.get("route") not in ("subway","light_rail"): return []
    base  = clean_props(tags)
    color = color_for(tags)
    base.update({"_osm_type":"relation","_osm_id": rel["id"]})

    forward, backward, both = [], [], []
    for m in rel.get("members", []):
        if m.get("type") != "way": continue
        line = coords_from_member(m, ways_by_id, nodes_by_id)
        if not line: continue
        role = (m.get("role") or "").lower()
        if role == "forward": forward.append(line)
        elif role == "backward": backward.append(line)
        else: both.append(line)

    feats=[]
    def add(lines, direction=None, dashed=False):
        if not lines: return
        props = {"kind":"route","stroke":color,"stroke-width":5,"stroke-opacity":1.0}
        if direction: props["direction"]=direction
        if dashed: props["stroke-dasharray"]="4,2"
        feats.append({"type":"Feature","geometry":{"type":"MultiLineString","coordinates":lines},"properties":{**base, **props}})

    if forward or backward:
        add(forward + both, direction="ida")
        add(backward + both, direction="vuelta", dashed=True)
    else:
        add(both)

    feats += stops_from_relation(rel, nodes_by_id, color, base)
    return feats

def convert_overpass_metro(data):
    nodes, ways, relations = build_indexes(data.get("elements", []))
    groups = defaultdict(list)
    for r in relations:
        t = r.get("tags",{}) or {}
        if t.get("route") in ("subway","light_rail"):
            groups[(t.get("network"), t.get("ref","?").strip().upper())].append(r)

    features=[]
    for (_net, ref), rels in groups.items():
        if len(rels)==2:
            f1 = route_feats_from_relation(rels[0], ways, nodes); features += f1
            f2 = route_feats_from_relation(rels[1], ways, nodes)
            for ft in f2:
                if ft["properties"].get("kind")=="route" and "direction" not in ft["properties"]:
                    ft["properties"]["direction"]="vuelta"; ft["properties"]["stroke-dasharray"]="4,2"
            features += f2
        else:
            for r in rels: features += route_feats_from_relation(r, ways, nodes)
    return {"type":"FeatureCollection","features":features}

# ---------------- centerline de vías y promedio de paradas ----------------
TARGET_EPSG = "EPSG:32718"  # UTM 18S (Lima)
def tfms():
    fwd = Transformer.from_crs("EPSG:4326", TARGET_EPSG, always_xy=True).transform
    inv = Transformer.from_crs(TARGET_EPSG, "EPSG:4326", always_xy=True).transform
    return fwd, inv

def to_linestring_one(g) -> LineString:
    """
    Devuelve una única LineString a partir de LineString/MultiLineString.
    - Si es LineString: la devuelve tal cual (no linemerge).
    - Si es MultiLineString: intenta linemerge; si no queda una sola,
      elige la LineString más larga.
    """
    if isinstance(g, LineString):
        return g

    if isinstance(g, MultiLineString):
        # Intentar fusionar sin unary_union (Shapely 2 no acepta LINESTRING en linemerge)
        merged = linemerge(g)
        if isinstance(merged, LineString):
            return merged

        # Si sigue siendo MultiLineString/GeometryCollection, escoger la más larga
        parts = [ls for ls in getattr(merged, "geoms", []) if isinstance(ls, LineString)]
        if not parts:
            parts = list(g.geoms)
        parts.sort(key=lambda ls: ls.length, reverse=True)
        return parts[0]

    # Fallback para colecciones raras: tomar la LS más larga que exista
    if hasattr(g, "geoms"):
        parts = [ls for ls in g.geoms if isinstance(ls, LineString)]
        if parts:
            parts.sort(key=lambda ls: ls.length, reverse=True)
            return parts[0]

    raise ValueError("Geometría de ruta no soportada: " + getattr(g, "geom_type", str(type(g))))


def densify_along(ls_m: LineString, step_m: float = 15.0) -> List[Tuple[float, float]]:
    n_steps = max(2, int(ls_m.length // step_m) + 1)
    return [ls_m.interpolate(i * ls_m.length / (n_steps - 1)).coords[0] for i in range(n_steps)]

def midpoint(p,q): return ((p[0]+q[0])*0.5, (p[1]+q[1])*0.5)

def dedup(coords: Iterable[Tuple[float, float]], tol: float = 0.2) -> List[Tuple[float, float]]:
    coords = list(coords)
    if not coords: return coords
    out=[coords[0]]
    for c in coords[1:]:
        dx=c[0]-out[-1][0]; dy=c[1]-out[-1][1]
        if (dx*dx+dy*dy)**0.5 >= tol: out.append(c)
    if len(out)==1 and len(coords)>1: out.append(coords[-1])
    return out

def centerline_between(ls1_ll: LineString, ls2_ll: LineString, step_m: float = 15.0) -> LineString:
    fwd, inv = tfms()
    ls1_m = shp_transform(fwd, ls1_ll)
    ls2_m = shp_transform(fwd, ls2_ll)
    samples = densify_along(ls1_m, step_m)
    mids=[]
    for s in samples:
        p1 = Point(s)
        _, p2 = nearest_points(p1, ls2_m)
        mids.append(midpoint(p1.coords[0], p2.coords[0]))
    mids = dedup(mids, tol=0.10)
    return shp_transform(inv, LineString(mids))

def pair_station_features(stations: List[dict], max_pair_m: float = 120.0) -> List[dict]:
    if not stations: return []
    fwd, inv = tfms()
    pts = [(shp_transform(fwd, shape(s["geometry"])), s["properties"]) for s in stations]
    used=[False]*len(pts)
    out=[]
    for i,(pi,pi_props) in enumerate(pts):
        if used[i]: continue
        best_j, best_d2 = -1, float("inf")
        for j,(pj,pj_props) in enumerate(pts):
            if i==j or used[j]: continue
            d2 = (pi.x-pj.x)**2 + (pi.y-pj.y)**2
            if d2 < best_d2: best_d2, best_j = d2, j
        if best_j>=0 and (best_d2**0.5) <= max_pair_m:
            used[i]=used[best_j]=True
            pj, pj_props = pts[best_j]
            mid = Point((pi.x+pj.x)*0.5, (pi.y+pj.y)*0.5)
            name = merge_stop_names(pi_props.get("name",""), pj_props.get("name",""))
            base = {**pi_props, **pj_props}
            base.pop("direction", None); base.pop("stroke-dasharray", None)
            base["kind"]="station"; base["name"]=name
            out.append({"type":"Feature","geometry":mapping(shp_transform(inv, mid)),"properties":base})
        else:
            used[i]=True
            base = dict(pi_props)
            base.pop("direction", None); base.pop("stroke-dasharray", None)
            base["kind"]="station"; base["name"]=normalize_stop_name(base.get("name",""))
            out.append({"type":"Feature","geometry":mapping(shp_transform(inv, pi)),"properties":base})
    return out

def group_features_by_ref(features: List[dict]) -> Dict[str, Dict[str, List[dict]]]:
    groups: Dict[str, Dict[str, List[dict]]] = {}
    for f in features:
        props = f.get("properties", {}) or {}
        ref = (props.get("ref") or "?").strip().upper()
        groups.setdefault(ref, {"routes": [], "stations": []})
        if props.get("kind") == "route":
            groups[ref]["routes"].append(f)
        elif props.get("kind") == "station":
            groups[ref]["stations"].append(f)
    return groups

def build_center_for_group(ref: str, routes: List[dict], stations: List[dict]) -> List[dict]:
    out=[]
    if routes:
        lines = []
        for r in routes:
            try:
                lines.append(to_linestring_one(shape(r["geometry"])))
            except Exception:
                # ignora geometrías no-lineales o inválidas
                pass

        if not lines:
            # no hay líneas válidas para esta ref
            return out

        lines.sort(key=lambda ls: ls.length, reverse=True)
        if len(lines) >= 2:
            center_ls = centerline_between(lines[0], lines[1], step_m=15.0)
            base_props = routes[0]["properties"].copy()
        else:
            center_ls = lines[0]
            base_props = routes[0]["properties"].copy()

        for k in ("direction","stroke-dasharray"): base_props.pop(k, None)
        out.append({
            "type":"Feature",
            "geometry": mapping(center_ls),
            "properties": {**base_props, "kind":"route"}
        })

    if stations:
        out.extend(pair_station_features(stations, max_pair_m=120.0))

    return out


# ---------------- ejecutar todo (convertir + centerline) ----------------
def main():
    data = json.loads(IN_PATH.read_text(encoding="utf-8"))
    if not is_overpass(data):
        raise ValueError("metro.json no es un Overpass JSON válido.")
    # 1) convertir
    raw_fc = convert_overpass_metro(data)
    # 2) centerline/fusión
    groups = group_features_by_ref(raw_fc.get("features", []))
    out_features=[]
    for ref, g in groups.items():
        out_features += build_center_for_group(ref, g["routes"], g["stations"])
    out_fc = {"type":"FeatureCollection","features":out_features}
    # 3) guardar con nombre "metro"
    OUT_JSON.write_text(json.dumps(out_fc, ensure_ascii=False, indent=2), encoding="utf-8")
    OUT_GEO.write_text(json.dumps(out_fc, ensure_ascii=False, indent=2), encoding="utf-8")
    # resumen
    n_routes = sum(1 for f in out_features if f["properties"].get("kind")=="route")
    n_stops  = sum(1 for f in out_features if f["properties"].get("kind")=="station")
    print("Entrada:", IN_PATH)
    print("Salida (GeoJSON):", OUT_GEO)
    print("Salida (JSON):   ", OUT_JSON)
    print(f"Rutas (centerline): {n_routes}")
    print(f"Estaciones (promedio): {n_stops}")

if __name__ == "__main__":
    main()


Entrada: d:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\osm\metro.json
Salida (GeoJSON): d:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\converted\metro\metro.geojson
Salida (JSON):    d:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\converted\metro\metro.json
Rutas (centerline): 2
Estaciones (promedio): 31


Collecting pyproj
  Downloading pyproj-3.7.2-cp313-cp313-win_amd64.whl.metadata (31 kB)
Downloading pyproj-3.7.2-cp313-cp313-win_amd64.whl (6.3 MB)
   ---------------------------------------- 0.0/6.3 MB ? eta -:--:--
   ---------------------------------------- 6.3/6.3 MB 90.0 MB/s eta 0:00:00
Installing collected packages: pyproj
Successfully installed pyproj-3.7.2


# Transporte Público

## Parsear HTML

In [20]:
# -*- coding: utf-8 -*-
"""
Lee config/wikirutas.html y genera dos CSV en el MISMO directorio:
- rutas_actuales.csv
- rutas_anteriores.csv

Mejoras:
- Limpieza ULTRA: normaliza espacios Unicode, elimina invisibles (ZWSP, NBSP, BOM, LRM/RLM, etc.),
  corrige secuencias molestas como ' ␠+ZWSP' (el "caracter raro" que mencionaste),
  colapsa espacios y recorta extremos.
- Captura y normalización de colores (#RGB -> #RRGGBB en MAYÚSCULAS).
- Columnas *_color con el hex detectado en cada celda relevante.
- Si alias es "Ninguno" o "¿?" (o "?"), lo deja como null (NaN en CSV).
- Nueva columna 'abreviacion' que extrae el texto entre paréntesis al final de 'empresa' (si existe).
"""

import re
import sys
import argparse
from pathlib import Path
from typing import List, Dict, Tuple, Optional

import pandas as pd
from bs4 import BeautifulSoup

# ------------------ Utilidades ------------------

# 0) Secuencias problemáticas comunes (incluye el "espacio + ZWSP" que se te cuela)
_SEQ_FIXES = (
    (" \u200b", " "),   # espacio normal + ZWSP  -> espacio
    ("\u00a0\u200b", " "),  # NBSP + ZWSP -> espacio
    ("\u200b", ""),     # ZWSP suelto
)

# 1) Mapea "espacios" Unicode visibles a espacio normal (U+0020)
_SPACE_TRANS = {
    ord('\u00A0'): ' ',  # NO-BREAK SPACE
    ord('\u1680'): ' ',  # OGHAM SPACE MARK
    ord('\u2000'): ' ',  # EN QUAD
    ord('\u2001'): ' ',  # EM QUAD
    ord('\u2002'): ' ',  # EN SPACE
    ord('\u2003'): ' ',  # EM SPACE
    ord('\u2004'): ' ',  # THREE-PER-EM SPACE
    ord('\u2005'): ' ',  # FOUR-PER-EM SPACE
    ord('\u2006'): ' ',  # SIX-PER-EM SPACE
    ord('\u2007'): ' ',  # FIGURE SPACE
    ord('\u2008'): ' ',  # PUNCTUATION SPACE
    ord('\u2009'): ' ',  # THIN SPACE
    ord('\u200A'): ' ',  # HAIR SPACE
    ord('\u202F'): ' ',  # NARROW NO-BREAK SPACE
    ord('\u205F'): ' ',  # MEDIUM MATHEMATICAL SPACE
    ord('\u3000'): ' ',  # IDEOGRAPHIC SPACE
}

# 2) Elimina caracteres de formato/invisibles y de dirección de texto
_INVIS_REMOVE = {
    # Cero ancho y similares
    ord('\u200B'): None,  # ZERO WIDTH SPACE
    ord('\u200C'): None,  # ZERO WIDTH NON-JOINER
    ord('\u200D'): None,  # ZERO WIDTH JOINER
    ord('\u2060'): None,  # WORD JOINER
    ord('\uFEFF'): None,  # ZERO WIDTH NO-BREAK SPACE / BOM
    ord('\u180E'): None,  # MONGOLIAN VOWEL SEPARATOR (deprec.)
    # Marcas de dirección / embedding (a veces se cuelan al copiar de la web)
    ord('\u200E'): None,  # LEFT-TO-RIGHT MARK
    ord('\u200F'): None,  # RIGHT-TO-LEFT MARK
    ord('\u202A'): None,  # LRE
    ord('\u202B'): None,  # RLE
    ord('\u202C'): None,  # PDF
    ord('\u202D'): None,  # LRO
    ord('\u202E'): None,  # RLO
    ord('\u2066'): None,  # LRI
    ord('\u2067'): None,  # RLI
    ord('\u2068'): None,  # FSI
    ord('\u2069'): None,  # PDI
    # Separador invisible y joiner de grafemas (por si acaso)
    ord('\u2063'): None,  # INVISIBLE SEPARATOR
    ord('\u034F'): None,  # COMBINING GRAPHEME JOINER
}

def _expand_hex3(h: str) -> str:
    """'ABC' -> 'AABBCC'; ya viene sin # y en [0-9A-F]{3,6}."""
    h = h.upper()
    if len(h) == 3:
        return "".join(ch * 2 for ch in h)
    return h

def clean_text(s: Optional[str]) -> str:
    """Trim ULTRA: corrige secuencias, normaliza espacios, borra invisibles, colapsa y recorta."""
    if s is None:
        return ""
    # Fix de secuencias molestas antes de traducir
    for a, b in _SEQ_FIXES:
        s = s.replace(a, b)
    # Normaliza/limpia Unicode
    s = s.translate(_SPACE_TRANS).translate(_INVIS_REMOVE)
    # Quita citas [1], [23], etc.
    s = re.sub(r"\[\d+\]", "", s)
    # Colapsa whitespace y recorta
    s = re.sub(r"\s+", " ", s).strip()
    return s

def clean_text_keep_none(x):
    """Como clean_text, pero preservando None/NaN."""
    if x is None or (isinstance(x, float) and pd.isna(x)):
        return None
    return clean_text(str(x))

def cell_text(tag) -> str:
    if tag is None:
        return ""
    # Remueve superíndices (citas)
    for sup in tag.find_all("sup"):
        sup.decompose()
    # Extrae texto y lo limpia
    return clean_text(tag.get_text(" ", strip=True))

def _normalize_hex(match_hex: str) -> str:
    """match_hex: 'ABC' o 'AABBCC' (sin #). Devuelve '#AABBCC'."""
    return f"#{_expand_hex3(match_hex)}"

def first_color_hex(tag) -> Optional[str]:
    """
    Intenta leer un color de fondo en forma HEX y lo normaliza a #RRGGBB (MAYÚSCULA).
    Busca en style/background, atributo bgcolor y spans internos. Acepta #RGB.
    """
    if tag is None:
        return None

    # 1) style en la celda
    style = tag.get("style") or ""
    m = re.search(r"background(?:-color)?\s*:\s*#([0-9A-Fa-f]{3,6})", style)
    if m:
        return _normalize_hex(m.group(1))

    # 2) atributo bgcolor legacy
    if tag.has_attr("bgcolor"):
        m = re.search(r"#?([0-9A-Fa-f]{3,6})", tag["bgcolor"])
        if m:
            return _normalize_hex(m.group(1))

    # 3) spans o elementos internos con background
    inner = tag.find(attrs={"style": re.compile(r"background", re.I)})
    if inner and inner.has_attr("style"):
        m = re.search(r"background(?:-color)?\s*:\s*#([0-9A-Fa-f]{3,6})", inner["style"])
        if m:
            return _normalize_hex(m.group(1))

    return None

def normalize_header(h: str) -> str:
    h = clean_text(h).lower()
    mapping = {
        "ruta": "ruta",
        "código de ruta": "codigo_ruta",
        "codigo de ruta": "codigo_ruta",
        "código": "codigo_ruta",
        "codigo": "codigo_ruta",
        "nuevo código": "codigo_ruta",
        "nuevo codigo": "codigo_ruta",
        "código anterior": "codigo_anterior",
        "codigo anterior": "codigo_anterior",
        "seudónimo o alias": "alias",
        "seudónimo / alias": "alias",
        "seudónimo": "alias",
        "alias": "alias",
        "distrito inicial o de origen": "origen",
        "distrito inicial": "origen",
        "distrito de origen": "origen",
        "distrito final o terminal": "destino",
        "distrito final": "destino",
        "distrito de destino": "destino",
        "empresa operadora": "empresa",
        "en reemplazo por": "reemplazo_por",
        "observaciones": "observaciones",
        "notas": "notas",
    }
    if h in mapping:
        return mapping[h]
    return re.sub(r"[^a-z0-9_]+", "_", h)

def nearest_group_label(table_tag) -> str:
    """Suele ser un H3 tipo 'Rutas 1000' encima de la tabla."""
    h3 = table_tag.find_previous("h3")
    if h3:
        return clean_text(h3.get_text(" ", strip=True))
    return ""

def section_of_table(table_tag) -> Tuple[Optional[str], Optional[str]]:
    """Devuelve (id, texto) del H2 (sección) más cercano hacia atrás."""
    for prev in table_tag.find_all_previous():
        if prev.name == "h2":
            span = prev.find("span", class_="mw-headline")
            if span and span.has_attr("id"):
                return span["id"], clean_text(span.get_text(" ", strip=True))
            if prev.has_attr("id"):
                return prev["id"], clean_text(prev.get_text(" ", strip=True))
            return None, clean_text(prev.get_text(" ", strip=True))
    return None, None

def _alias_to_null(txt: str) -> Optional[str]:
    """Convierte 'Ninguno' o '¿?' (o '?') en None."""
    t = clean_text(txt).strip().lower()
    if t in ("ninguno", "¿?", "?"):
        return None
    return clean_text(txt)

def _extract_abreviacion(empresa: Optional[str]) -> str:
    """
    Extrae la abreviación entre paréntesis al FINAL de la cadena, p. ej.:
    'Empresa X S.A.C. (ETROASAC)' -> 'ETROASAC'
    Si no hay paréntesis al final, devuelve ''.
    """
    if not empresa:
        return ""
    m = re.search(r"\(([^()]+)\)\s*$", empresa)
    return clean_text(m.group(1)).upper() if m else ""

def parse_wikitable(table_tag, group_hint: str, section: str) -> List[Dict[str, str]]:
    rows: List[Dict[str, str]] = []

    # Encabezados
    thead = table_tag.find("thead")
    if thead:
        headers = [normalize_header(cell_text(th)) for th in thead.find_all("th")]
    else:
        first_tr = table_tag.find("tr")
        headers = [normalize_header(cell_text(th)) for th in first_tr.find_all(["th", "td"])] if first_tr else []

    # Filas
    for tr in table_tag.find_all("tr"):
        tds = tr.find_all("td")
        if not tds:
            continue
        rec: Dict[str, Optional[str]] = {}
        for i, td in enumerate(tds):
            key = headers[i] if i < len(headers) else f"col_{i+1}"
            txt = cell_text(td)

            # Normalización especial de 'alias' -> None si es "Ninguno" o "¿?"
            if key == "alias":
                rec[key] = _alias_to_null(txt)
            else:
                rec[key] = txt

            # color si aplica y lo ponemos en columna *_color
            if key in ("ruta", "codigo_ruta", "codigo_anterior"):
                hexcol = first_color_hex(td)
                if hexcol:
                    rec[f"{key}_color"] = hexcol

        # 'abreviacion' desde 'empresa'
        empresa_val = rec.get("empresa", "")
        rec["abreviacion"] = _extract_abreviacion(empresa_val)

        rec["grupo"] = clean_text(group_hint)
        rec["seccion"] = clean_text(section)
        rows.append(rec)  # type: ignore[arg-type]

    return rows

def to_dataframe(rows: List[Dict[str, Optional[str]]]) -> pd.DataFrame:
    if not rows:
        return pd.DataFrame()
    cols = set()
    for r in rows:
        cols.update(r.keys())
    preferred = [
        "seccion", "grupo",
        "ruta", "ruta_color",
        "codigo_ruta", "codigo_ruta_color",
        "codigo_anterior", "codigo_anterior_color",
        "alias", "origen", "destino", "empresa", "abreviacion",
        "reemplazo_por", "observaciones", "notas",
    ]
    ordered = [c for c in preferred if c in cols] + [c for c in sorted(cols) if c not in preferred]
    df = pd.DataFrame(rows, columns=ordered)

    # Limpieza final columna por columna (preserva None/NaN)
    for c in df.columns:
        if pd.api.types.is_object_dtype(df[c]):
            df[c] = df[c].map(clean_text_keep_none)

    # Asegura que 'abreviacion' exista y sea string
    if "abreviacion" in df.columns:
        df["abreviacion"] = df["abreviacion"].fillna("").astype(str)

    return df  # No fillna() para preservar nulls reales (NaN) en 'alias' y otros

# ------------------ Núcleo ------------------

def extract_to_csv_same_dir(html_path: Path) -> Tuple[Path, Path]:
    """Extrae tablas del HTML y guarda los CSV en el MISMO directorio del HTML."""
    if not html_path.exists():
        raise FileNotFoundError(f"No se encontró el HTML: {html_path}")

    html = html_path.read_text(encoding="utf-8", errors="ignore")
    soup = BeautifulSoup(html, "html.parser")

    rows_actuales: List[Dict[str, Optional[str]]] = []
    rows_anteriores: List[Dict[str, Optional[str]]] = []

    for table in soup.find_all("table", class_="wikitable"):
        sec_id, sec_text = section_of_table(table)
        grupo = nearest_group_label(table)

        if sec_id == "Código_de_Rutas_Actuales":
            rows_actuales.extend(parse_wikitable(table, grupo or "Rutas actuales", "actual"))
        elif sec_id == "Código_de_Rutas_Anteriores":
            rows_anteriores.extend(parse_wikitable(table, grupo or "Rutas anteriores", "anterior"))
        else:
            sec_t = (sec_text or "").lower()
            if "rutas actuales" in sec_t:
                rows_actuales.extend(parse_wikitable(table, grupo or "Rutas actuales", "actual"))
            elif "rutas anteriores" in sec_t:
                rows_anteriores.extend(parse_wikitable(table, grupo or "Rutas anteriores", "anterior"))

    df_act = to_dataframe(rows_actuales)
    df_ant = to_dataframe(rows_anteriores)

    outdir = html_path.parent
    act_csv = outdir / "rutas_actuales.csv"
    ant_csv = outdir / "rutas_anteriores.csv"

    # Exporta preservando NaN (alias null). Por defecto pandas escribe celdas vacías.
    df_act.to_csv(act_csv, index=False, encoding="utf-8-sig")
    df_ant.to_csv(ant_csv, index=False, encoding="utf-8-sig")

    print(f"✔ HTML: {html_path}")
    print(f"✔ Rutas actuales → {act_csv}  (filas: {len(df_act)})")
    print(f"✔ Rutas anteriores → {ant_csv} (filas: {len(df_ant)})")
    return act_csv, ant_csv

# ------------------ Ejecución directa ------------------

def main():
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--html", default="config/wikirutas.html")
    args, _ = parser.parse_known_args()

    html_path = Path(args.html)
    if not html_path.exists():
        # Busca 'config/wikirutas.html' desde CWD hacia arriba
        cwd = Path.cwd().resolve()
        found = None
        for parent in [cwd, *cwd.parents]:
            cand = parent / "config" / "wikirutas.html"
            if cand.exists():
                found = cand
                break
        if found is None:
            raise FileNotFoundError(
                f"No se encontró '{args.html}'. "
                f"Ejecuta desde la raíz del proyecto o pasa --html RUTA/AL/ARCHIVO.html"
            )
        html_path = found

    extract_to_csv_same_dir(html_path)

if __name__ == "__main__":
    main()


✔ HTML: D:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\config\wikirutas.html
✔ Rutas actuales → D:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\config\rutas_actuales.csv  (filas: 223)
✔ Rutas anteriores → D:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\config\rutas_anteriores.csv (filas: 822)


## Transformación beta

In [4]:
# scripts/convert.ipynb — Overpass bus → GeoJSON
# + listado de rutas con color
# + helpers para cambiar colores (por rel_id, ref o regex)

import json, re, csv
from pathlib import Path
from collections import defaultdict

# ========= Rutas relativas =========
def ROOT():
    cwd = Path.cwd()
    if cwd.name == "scripts" and (cwd.parent / "data").exists(): return cwd.parent
    if (cwd / "scripts").exists() and (cwd / "data").exists():   return cwd
    for p in cwd.parents:
        if (p / "data").exists(): return p
    return cwd

BASE     = ROOT()
IN_PATH  = BASE / "data" / "raw" / "osm" / "transporte.json"   # <-- ajusta si tu archivo se llama distinto
OUT_DIR  = BASE / "data" / "raw" / "converted"
OUT_JSON = OUT_DIR / "transporte.json"
OUT_GEO  = OUT_DIR / "transporte.geojson"
OUT_CSV  = OUT_DIR / "transporte_palette_map.csv"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========= Paleta (40 colores) =========
PALETTE = [
    "#1f77b4","#ff7f0e","#2ca02c","#d62728","#9467bd","#8c564b",
    "#e377c2","#7f7f7f","#bcbd22","#17becf",
    "#4e79a7","#f28e2b","#59a14f","#e15759","#76b7b2","#edc948",
    "#b07aa1","#ff9da7","#9c755f","#bab0ab",
    "#8dd3c7","#ffffb3","#bebada","#fb8072","#80b1d3","#fdb462",
    "#b3de69","#fccde5","#bc80bd","#ccebc5",
    "#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c",
    "#fdbf6f","#ff7f00","#cab2d6","#6a3d9a",
]
DEFAULT_COLOR = "#888888"

# ========= Overrides (puedes editarlos) =========
# 1) Por ID de relación exacto
ROUTE_COLOR_OVERRIDES_RELID = {
    # 123456789: "#FF4500",
}
# 2) Por ref exacta (p. ej., "X-SO-24-I")
ROUTE_COLOR_OVERRIDES_REF = {
    # "X-SO-24-I": "#FF4500",
}
# 3) Reglas por regex (se aplica la primera que haga match)
ROUTE_COLOR_RULES = [
    # (re.compile(r"^X-SO"), "#7B1FA2"),
]

# ========= Helpers para cambiar colores (usa estas funciones) =========
def set_route_color(identifier, hex_color):
    """
    Cambia el color de una ruta de forma simple.
    - identifier: int (relation_id) o str (ref exacta).
    - hex_color: string, ej. "#FF4500"
    Uso:
        set_route_color(123456789, "#FF4500")      # por ID
        set_route_color("X-SO-24-I", "#FF4500")    # por ref
    """
    if isinstance(identifier, int):
        ROUTE_COLOR_OVERRIDES_RELID[identifier] = hex_color
        return "rel_id"
    if isinstance(identifier, str):
        ROUTE_COLOR_OVERRIDES_REF[identifier] = hex_color
        return "ref"
    raise TypeError("identifier debe ser int (rel_id) o str (ref)")

def set_route_color_regex(pattern, hex_color, prepend=True):
    """
    Añade una regla por regex sobre la ref. Útil para colorear muchos refs afines.
    Uso:
        set_route_color_regex(r'^X-SO', '#7B1FA2')
    """
    rx = re.compile(pattern)
    if prepend:
        ROUTE_COLOR_RULES.insert(0, (rx, hex_color))  # prioridad alta
    else:
        ROUTE_COLOR_RULES.append((rx, hex_color))
    return "regex"

# ========= Limpieza de properties =========
KEEP = {"ref","name","from","to","operator","route","description"}
DROP = {"source","created_by","opening_hours","phone","email","website",
        "wikidata","wikipedia","short_name","alt_name","old_name",
        "check_date","survey:date","start_date","end_date","maxspeed","max_speed"}
DROP_P = ("addr:","contact:","gnis:","tiger:","seamark:","source:")

def clean(tags: dict) -> dict:
    out={}
    for k,v in (tags or {}).items():
        if k in KEEP: out[k]=v; continue
        if k in DROP: continue
        if any(k.startswith(p) for p in DROP_P): continue
    return out

# ========= Utilidades Overpass/Geo =========
def is_overpass(d): return isinstance(d, dict) and isinstance(d.get("elements"), list)

def build_indexes(elems):
    nodes, ways, rels = {}, {}, []
    for el in elems:
        t = el.get("type")
        if t == "node": nodes[el["id"]] = el
        elif t == "way": ways[el["id"]]  = el
        elif t == "relation": rels.append(el)
    return nodes, ways, rels

def coords_from_way(way, nodes_by_id):
    if not way: return []
    if way.get("geometry"):
        return [[pt["lon"], pt["lat"]] for pt in way["geometry"]]
    coords=[]
    for nid in way.get("nodes", []):
        n = nodes_by_id.get(nid)
        if n: coords.append([n["lon"], n["lat"]])
    return coords

def coords_from_member(m, ways_by_id, nodes_by_id):
    """Acepta geometry embebida del member; o enlaza a way top-level; o parsea _fullGeom123."""
    if m.get("type") != "way": return []
    if m.get("geometry"):  # geometry embebida del member
        return [[pt["lon"], pt["lat"]] for pt in m["geometry"]]
    ref = m.get("ref")
    if isinstance(ref, int) and ref in ways_by_id:
        return coords_from_way(ways_by_id[ref], nodes_by_id)
    if isinstance(ref, str):
        mm = re.search(r"(\d+)$", ref)
        if mm:
            wid = int(mm.group(1))
            if wid in ways_by_id:
                return coords_from_way(ways_by_id[wid], nodes_by_id)
    return []

STOP_ROLES = {"stop","platform","stop_entry_only","platform_entry_only","stop_exit_only","platform_exit_only"}

def stop_feats_from_relation(rel, nodes_by_id, color):
    feats=[]
    for m in rel.get("members", []):
        if m.get("type") != "node": continue
        role = (m.get("role") or "").lower()
        if role not in STOP_ROLES: continue
        node = nodes_by_id.get(m.get("ref"))
        if not node: continue
        feats.append({
            "type":"Feature",
            "geometry":{"type":"Point","coordinates":[node["lon"], node["lat"]]},
            "properties":{
                "kind":"stop","marker-color":color,"marker-symbol":"bus",
                "_osm_type":"node","_osm_id":node["id"]
            }
        })
    return feats

# ========= Color por ruta =========
def route_key_for(rel):
    """Clave estable: prefiera ref; si no, name; si no, id."""
    tags = rel.get("tags") or {}
    ref  = (tags.get("ref") or "").strip()
    name = (tags.get("name") or "").strip()
    return ref or name or str(rel["id"])

def color_for_route(rel, index_map):
    rid  = rel["id"]
    tags = rel.get("tags") or {}
    ref  = (tags.get("ref") or "").strip()

    # prioridad: overrides → regex → paleta/mod
    if rid in ROUTE_COLOR_OVERRIDES_RELID:
        return ROUTE_COLOR_OVERRIDES_RELID[rid]
    if ref and ref in ROUTE_COLOR_OVERRIDES_REF:
        return ROUTE_COLOR_OVERRIDES_REF[ref]
    for rx, col in ROUTE_COLOR_RULES:
        if ref and rx.search(ref):
            return col
    idx = index_map[route_key_for(rel)] % len(PALETTE)
    return PALETTE[idx]

# ========= Ejecutar =========
data = json.loads(IN_PATH.read_text(encoding="utf-8"))
if not is_overpass(data):
    raise ValueError("El archivo no parece Overpass JSON (falta 'elements').")

els = data["elements"]
nodes_by_id, ways_by_id, relations = build_indexes(els)
bus_rels = [r for r in relations if (r.get("tags") or {}).get("route") == "bus"]

# Índice estable para la paleta (por clave de ruta)
keys_sorted = sorted({route_key_for(r) for r in bus_rels}, key=lambda x: (x is None, x))
index_map = {k:i for i,k in enumerate(keys_sorted)}  # clave → índice

# ======== PRINT: listado de TODAS las rutas con color ========
def print_detected_routes(rels, index_map):
    rows=[]
    for r in rels:
        tags = r.get("tags") or {}
        ref  = (tags.get("ref") or "").strip()
        name = (tags.get("name") or "").strip()
        col  = color_for_route(r, index_map)
        rows.append((ref, name, r["id"], col))
    # ordenar por ref, luego name
    rows.sort(key=lambda t: (t[0] or "", t[1] or "", t[2]))
    print(f"Total rutas: {len(rows)}")
    for ref, name, rid, col in rows:
        label = ref or name or f"rel/{rid}"
        print(f"- {label}  | id={rid}  | color={col}")
    return rows

_ = print_detected_routes(bus_rels, index_map)

# ======== Construcción GeoJSON ========
features=[]
palette_rows=[]

for rel in bus_rels:
    tags = rel.get("tags") or {}
    ref  = (tags.get("ref") or "").strip()
    name = (tags.get("name") or "").strip()
    color= color_for_route(rel, index_map)

    # tramos (acepta geometry embebida)
    lines=[]
    for m in rel.get("members", []):
        if m.get("type") != "way": continue
        coords = coords_from_member(m, ways_by_id, nodes_by_id)
        if coords: lines.append(coords)

    if lines:
        base = {**clean(tags), "_osm_type":"relation","_osm_id":rel["id"]}
        features.append({
            "type":"Feature",
            "geometry":{"type":"MultiLineString","coordinates":lines},
            "properties":{
                **base, "kind":"route", "title": (name or ref or f"rel/{rel['id']}"),
                "stroke": color, "stroke-width": 4, "stroke-opacity": 1.0
            }
        })

    # paradas si las hay
    features += stop_feats_from_relation(rel, nodes_by_id, color)

    # fila para CSV de mapeo
    palette_rows.append({
        "relation_id": rel["id"],
        "route_key": route_key_for(rel),
        "ref": ref,
        "name": name,
        "color": color,
        "n_segments": len(lines),
    })

# ========= Guardar =========
geojson={"type":"FeatureCollection","features":features}
for out in (OUT_JSON, OUT_GEO):
    out.write_text(json.dumps(geojson, ensure_ascii=False, indent=2), encoding="utf-8")
    print("✔ Guardado:", out)

with OUT_CSV.open("w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["relation_id","route_key","ref","name","color","n_segments"])
    w.writeheader(); w.writerows(palette_rows)
print("✔ Guardado:", OUT_CSV)

# ========= Resumen =========
n_routes = sum(1 for f in features if f["properties"].get("kind")=="route")
n_stops  = sum(1 for f in features if f["properties"].get("kind")=="stop")
print(f"Resumen → rutas: {n_routes} | paradas: {n_stops}")
print(f"Paleta usada: {len(PALETTE)} colores (módulo por índice estable)")
print("Overrides por ID:", len(ROUTE_COLOR_OVERRIDES_RELID), "| overrides por ref:", len(ROUTE_COLOR_OVERRIDES_REF), "| reglas regex:", len(ROUTE_COLOR_RULES))


Total rutas: 207
- rel/3778984  | id=3778984  | color=#e377c2
- rel/4193367  | id=4193367  | color=#7f7f7f
- rel/4193500  | id=4193500  | color=#bcbd22
- rel/4252750  | id=4252750  | color=#17becf
- rel/4455259  | id=4455259  | color=#4e79a7
- rel/4455850  | id=4455850  | color=#f28e2b
- rel/4467480  | id=4467480  | color=#59a14f
- rel/4782800  | id=4782800  | color=#e15759
- rel/4787549  | id=4787549  | color=#76b7b2
- rel/4789418  | id=4789418  | color=#edc948
- rel/4820596  | id=4820596  | color=#b07aa1
- Airport Express Lima  | id=16812606  | color=#ff9da7
- CR13_I  | id=4511802  | color=#9c755f
- CR14_I  | id=4512067  | color=#bab0ab
- CR23_I  | id=4512296  | color=#8dd3c7
- IM49_I  | id=4521427  | color=#bebada
- IM50_I  | id=4521493  | color=#fb8072
- IM50_I  | id=4521619  | color=#fb8072
- RutaTuristica1  | id=5582236  | color=#fdb462
- 1007  | id=19413426  | color=#1f77b4
- 1008  | id=19572753  | color=#ff7f0e
- 1019  | id=19575841  | color=#2ca02c
- 1021  | id=19597970  | col

## Transformar los codigos de ruta

In [9]:
# -*- coding: utf-8 -*-
"""
Enriquecedor de GeoJSON/relations con equivalencias de rutas (modernas/actuales/anteriores)
+ particionado de transporte (antiguo/modernizado/sin match) con geometría.
"""
from __future__ import annotations

import re
import os
import json
import csv
from pathlib import Path
from typing import Optional, Tuple, Dict, Any, List
from collections import defaultdict, Counter

import pandas as pd


# ========= Paleta (60 colores) y overrides =========
PALETTE = [
    "#1f77b4","#ff7f0e","#2ca02c","#d62728","#9467bd","#8c564b",
    "#e377c2","#7f7f7f","#bcbd22","#17becf",
    "#4e79a7","#f28e2b","#59a14f","#e15759","#76b7b2","#edc948",
    "#b07aa1","#ff9da7","#9c755f","#bab0ab",
    "#8dd3c7","#ffffb3","#bebada","#fb8072","#80b1d3","#fdb462",
    "#b3de69","#fccde5","#bc80bd","#ccebc5",
    "#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c",
    "#fdbf6f","#ff7f00","#cab2d6","#6a3d9a",
    "#66c2a5","#fc8d62","#8da0cb","#e78ac3","#a6d854","#ffd92f",
    "#e5c494","#b3b3b3","#8c9eff","#ffab91","#80cbc4","#c5e1a5",
]
DEFAULT_COLOR = "#888888"

# 1) overrides por ID de relación
ROUTE_COLOR_OVERRIDES_RELID: Dict[int, str] = {
    # 123456789: "#FF4500",
}
# 2) overrides por ref exacta
ROUTE_COLOR_OVERRIDES_REF: Dict[str, str] = {
    # "X-SO-24-I": "#FF4500",
}
# 3) reglas regex por ref (se aplica la primera que hace match)
ROUTE_COLOR_RULES: List[Tuple[re.Pattern, str]] = [
    # (re.compile(r"^X-SO"), "#7B1FA2"),
]

def set_route_color(identifier, hex_color):
    """Cambiar color por rel_id (int) o por ref (str)."""
    if isinstance(identifier, int):
        ROUTE_COLOR_OVERRIDES_RELID[identifier] = hex_color
        return "rel_id"
    if isinstance(identifier, str):
        ROUTE_COLOR_OVERRIDES_REF[identifier] = hex_color
        return "ref"
    raise TypeError("identifier debe ser int (rel_id) o str (ref)")

def set_route_color_regex(pattern, hex_color, prepend=True):
    """Añade una regla regex sobre 'ref'."""
    rx = re.compile(pattern)
    if prepend:
        ROUTE_COLOR_RULES.insert(0, (rx, hex_color))
    else:
        ROUTE_COLOR_RULES.append((rx, hex_color))
    return "regex"


# ========================= utilidades de normalización =========================

def _clean(s: Any) -> str:
    if s is None:
        return ""
    s = str(s).replace("\u2013", "-").replace("\u2014", "-").replace("–", "-").replace("—", "-").strip()
    return s

def _norm_hex(color: str) -> str:
    """Normaliza a #RRGGBB. Acepta #rgb/#RRGGBB/rgb/RRGGBB. Devuelve '' si es inválido."""
    s = _clean(color).strip()
    if not s:
        return ""
    c = s.lstrip("#").upper()
    if re.fullmatch(r"[0-9A-F]{3}", c):
        c = "".join(ch * 2 for ch in c)
    if re.fullmatch(r"[0-9A-F]{6}", c):
        return f"#{c}"
    return ""

def _style_aliases(color_hex: str, width: int = 2, opacity: float = 1.0) -> Dict[str, Any]:
    """Alias de estilo para visores (no toca geometría)."""
    c = _norm_hex(color_hex) or "#555555"
    return {
        "color": c, "stroke": c, "stroke-color": c, "stroke_color": c,
        "lineColor": c, "line_color": c,
        "stroke-width": width, "stroke_width": width, "lineWidth": width, "line_width": width,
        "stroke-opacity": opacity, "stroke_opacity": opacity, "lineOpacity": opacity, "line_opacity": opacity,
    }

# Familias válidas
ALLOWED_FAMILIES = {"CR", "ICR", "OCR"}

def _fix_family_prefix(code: str) -> str:
    s = (code or "").upper()
    s = re.sub(r"^(?:E|N|S)?CR", "CR", s)
    s = re.sub(r"^(?:E)?ICR", "ICR", s)
    s = re.sub(r"^(?:E)?OCR", "OCR", s)
    s = re.sub(r"^IM", "ICR", s)
    s = re.sub(r"^IPC", "ICR", s)
    s = re.sub(r"^IO", "OCR", s)
    s = re.sub(r"^OM", "OCR", s)
    return s

def _family_prefix(s: str) -> str:
    m = re.match(r"^[A-Z]+", s or "")
    return m.group(0) if m else ""

def is_numeric_code(s: str) -> bool:
    return re.fullmatch(r"\d{3,5}", s or "") is not None

def is_allowed_family(code: str) -> bool:
    fam = _family_prefix(code)
    return fam == "" or fam in ALLOWED_FAMILIES or is_numeric_code(code)

def norm_ref(ref: str) -> Tuple[str, str]:
    raw = _clean(ref).upper().replace(" ", "")
    m = re.match(r"^(?:X-)?([A-Z0-9]+?)[_-]([IVEX])$", raw)
    if m:
        base, suf = m.group(1), m.group(2)
        direction = {"I": "ida", "V": "vuelta", "E": "especial", "X": ""}.get(suf, "")
    else:
        base, direction = raw, ""
    if base.startswith("X-"):
        base = base[2:]
    base = re.sub(r"[_-](I|V|E|X)$", "", base)
    base = _fix_family_prefix(base)
    base = re.sub(r"(I|V|E|X)$", "", base)
    base = base.replace("_", "-")
    base = re.sub(r"-$", "", base)
    return base, direction

# ========================= búsqueda de config/ =========================

def find_config_dir(explicit: Optional[Path] = None) -> Path:
    if explicit:
        d = Path(explicit)
        if d.is_dir():
            return d / "config" if (d / "config").is_dir() else d
    here = Path.cwd().resolve()
    for upto in [here, *here.parents]:
        cand = upto / "config"
        if cand.is_dir():
            return cand
    raise FileNotFoundError("No se encontró carpeta 'config' hacia arriba desde el CWD. Crea 'config/' o pasa --config-root.")

# ========================= carga de tablas =========================

def _col(df: pd.DataFrame, *names: str, default: str = "") -> pd.Series:
    for n in names:
        if n in df.columns:
            return df[n].astype(str).fillna(default)
    return pd.Series([default] * len(df))

def load_rutas_modern(config: Path) -> pd.DataFrame:
    p = config / "rutas.csv"
    if not p.exists():
        return pd.DataFrame(columns=["source","new_code","new_color","alias","origen","destino","empresa","old_code","abreviacion"])
    df = pd.read_csv(p, dtype=str).fillna("")
    out = pd.DataFrame({
        "source": "rutas.csv",
        "new_code": _col(df, "CodigoRuta","codigo_ruta","ruta","Ruta").str.upper(),
        "new_color": _col(df, "Color","color","ruta_color").str.upper(),
        "alias": _col(df, "Alias","alias"),
        "origen": _col(df, "Origen","origen"),
        "destino": _col(df, "Destino","destino"),
        "empresa": _col(df, "Empresa","empresa"),
        "old_code": _col(df, "CodigoAntiguo","codigo_anterior","CodigoAnterior").str.upper(),
        "abreviacion": _col(df, "abreviacion","abrev","abbr"),
    })
    out = out[out["new_code"]!=""]
    return out.drop_duplicates(subset=["new_code"]).reset_index(drop=True)

def load_rutas_actuales(config: Path) -> pd.DataFrame:
    p = config / "rutas_actuales.csv"
    if not p.exists():
        return pd.DataFrame(columns=["source","new_code","new_color","alias","origen","destino","empresa","old_code","old_color","grupo","seccion","abreviacion"])
    df = pd.read_csv(p, dtype=str).fillna("")
    out = pd.DataFrame({
        "source": "rutas_actuales.csv",
        "new_code": _col(df, "ruta").str.upper(),
        "new_color": _col(df, "ruta_color").str.upper(),
        "alias": _col(df, "alias"),
        "origen": _col(df, "origen"),
        "destino": _col(df, "destino"),
        "empresa": _col(df, "empresa"),
        "old_code": _col(df, "codigo_anterior").str.upper(),
        "old_color": _col(df, "codigo_anterior_color").str.upper(),
        "grupo": _col(df, "grupo"),
        "seccion": _col(df, "seccion"),
        "abreviacion": _col(df, "abreviacion","abrev","abbr"),
    })
    out = out[out["new_code"]!=""]
    return out.drop_duplicates(subset=["new_code"]).reset_index(drop=True)

def load_rutas_anteriores(config: Path) -> pd.DataFrame:
    p = config / "rutas_anteriores.csv"
    if not p.exists():
        return pd.DataFrame(columns=["source","old_code","old_color","alias","origen","destino","empresa","estado","grupo","seccion","abreviacion"])
    df = pd.read_csv(p, dtype=str).fillna("")
    out = pd.DataFrame({
        "source": "rutas_anteriores.csv",
        "old_code": _col(df, "ruta").str.upper(),
        "old_color": _col(df, "ruta_color").str.upper(),
        "alias": _col(df, "alias"),
        "origen": _col(df, "origen"),
        "destino": _col(df, "destino"),
        "empresa": _col(df, "empresa"),
        "estado": _col(df, "estado_de_la_ruta","estado"),
        "grupo": _col(df, "grupo"),
        "seccion": _col(df, "seccion"),
        "abreviacion": _col(df, "abreviacion","abrev","abbr"),
    })
    out = out[out["old_code"]!=""]
    return out.drop_duplicates(subset=["old_code"]).reset_index(drop=True)

# ========================= indexador y matcher =========================

def _crop_x_prefix(s: str) -> str:
    return re.sub(r'^X-', '', (s or ''), flags=re.I)

class RouteMatcher:
    def __init__(self, config_root: Optional[Path] = None):
        self.config_dir = find_config_dir(config_root)
        self.df_modern = load_rutas_modern(self.config_dir)
        self.df_actual = load_rutas_actuales(self.config_dir)
        self.df_prev   = load_rutas_anteriores(self.config_dir)
        self.by_new: Dict[str, Dict[str, Any]] = {}
        self.by_old_to_new: Dict[str, str] = {}
        self.old_rows: Dict[str, Dict[str, Any]] = {}
        self._build_indexes()

    def _build_indexes(self):
        for _, r in self.df_actual.iterrows():
            self.by_new[str(r[("new_code")])] = r.to_dict()
        for _, r in self.df_modern.iterrows():
            self.by_new[str(r[("new_code")])] = r.to_dict()
        for df in (self.df_actual, self.df_modern):
            for _, r in df.iterrows():
                oc = _fix_family_prefix(_crop_x_prefix(_clean(r.get("old_code","")).upper()))
                nc = _fix_family_prefix(_clean(r.get("new_code","")))
                if oc:
                    self.by_old_to_new[oc] = nc
        for _, r in self.df_prev.iterrows():
            oc = _fix_family_prefix(_crop_x_prefix(_clean(r.get("old_code","")).upper()))
            if oc:
                self.old_rows[oc] = r.to_dict()

    def match(self, ref_raw: str) -> Tuple[str, Dict[str, Any], str, str]:
        base, _direction = norm_ref(ref_raw)
        if is_numeric_code(base) and base in self.by_new:
            row = self.by_new[base]
            return base, row, row.get("source",""), "new_code"
        if base in self.by_old_to_new:
            nc = self.by_old_to_new[base]
            row = self.by_new.get(nc, {})
            if row:
                return nc, row, row.get("source",""), "old_code"
        hb = re.sub(r"^E", "", base)
        if hb != base and hb in self.by_old_to_new:
            nc = self.by_old_to_new[hb]
            row = self.by_new.get(nc, {})
            if row:
                return nc, row, row.get("source",""), "heuristic_old"
        if base in self.old_rows:
            return "", self.old_rows[base], "rutas_anteriores.csv", "old_only"
        return "", {}, "", ""

    def enrich_properties(self, ref_raw: str) -> Dict[str, Any]:
        base_norm, _direction = norm_ref(ref_raw)
        nc, row, src, how = self.match(ref_raw)
        out = {
            "ref_raw": ref_raw,
            "match_source": src or "",
            "match_method": how or "",
            "route_code": nc or "",
            "old_code": "",
            "alias": "",
            "origen": "",
            "destino": "",
            "empresa": "",
            "color": "",
            "route_code_base": base_norm,
            "family_ok": bool(is_allowed_family(base_norm)),
        }
        if row:
            if src == "rutas_anteriores.csv" and not nc:
                out["old_code"] = _clean(row.get("old_code","" )).upper()
                out["alias"]    = _clean(row.get("alias",""))
                out["origen"]   = _clean(row.get("origen",""))
                out["destino"]  = _clean(row.get("destino",""))
                out["empresa"]  = _clean(row.get("empresa",""))
                out["color"]    = _norm_hex(row.get("old_color",""))
            else:
                out["alias"]    = _clean(row.get("alias",""))
                out["origen"]   = _clean(row.get("origen",""))
                out["destino"]  = _clean(row.get("destino",""))
                out["empresa"]  = _clean(row.get("empresa",""))
                out["color"]    = _norm_hex(row.get("new_color", row.get("ruta_color","")))
                if row.get("old_code"):
                    out["old_code"] = _clean(row.get("old_code")).upper()
        return out

# ========================= Transporte: lectura lista y particionado con GEOMETRÍA =========================

CONVERTED_TRANSPORTE_PATH = Path("data/raw/converted/transporte/transporte.json")

def _read_converted_transporte(root: Path) -> List[Dict[str, Any]]:
    path = root / CONVERTED_TRANSPORTE_PATH
    if not path.exists():
        raise FileNotFoundError(str(path))
    return json.loads(path.read_text(encoding="utf-8"))

def _coerce_multilinestring(geom: Dict[str, Any]) -> List[List[List[float]]]:
    if not geom:
        return []
    t = geom.get("type")
    if t == "MultiLineString":
        return geom.get("coordinates", []) or []
    if t == "LineString":
        coords = geom.get("coordinates") or []
        return [coords] if coords else []
    return []

def _code_from_props(props: Dict[str, Any]) -> str:
    importance = ["CodigoRuta","codigo_ruta","CODIGORUTA","Ruta","ruta","route_code",
                  "id_new","id_old","new_code","old_code","ref","ref_raw","code","base"]
    rx = re.compile(r'\b(?:ICR|OCR|CR)\d+[A-Z]?\b|\b\d{3,5}\b', re.I)
    def _pick(s: str) -> str:
        if not isinstance(s, str):
            return ""
        m = rx.search(s.upper())
        if m:
            base, _ = norm_ref(m.group(0))
            return base
        return ""
    for k in importance:
        if k in props:
            c = _pick(str(props.get(k)))
            if c: return c
    for v in props.values():
        c = _pick(str(v))
        if c: return c
    return ""

def _autodetect_rutas_geo(root: Path) -> Optional[Path]:
    # Priorizar el GeoJSON convertido que sí trae MultiLineString por relación
    candidates = [
        Path("data/raw/converted/transporte/transporte.geojson"),  # ← primero: archivo generado por convert
        Path("data/rutas_lineas.geojson"),
        Path("scripts/output/relations.geojson"),
        Path("data/relations.geojson"),
        Path("relations.geojson"),
        Path("data/raw/converted/transporte/transporte_rutasfull.geojson"),
        Path("transporte_rutasfull.geojson"),
    ]
    for rel in candidates:
        p = (root / rel) if not rel.is_absolute() else rel
        if p.exists():
            return p
    return None

# --- Índice de geometrías a partir de rutas_geo ---
def build_geometry_index(geo_path: Path) -> Dict[str, Dict[str, Any]]:
    obj = json.loads(geo_path.read_text(encoding="utf-8"))
    feats = obj.get("features", []) if obj.get("type") == "FeatureCollection" else (
        [obj] if obj.get("type") == "Feature" else []
    )
    acc: Dict[str, List[List[List[float]]]] = defaultdict(list)

    # Regex secundaria (fallback) – la primaria es normalización directa
    rx = re.compile(r'(?:ICR|OCR|CR)\s*\d+[A-Z]?|\d{3,5}', re.I)

    def _is_numeric_code(s: str) -> bool:
        return re.fullmatch(r'\d{3,5}', s or '') is not None

    def _is_allowed_family(code: str) -> bool:
        fam = re.match(r'^[A-Z]+', code or '')
        fam = fam.group(0) if fam else ''
        return fam in {'CR','ICR','OCR'} or _is_numeric_code(code)

    def _extract_code(props: Dict[str, Any]) -> str:
        # 1) Pase principal: normaliza candidatos típicos
        for k in ["ref","name","title","code","base","CodigoRuta","codigo_ruta","Ruta","ruta",
                  "route_code","id_new","id_old","new_code","old_code","ref_raw"]:
            v = props.get(k)
            if not v:
                continue
            base, _ = norm_ref(str(v))
            base = _fix_family_prefix(base)
            if _is_allowed_family(base):
                return base

        # 2) Fallback: regex dentro de cualquier string (pre-normalizando prefijos)
        for v in props.values():
            if not isinstance(v, str):
                continue
            vv = v.upper()
            vv = re.sub(r'\b[ENS]?CR', 'CR', vv)  # ECR/NCR/SCR -> CR
            vv = vv.replace('IM', 'ICR').replace('IPC','ICR').replace('IO','OCR').replace('OM','OCR')
            m = rx.search(vv)
            if m:
                base, _ = norm_ref(m.group(0))
                return _fix_family_prefix(base)
        return ""

    for ft in feats:
        props = (ft or {}).get("properties", {}) or {}
        code = _extract_code(props)
        if not code:
            continue
        lines = _coerce_multilinestring((ft or {}).get("geometry") or {})
        if lines:
            acc[code].extend(lines)

    out: Dict[str, Dict[str, Any]] = {}
    for code, lines in acc.items():
        out[code] = {"type": "MultiLineString", "coordinates": lines}

    print(f"• Índice de geometrías: {len(out)} códigos desde {geo_path}")
    for probe in ["CR02","CR13","1135","1136"]:
        if probe in out:
            print(f"  - ok: {probe} presente en índice")
    return out

def _build_old_new_indexes(m: RouteMatcher):
    ant_by_old: Dict[str, Dict[str, Any]] = {}
    for _, r in m.df_prev.iterrows():
        oc = _fix_family_prefix(_crop_x_prefix(_clean(r.get("old_code", "").upper())))
        if oc:
            ant_by_old[oc] = r.to_dict()
    act_by_old: Dict[str, Dict[str, Any]] = {}
    act_by_new: Dict[str, Dict[str, Any]] = {}
    for _, r in m.df_actual.iterrows():
        oc = _fix_family_prefix(_crop_x_prefix(_clean(r.get("old_code", "").upper())))
        nc = _fix_family_prefix(_clean(r.get("new_code", "").upper()))
        if oc:
            act_by_old[oc] = r.to_dict()
        if nc:
            act_by_new[nc] = r.to_dict()
    return ant_by_old, act_by_old, act_by_new

def _choose_tag(row: Dict[str, Any]) -> str:
    ab = _clean(row.get("abreviacion", ""))
    if ab:
        return ab
    return _clean(row.get("empresa", ""))

def _normalize_relation_props(rec: Dict[str, Any], ant_by_old, act_by_old) -> Dict[str, Any]:
    ref = _clean(rec.get("ref") or rec.get("title") or rec.get("name") or rec.get("id"))
    base, _d = norm_ref(ref)
    status = "unmatched"
    id_old = base
    id_new = ""
    color = ""
    tag = ""
    prev = ant_by_old.get(base)
    if prev:
        id_old = _clean(prev.get("old_code", base)).upper()
        act = act_by_old.get(id_old)
        if act:
            id_new = _clean(act.get("new_code", "")).upper()
            color = _norm_hex(act.get("new_color", "") or act.get("ruta_color",""))
            tag = _choose_tag(act)
            status = "modern"
        else:
            color = _norm_hex(prev.get("old_color", ""))
            tag = _choose_tag(prev)
            status = "old_only"
    props = {
        "id": rec.get("id"),
        "ref": rec.get("ref", ""),
        "title": rec.get("title", ""),
        "route": (rec.get("route") or rec.get("route_master") or "").lower(),
        "operator": rec.get("operator", ""),
        "from": rec.get("from", ""),
        "to": rec.get("to", ""),
        "network": rec.get("network", ""),
        "id_old": id_old,
        "id_new": id_new,
        "color": color,
        "tag": tag,
        "label": tag,
        "status": status,
        "route_code_base": base,
        "family_ok": is_allowed_family(base),
    }
    return props

def _coerce_relation_record(rec: Any) -> Dict[str, Any]:
    """
    Acepta:
      - dict de OSM/Feature (con ref/title/name/id/route…)
      - str: se toma como ref (ej. "X-SO55-I")
      - int: se toma como id de relación
    Devuelve un dict canónico con llaves esperadas.
    """
    if isinstance(rec, dict):
        return {
            "id": rec.get("id"),
            "ref": rec.get("ref") or "",
            "title": rec.get("title") or rec.get("name") or "",
            "route": (rec.get("route") or rec.get("route_master") or "").lower(),
            "operator": rec.get("operator") or "",
            "from": rec.get("from") or "",
            "to": rec.get("to") or "",
            "network": rec.get("network") or "",
        }
    if isinstance(rec, str):
        return {
            "id": None,
            "ref": rec,
            "title": "",
            "route": "",
            "operator": "",
            "from": "",
            "to": "",
            "network": "",
        }
    if isinstance(rec, (int, float)):
        return {
            "id": int(rec),
            "ref": "",
            "title": "",
            "route": "",
            "operator": "",
            "from": "",
            "to": "",
            "network": "",
        }
    s = str(rec)
    return {
        "id": None,
        "ref": s,
        "title": "",
        "route": "",
        "operator": "",
        "from": "",
        "to": "",
        "network": "",
    }

# === NUEVO: helper para sobrescribir ref/title con equivalencias ===
def _apply_equivalence_ref_title(props: Dict[str, Any]) -> None:
    """
    Sobrescribe props['ref'] y props['title'] eliminando X-/sufijos y
    reemplazando por equivalencias si existen.
    Prioridad: route_code(id_new) > id_new > id_old > route_code_base > ref/title/ref_raw normalizados.
    """
    def pick() -> str:
        rc = _clean(props.get("route_code", "")).upper()
        if rc:
            return rc
        id_new = _clean(props.get("id_new", "")).upper()
        if id_new:
            return id_new
        id_old = _clean(props.get("id_old", "")).upper()
        if id_old:
            return id_old
        base = _clean(props.get("route_code_base", "")).upper()
        if base:
            return base
        raw = _clean(props.get("ref") or props.get("title") or props.get("ref_raw"))
        base2, _ = norm_ref(raw)
        return base2

    preferred = _fix_family_prefix(pick())
    if preferred:
        props["ref"] = preferred
        props["title"] = preferred

# --- LECTOR ROBUSTO DEL CONVERTIDO (lista o FeatureCollection) ---
def _load_converted_rel_records(transporte_path: Path) -> List[Dict[str, Any]]:
    """
    Carga data/raw/converted/transporte/transporte.json.
    - Si es una lista de objetos con {id, ref, title...}, la devuelve tal cual (normalizada).
    - Si es un GeoJSON FeatureCollection, extrae solo las features de 'ruta' y
      transforma sus properties a un dict de relación compatible con _normalize_relation_props.
    """
    obj = json.loads(transporte_path.read_text(encoding="utf-8"))
    # Caso 1: ya es lista de relaciones
    if isinstance(obj, list):
        out = []
        for rec in obj:
            out.append(_coerce_relation_record(rec))
        return out

    # Caso 2: FeatureCollection / Feature
    if isinstance(obj, dict) and obj.get("type") in ("FeatureCollection", "Feature"):
        feats = obj.get("features", []) if obj.get("type") == "FeatureCollection" else [obj]
        out = []
        for ft in feats:
            p = (ft or {}).get("properties", {}) or {}
            if not p:
                continue
            kind = (p.get("kind") or "").lower()
            if kind and kind != "route":
                continue
            out.append({
                "id": p.get("_osm_id") or p.get("id"),
                "ref": p.get("ref") or "",
                "title": p.get("title") or p.get("name") or "",
                "route": (p.get("route") or p.get("route_master") or "").lower(),
                "operator": p.get("operator") or "",
                "from": p.get("from") or "",
                "to": p.get("to") or "",
                "network": p.get("network") or "",
            })
        return out

    raise ValueError(f"Formato no soportado en {transporte_path}")

# --- PARTICIONADO (sin bucles duplicados) ---
def partition_transporte_to_geojsons(matcher: RouteMatcher,
                                     rutas_geo: Optional[Path] = None,
                                     transporte_path: Optional[Path] = None) -> Tuple[Path, Path, Path]:
    root = matcher.config_dir.parent

    # 1) cargar lista de relaciones convertida (acepta lista o FeatureCollection)
    if transporte_path is None:
        transporte_path = root / CONVERTED_TRANSPORTE_PATH
    if not transporte_path.exists():
        raise FileNotFoundError(str(transporte_path))
    rels = _load_converted_rel_records(transporte_path)

    # 2) cargar índice de geometrías (autodetect por defecto)
    if rutas_geo is None:
        rutas_geo = _autodetect_rutas_geo(root)
    geom_index: Dict[str, Dict[str, Any]] = {}
    if rutas_geo and Path(rutas_geo).exists():
        geom_index = build_geometry_index(Path(rutas_geo))
    else:
        print("• Aviso: no se encontró GeoJSON de líneas; las geometrías quedarán null.")

    ant_by_old, act_by_old, _act_by_new = _build_old_new_indexes(matcher)

    feats_modern, feats_old_only, feats_unmatched = [], [], []

    def _geom_for_pref(ids: List[str]) -> Optional[Dict[str, Any]]:
        for code in ids:
            if not code:
                continue
            key = _fix_family_prefix(str(code).upper())
            g = geom_index.get(key)
            if g:
                return g
        return None

    # 3) Clasificar y construir features
    for rec in rels:
        props = _normalize_relation_props(rec, ant_by_old, act_by_old)

        # === NUEVO: forzar ref/title con equivalencias ===
        _apply_equivalence_ref_title(props)

        # Geometría por prioridad: id_new > id_old > base del ref
        g = _geom_for_pref([props.get("id_new"), props.get("id_old"), props.get("route_code_base")])
        # Estilo para visores (no altera geometría)
        style = _style_aliases(props.get("color"), width=2, opacity=1.0)
        feat = {
            "type": "Feature",
            "geometry": g,
            "properties": {**props, **style, "label": props.get("tag", "")}
        }
        st = props.get("status")
        if st == "modern":
            feats_modern.append(feat)
        elif st == "old_only":
            feats_old_only.append(feat)
        else:
            feats_unmatched.append(feat)

    # 4) Escribir salidas
    out_dir = root / "scripts" / "output"
    out_dir.mkdir(parents=True, exist_ok=True)
    p_modern = out_dir / "transporte.modern.geojson"
    p_old    = out_dir / "transporte.old_only.geojson"
    p_unm    = out_dir / "transporte.unmatched.geojson"

    p_modern.write_text(json.dumps({"type": "FeatureCollection", "features": feats_modern}, ensure_ascii=False), encoding="utf-8")
    p_old.write_text(json.dumps({"type": "FeatureCollection", "features": feats_old_only}, ensure_ascii=False), encoding="utf-8")
    p_unm.write_text(json.dumps({"type": "FeatureCollection", "features": feats_unmatched}, ensure_ascii=False), encoding="utf-8")

    print(f"✔ Particionado transporte → modernas: {p_modern} (features={len(feats_modern)})")
    print(f"✔ Particionado transporte → solo antiguas: {p_old} (features={len(feats_old_only)})")
    print(f"✔ Particionado transporte → sin match: {p_unm} (features={len(feats_unmatched)})")

    # Diagnóstico rápido (opcional)
    for sample in ["CR02","CR13","1135"]:
        sg = geom_index.get(sample)
        if sg:
            print(f"• Ejemplo {sample}: geometría indexada con {len(sg.get('coordinates', []))} segmentos.")
    return p_modern, p_old, p_unm

# ========================= OSM → GeoJSON (relations) + extractor plano =========================
ALLOWED_ROUTE_VALUES = {"bus","trolleybus","minibus","tram","light_rail","train","subway","share_taxi","taxi","ferry"}

# ========= Limpieza de properties de OSM (solo campos útiles) =========
KEEP = {"ref","name","from","to","operator","route","description"}
DROP = {"source","created_by","opening_hours","phone","email","website",
        "wikidata","wikipedia","short_name","alt_name","old_name",
        "check_date","survey:date","start_date","end_date","maxspeed","max_speed"}
DROP_P = ("addr:","contact:","gnis:","tiger:","seamark:","source:")

def clean_osm_tags(tags: dict) -> dict:
    out={}
    for k,v in (tags or {}).items():
        if k in KEEP: out[k]=v; continue
        if k in DROP: continue
        if any(k.startswith(p) for p in DROP_P): continue
    return out

def is_overpass(d): return isinstance(d, dict) and isinstance(d.get("elements"), list)

def build_indexes(elems):
    nodes, ways, rels = {}, {}, []
    for el in elems:
        t = el.get("type")
        if t == "node": nodes[el["id"]] = el
        elif t == "way": ways[el["id"]]  = el
        elif t == "relation": rels.append(el)
    return nodes, ways, rels

def coords_from_way(way, nodes_by_id):
    if not way: return []
    if way.get("geometry"):
        return [[pt["lon"], pt["lat"]] for pt in way["geometry"]]
    coords=[]
    for nid in way.get("nodes", []):
        n = nodes_by_id.get(nid)
        if n: coords.append([n["lon"], n["lat"]])
    return coords

def coords_from_member(m, ways_by_id, nodes_by_id):
    """Acepta geometry embebida del member; o enlaza a way top-level; o parsea '_fullGeom123'."""
    if m.get("type") != "way": return []
    if m.get("geometry"):  # geometry embebida del member
        return [[pt["lon"], pt["lat"]] for pt in m["geometry"]]
    ref = m.get("ref")
    if isinstance(ref, int) and ref in ways_by_id:
        return coords_from_way(ways_by_id[ref], nodes_by_id)
    if isinstance(ref, str):
        mm = re.search(r"(\d+)$", ref)
        if mm:
            wid = int(mm.group(1))
            if wid in ways_by_id:
                return coords_from_way(ways_by_id[wid], nodes_by_id)
    return []

# ========= Paradas (para añadir puntos de parada en el GeoJSON) =========
STOP_ROLES = {"stop","platform","stop_entry_only","platform_entry_only","stop_exit_only","platform_exit_only"}

def stop_feats_from_relation(rel, nodes_by_id, color):
    feats = []
    for m in rel.get("members", []):
        if m.get("type") != "node":
            continue
        role = (m.get("role") or "").lower()
        if role not in STOP_ROLES:
            continue
        node = nodes_by_id.get(m.get("ref"))
        if not node:
            continue
        feats.append({
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [node["lon"], node["lat"]]},
            "properties": {
                "kind": "stop",
                "marker-color": color,
                "marker-symbol": "bus",
                "_osm_type": "node",
                "_osm_id": node["id"],
            },
        })
    return feats


def route_key_for(rel):
    """Clave estable de color: prefiero ref; luego name; luego id."""
    tags = rel.get("tags") or {}
    ref  = (tags.get("ref") or "").strip()
    name = (tags.get("name") or "").strip()
    return ref or name or str(rel["id"])

def color_for_route(rel, index_map):
    rid  = rel["id"]
    tags = rel.get("tags") or {}
    ref  = (tags.get("ref") or "").strip()
    # prioridad: overrides → regex → paleta/mod
    if rid in ROUTE_COLOR_OVERRIDES_RELID:
        return ROUTE_COLOR_OVERRIDES_RELID[rid]
    if ref and ref in ROUTE_COLOR_OVERRIDES_REF:
        return ROUTE_COLOR_OVERRIDES_REF[ref]
    for rx, col in ROUTE_COLOR_RULES:
        if ref and rx.search(ref):
            return col
    idx = index_map[route_key_for(rel)] % len(PALETTE)
    return PALETTE[idx]


def _as_linestring_coords(geom):
    coords = []
    for p in (geom or []):
        if isinstance(p, dict) and "lat" in p and "lon" in p:
            try:
                coords.append([float(p["lon"]), float(p["lat"])])
            except Exception:
                continue
    return coords

def build_geojson_from_osm_transporte(transporte_json: dict) -> dict:
    elements = transporte_json.get("elements") or []
    ways = {}
    for el in elements:
        if el.get("type") == "way" and "geometry" in el:
            coords = _as_linestring_coords(el.get("geometry"))
            if coords:
                ways[int(el.get("id"))] = coords
    feats = []
    for el in elements:
        if el.get("type") != "relation":
            continue
        tags = el.get("tags") or {}
        if (tags.get("type") or "").lower() != "route":
            continue
        route_kind = (tags.get("route") or tags.get("route_master") or "").lower()
        lines = []
        for m in (el.get("members") or []):
            if (m.get("type") or "") == "way":
                ref_id = m.get("ref") or m.get("id")
                try:
                    ref_id = int(ref_id)
                except Exception:
                    ref_id = None
                if ref_id is not None and ref_id in ways:
                    lines.append(ways[ref_id])
        geometry = {"type":"MultiLineString","coordinates":lines} if lines else None
        props = {
            "id": el.get("id"),
            "ref": tags.get("ref") or "",
            "title": tags.get("name") or "",
            "name": tags.get("name") or "",
            "route": route_kind,
            "operator": tags.get("operator") or "",
            "from": tags.get("from") or "",
            "to": tags.get("to") or "",
            "network": tags.get("network") or "",
            "tags": tags,
        }
        feats.append({"type":"Feature","geometry":geometry,"properties":props})
    return {"type":"FeatureCollection","features":feats}

def convert_osm_transporte_to_relations_geojson(matcher: RouteMatcher, out_path: Path = None) -> dict:
    src = matcher.config_dir.parent / "data" / "raw" / "osm" / "transporte.json"
    if not src.exists():
        raise FileNotFoundError(str(src))
    data = json.loads(src.read_text(encoding="utf-8"))
    if isinstance(data, dict) and data.get("type") in ("FeatureCollection","Feature"):
        fc = data if data.get("type") == "FeatureCollection" else {"type":"FeatureCollection","features":[data]}
    else:
        fc = build_geojson_from_osm_transporte(data)
    if out_path is None:
        out_path = matcher.config_dir.parent / "scripts" / "output" / "relations.geojson"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(fc, ensure_ascii=False), encoding="utf-8")
    print(f"✔ OSM → GeoJSON relations → {out_path}")
    return fc

def extract_relations_from_osm_transporte(obj: dict) -> list[dict]:
    if isinstance(obj, dict) and obj.get("type") in ("FeatureCollection","Feature"):
        feats = obj.get("features", []) if obj.get("type") == "FeatureCollection" else [obj]
        rels = []
        for ft in feats:
            p = (ft or {}).get("properties", {}) or {}
            if not p:
                continue
            rels.append({
                "id": p.get("id"),
                "ref": p.get("ref") or "",
                "title": p.get("name") or p.get("title") or "",
                "route": (p.get("route") or "").lower(),
                "operator": p.get("operator") or "",
                "from": p.get("from") or "",
                "to": p.get("to") or "",
                "network": p.get("network") or "",
                "tags": p.get("tags") or {},
            })
        return rels
    elements = (obj or {}).get("elements") or []
    rels = []
    for el in elements:
        if (el or {}).get("type") != "relation":
            continue
        tags = el.get("tags") or {}
        if (tags.get("type") or "").lower() != "route":
            continue
        rels.append({
            "id": el.get("id"),
            "ref": el.get("tags", {}).get("ref") or "",
            "title": el.get("tags", {}).get("name") or "",
            "route": (tags.get("route") or tags.get("route_master") or "").lower(),
            "operator": tags.get("operator") or "",
            "from": tags.get("from") or "",
            "to": tags.get("to") or "",
            "network": tags.get("network") or "",
            "tags": tags,
        })
    return rels

# ========================= IO (relations/geojson) =========================

def pick_ref(props: Dict[str, Any], fields: Tuple[str, ...] = ("ref","title","name")) -> str:
    for f in fields:
        v = props.get(f)
        if v:
            return str(v)
    v = props.get("id")
    return str(v) if isinstance(v, str) else ""

def _geom_for(ids: List[str], geom_index: Dict[str, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    for code in ids:
        if not code:
            continue
        key = _fix_family_prefix(str(code).upper())
        g = geom_index.get(key)
        if g:
            return g
    return None

def build_geojson_from_relations(relations: List[Dict[str, Any]],
                                 matcher: RouteMatcher,
                                 ref_fields: Tuple[str, ...] = ("ref","title"),
                                 rutas_geo: Optional[Path] = None) -> Dict[str, Any]:
    """
    Construye un GeoJSON a partir de una lista de relaciones:
    - NO inventa geometría: la busca en un GeoJSON de rutas (autodetectado o --in-rutas-geo)
    - Solo modifica color (normalizado + alias de estilo) y nombre de tags (añade 'label' = tag)
    """
    # índice de geometrías
    root = matcher.config_dir.parent
    if rutas_geo is None:
        rutas_geo = _autodetect_rutas_geo(root)
    geom_index: Dict[str, Dict[str, Any]] = {}
    if rutas_geo and Path(rutas_geo).exists():
        geom_index = build_geometry_index(Path(rutas_geo))

    feats = []
    for r in relations:
        props = dict(r)
        ref_raw = _clean(props.get("ref") or props.get("title") or props.get("name") or "")
        if not ref_raw:
            ref_raw = re.sub(r"[_-](I|V|E|X)$", "", _clean(props.get("id")), flags=re.I)
        enrich = matcher.enrich_properties(ref_raw)
        props.update(enrich)

        # === NUEVO: forzar ref/title con equivalencias ===
        _apply_equivalence_ref_title(props)

        # geometría: preferimos id_new, luego id_old, luego base del ref
        g = _geom_for([props.get("route_code"), props.get("old_code"), props.get("route_code_base")], geom_index)

        # solo propiedades: estilo/alias y label
        style = _style_aliases(props.get("color"))
        props.update(style)
        props["label"] = props.get("tag", "")

        feats.append({"type":"Feature","geometry":g,"properties":props})
    return {"type":"FeatureCollection","features":feats}

def enrich_geojson_file(in_path: Path, out_path: Path, matcher: RouteMatcher,
                        ref_fields: Tuple[str, ...] = ("ref","title")) -> Dict[str, Any]:
    obj = json.loads(in_path.read_text(encoding="utf-8"))
    feats = obj.get("features", []) if obj.get("type")=="FeatureCollection" else ([obj] if obj.get("type")=="Feature" else [])
    if not feats:
        raise ValueError("GeoJSON inválido.")
    for ft in feats:
        props = ft.setdefault("properties", {})
        ref_raw = _clean(pick_ref(props, ref_fields))
        props.update(matcher.enrich_properties(ref_raw))

        # === NUEVO: forzar ref/title con equivalencias ===
        _apply_equivalence_ref_title(props)

        # solo propiedades: alias de estilo + label; NO tocamos geometría existente
        style = _style_aliases(props.get("color"))
        props.update(style)
        props["label"] = props.get("tag", "")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fc = {"type":"FeatureCollection","features":feats}
    out_path.write_text(json.dumps(fc, ensure_ascii=False), encoding="utf-8")
    return fc

def write_equivalences_csv(fc: Dict[str, Any], out_csv: Path):
    rows = []
    for ft in fc["features"]:
        p = ft.get("properties", {})
        rows.append({
            "ref_raw": p.get("ref_raw",""),
            "route_code": p.get("route_code",""),
            "old_code": p.get("old_code",""),
            "match_source": p.get("match_source",""),
            "match_method": p.get("match_method",""),
            "alias": p.get("alias",""),
            "origen": p.get("origen",""),
            "destino": p.get("destino",""),
            "empresa": p.get("empresa",""),
            "color": p.get("color",""),
            "id": p.get("id",""),
            "title": p.get("title",""),
            "ref": p.get("ref",""),
            "name": p.get("name",""),
            "route_code_base": p.get("route_code_base",""),
            "family_ok": p.get("family_ok", False),
        })
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    if rows:
        with out_csv.open("w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
            w.writeheader()
            w.writerows(rows)

# ========================= Reporte robusto =========================

def print_match_report(fc: Dict[str, Any]) -> None:
    feats = fc.get("features", [])
    print("\n=== REPORTE DE EQUIVALENCIAS POR RUTA (tras limpieza) ===")
    if not feats:
        print("(sin features)")
        return
    counts = Counter((ft.get("properties", {}).get("match_method", "") or "") for ft in feats)
    total = sum(counts.values())
    order = ["new_code","old_code","heuristic_old","old_only",""]
    print("Método           | N   | %")
    print("-----------------+-----+------")
    for k in order + [x for x in counts.keys() if x not in order]:
        if k in counts:
            n = counts[k]
            pct = (100.0 * n / total) if total else 0.0
            label = k or "(vacío)"
            print(f"{label:<16} | {n:>3} | {pct:5.1f}")

    priority = {"new_code": 4, "old_code": 3, "heuristic_old": 2, "old_only": 1, "": 0}

    def key_from_props(p: Dict[str, Any]) -> str:
        key = _clean(p.get("route_code")) or _clean(p.get("old_code"))
        if not key:
            key = norm_ref(_clean(p.get("ref_raw","")))[0] or _clean(p.get("ref","")) or _clean(p.get("title","")) or _clean(p.get("name","")) or str(p.get("id",""))
        return key or "(sin_clave)"

    groups: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for ft in feats:
        p = ft.get("properties", {})
        groups[key_from_props(p)].append(p)

    sorted_groups = sorted(groups.items(), key=lambda kv: len(kv[1]), reverse=True)
    if sorted_groups:
        print("\nEjemplos (hasta 10 grupos más poblados):")
    shown = 0
    for gkey, items in sorted_groups:
        if shown >= 10:
            break
        choice = max(items, key=lambda p: priority.get(p.get("match_method",""), 0)) if items else {}
        method = _clean(choice.get("match_method",""))
        src = _clean(choice.get("match_source",""))
        rc = _clean(choice.get("route_code",""))
        oc = _clean(choice.get("old_code",""))
        print(f"- {gkey}: best=method={method or '(vacío)'} src={src or '(n/a)'} new={rc or '-'} old={oc or '-'} (items={len(items)})")
        shown += 1

# ========================= autodetección & fallback =========================
# Preferimos *.geojson antes que *.json
DEF_INPUTS = [
    Path("scripts/output/relations.geojson"),
    Path("scripts/output/relations.json"),
    Path("data/rutas_lineas.geojson"),
    Path("data/relations.geojson"),
    Path("data/relations.json"),
    Path("relations.geojson"),
    Path("relations.json"),
]

def autodetect_input() -> Tuple[Optional[Path], Optional[Path]]:
    for p in DEF_INPUTS:
        if p.exists():
            if p.suffix.lower() == ".geojson":
                return p, None
            if p.suffix.lower() == ".json":
                try:
                    obj = json.loads(p.read_text(encoding="utf-8"))
                    if isinstance(obj, dict) and obj.get("type") in ("FeatureCollection","Feature"):
                        return p, None
                    else:
                        return None, p
                except Exception:
                    return None, p
    return None, None

# ========================= impresión por base (compacto) =========================

def _iter_features(fc_or_feat):
    if not isinstance(fc_or_feat, dict):
        return []
    t = fc_or_feat.get("type")
    if t == "FeatureCollection":
        return fc_or_feat.get("features", [])
    if t == "Feature":
        return [fc_or_feat]
    return []

def _fallback_clean_ref(ref_raw: str) -> str:
    s = (ref_raw or "").strip().upper()
    s = re.sub(r"^X-", "", s)
    s = s.replace(" ", "").replace("_", "-")
    s = re.sub(r"[-/](I|V|X)$", "", s)
    return s

def _fallback_base_from_ref(ref_raw: str) -> str:
    s = _fallback_clean_ref(ref_raw)
    s = re.sub(r"[-_](I|V|X)$", "", s)
    return s

def print_detected_routes(fc_or_feat):
    features = _iter_features(fc_or_feat)
    if not features:
        print("(No hay features para reportar)")
        return
    from collections import defaultdict
    groups = defaultdict(list)
    base_from_ref = globals().get("base_from_ref", _fallback_base_from_ref)
    for feat in features:
        props = feat.get("properties", {}) or {}
        base = props.get("route_code_base") or base_from_ref(props.get("ref_raw") or props.get("ref") or "")
        props["route_code_base"] = base
        groups[base].append(props)
    prio = {"new_code": 0, "old_code": 1, "heuristic_old": 2, "old_only": 3, "": 9, None: 9}
    print("\n=== Rutas detectadas por base (mejor equivalencia por línea) ===")
    for base in sorted(groups.keys()):
        items = groups[base]
        def score(p):
            return prio.get(p.get("match_method"), 9)
        chosen = sorted(items, key=lambda p: (score(p), p.get("route_code") or p.get("old_code") or "ZZZ"))[0]
        method = chosen.get("match_method") or "none"
        src = chosen.get("match_source") or ""
        route_code = chosen.get("route_code") or "—"
        old_code = chosen.get("old_code") or ""
        alias = chosen.get("alias") or chosen.get("Alias") or ""
        empresa = chosen.get("empresa") or chosen.get("Empresa") or ""
        origen = chosen.get("origen") or chosen.get("Origen") or ""
        destino = chosen.get("destino") or chosen.get("Destino") or ""
        rel_id = chosen.get("rel_id") or chosen.get("relation_id") or ""
        fam_ok = chosen.get("family_ok")
        extra = []
        if old_code and (old_code != route_code): extra.append(f"ant={old_code}")
        if alias: extra.append(f"alias={alias}")
        if empresa: extra.append(f"emp={empresa}")
        if origen or destino: extra.append(f"{origen}->{destino}")
        if src: extra.append(f"src={src}")
        if fam_ok is False: extra.append("FAMILIA*=")
        extras = (" | "+"; ".join(extra)) if extra else ""
        print(f"- {base:12s} → {route_code:6s} [{method}] rel={rel_id}{extras}")

# --------- Fallback: catálogo desde CSVs (sin geometría) ---------
def build_index_geojson_from_catalog(matcher: RouteMatcher) -> Dict[str, Any]:
    feats = []

    # Rutas actuales (modernizadas)
    for _, r in matcher.df_actual.iterrows():
        new_code = _fix_family_prefix(_clean(r.get("new_code","")).upper())
        if not new_code:
            continue
        color = _norm_hex(r.get("new_color","") or r.get("ruta_color",""))
        tag = _choose_tag(r.to_dict())
        props = {
            "id": None,
            "ref": new_code,
            "title": new_code,
            "route": "bus",
            "operator": r.get("empresa",""),
            "from": r.get("origen",""),
            "to": r.get("destino",""),
            "network": "",
            "id_old": _fix_family_prefix(_clean(r.get("old_code","")).upper()),
            "id_new": new_code,
            "color": color,
            "tag": tag,
            "label": tag,
            "status": "modern",
            "route_code_base": new_code,
            "family_ok": is_allowed_family(new_code),
        }
        props.update(_style_aliases(color))
        feats.append({"type":"Feature","geometry":None,"properties":props})

    # Rutas anteriores sin match
    known_old = set(_fix_family_prefix(_clean(x).upper()) for x in matcher.df_actual["old_code"] if _clean(x))
    for _, r in matcher.df_prev.iterrows():
        old_code = _fix_family_prefix(_clean(r.get("old_code","")).upper())
        if not old_code or old_code in known_old:
            continue
        color = _norm_hex(r.get("old_color",""))
        tag = _choose_tag(r.to_dict())
        props = {
            "id": None,
            "ref": old_code,
            "title": old_code,
            "route": "bus",
            "operator": r.get("empresa",""),
            "from": r.get("origen",""),
            "to": r.get("destino",""),
            "network": "",
            "id_old": old_code,
            "id_new": "",
            "color": color,
            "tag": tag,
            "label": tag,
            "status": "old_only",
            "route_code_base": old_code,
            "family_ok": is_allowed_family(old_code),
        }
        props.update(_style_aliases(color))
        feats.append({"type":"Feature","geometry":None,"properties":props})

    return {"type":"FeatureCollection","features":feats}

# ========================= main =========================

# --- utilidades de geometría: NO modificar geometrías existentes, solo completar si faltan ---
def _geom_is_empty(g: Dict[str, Any] | None) -> bool:
    if not g:
        return True
    t = g.get("type")
    if t == "MultiLineString":
        coords = g.get("coordinates") or []
        return len(coords) == 0 or all((not seg) for seg in coords)
    if t == "LineString":
        return not (g.get("coordinates") or [])
    # para otros tipos, considera vacío (no los usamos)
    return True

def _geom_for(ids: List[str], geom_index: Dict[str, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    for code in ids:
        if not code:
            continue
        key = _fix_family_prefix(str(code).upper())
        g = geom_index.get(key)
        if g:
            return g
    return None


def build_geojson_from_osm_file(in_osm_path: Path, matcher: RouteMatcher) -> Dict[str, Any]:
    """
    Lee data/raw/osm/transporte.json (Overpass), arma geometría cuando exista en el JSON;
    si NO hay geometría, la completa desde un índice de rutas (rutas_lineas.geojson, etc).
    Solo cambia color/tag de properties; NUNCA modifica geometrías existentes.
    """
    raw = json.loads(in_osm_path.read_text(encoding="utf-8"))

    # 1) Construye FeatureCollection desde Overpass (con geometría si vino 'geometry' en ways)
    if isinstance(raw, dict) and raw.get("type") in ("Feature", "FeatureCollection"):
        fc = raw if raw.get("type") == "FeatureCollection" else {"type": "FeatureCollection", "features": [raw]}
    else:
        fc = build_geojson_from_osm_transporte(raw)  # intenta armar MultiLineString desde 'ways'

    # 2) Índice de geometrías externo (para completar SOLO si falta)
    root = matcher.config_dir.parent
    rutas_geo = _autodetect_rutas_geo(root)
    geom_index: Dict[str, Dict[str, Any]] = {}
    if rutas_geo and Path(rutas_geo).exists():
        geom_index = build_geometry_index(Path(rutas_geo))

    # 3) Enriquecer properties (color normalizado + label) y completar geometría si está vacía
    for ft in fc.get("features", []):
        props = ft.setdefault("properties", {})
        ref_raw = _clean(pick_ref(props, ("ref", "title", "name")))
        props.update(matcher.enrich_properties(ref_raw))

        # === NUEVO: forzar ref/title con equivalencias ===
        _apply_equivalence_ref_title(props)

        # color + aliases (para visores) y alias de nombre
        style = _style_aliases(props.get("color"))
        props.update(style)
        props["label"] = props.get("tag", "")

        # SOLO si la geometría viene vacía, trata de completarla desde el índice externo
        if _geom_is_empty(ft.get("geometry")) and geom_index:
            g = _geom_for([props.get("id_new"), props.get("id_old"), props.get("route_code_base")], geom_index)
            if g:
                ft["geometry"] = g  # completar, no reemplazar si ya existía

    return fc

# ========================= main =========================
def main(argv: Optional[List[str]] = None):
    import argparse
    parser = argparse.ArgumentParser(
        description="Enriquecer GeoJSON con equivalencias (modernas/actuales/anteriores) y particionar transporte.",
        add_help=True,
    )
    g_in = parser.add_mutually_exclusive_group(required=False)
    g_in.add_argument("--in-geojson", type=Path, help="GeoJSON de entrada (Feature/FeatureCollection)")
    g_in.add_argument("--in-relations", type=Path, help="JSON con lista de relations [{id, ref, ...}]")
    # 🔹 NUEVO: soporte para Overpass directamente
    g_in.add_argument("--in-osm", type=Path, help="Overpass JSON de transporte (ej: data/raw/osm/transporte.json)")

    parser.add_argument("--out", type=Path, required=False, help="Salida GeoJSON. Si no se especifica, se deriva del nombre de entrada.")
    parser.add_argument("--config-root", type=Path, default=None, help="Carpeta 'config' o raíz del repo que la contiene. Si no se pasa, se buscará hacia arriba desde CWD.")
    parser.add_argument("--ref-fields", default="ref,title", help="Campos de properties donde buscar el ref, separados por coma. Ej: 'ref,title,name'")
    parser.add_argument("--in-transporte", type=Path, default=None, help="Lista convertida de relaciones OSM (data/raw/converted/transporte/transporte.json)")
    parser.add_argument("--in-rutas-geo", type=Path, default=None, help="GeoJSON de rutas con geometría para dibujar las salidas")

    if argv is None:
        argv = []
    args, _unknown = parser.parse_known_args(argv)

    matcher = RouteMatcher(args.config_root)
    print(f"• Usando config: {matcher.config_dir}")

    # Autodetección simple cuando no pasan flags: preferimos el Overpass por defecto
    in_geo, in_rel, in_osm = args.in_geojson, args.in_relations, args.in_osm
    if not in_geo and not in_rel and not in_osm:
        osm_default = matcher.config_dir.parent / "data" / "raw" / "osm" / "transporte.json"
        if osm_default.exists():
            in_osm = osm_default
            print(f"• Entrada autodetectada (OSM): {in_osm}")
        else:
            ag, ar = autodetect_input()
            in_geo = ag or in_geo
            in_rel = ar or in_rel
            if ag or ar:
                print(f"• Entrada autodetectada: {ag or ar}")

    out_equiv = Path("scripts/output/route_equivalences.csv")
    out_equiv.parent.mkdir(parents=True, exist_ok=True)

    # --- 1) NUEVO: Overpass → GeoJSON con geometría y properties enriquecidas ---
    if in_osm:
        # === Nuevo pipeline: imprimir listado, generar GeoJSON y CSV estilo convert.ipynb ===
        raw = json.loads(in_osm.read_text(encoding="utf-8"))
        if not is_overpass(raw):
            raise ValueError("El archivo --in-osm no parece Overpass JSON (falta 'elements').")

        els = raw["elements"]
        nodes_by_id, ways_by_id, relations = build_indexes(els)
        bus_rels = [r for r in relations if (r.get("tags") or {}).get("route") == "bus"]

        # Índice estable para la paleta (por clave de ruta)
        keys_sorted = sorted({route_key_for(r) for r in bus_rels}, key=lambda x: (x is None, x))
        index_map = {k: i for i, k in enumerate(keys_sorted)}  # clave → índice

        # ======== PRINT: listado de TODAS las rutas con color ========
        def _print_detected_routes_palette(rels, index_map):
            rows = []
            for r in rels:
                tags = r.get("tags") or {}
                ref = (tags.get("ref") or "").strip()
                name = (tags.get("name") or "").strip()
                col = color_for_route(r, index_map)
                rows.append((ref, name, r["id"], col))
            # ordenar por ref, luego name
            rows.sort(key=lambda t: ((t[0] or ""), (t[1] or ""), t[2]))
            print(f"Total rutas: {len(rows)}")
            for ref, name, rid, col in rows:
                label = ref or name or f"rel/{rid}"
                print(f"- {label}  | id={rid}  | color={col}")
            return rows

        _print_detected_routes_palette(bus_rels, index_map)

        # ======== Construcción GeoJSON (rutas + paradas) ========
        features = []
        palette_rows = []

        for rel in bus_rels:
            tags = rel.get("tags") or {}
            ref = (tags.get("ref") or "").strip()
            name = (tags.get("name") or "").strip()
            color = color_for_route(rel, index_map)

            # tramos (acepta geometry embebida)
            lines = []
            for m in rel.get("members", []):
                if (m.get("type") or "") != "way":
                    continue
                coords = coords_from_member(m, ways_by_id, nodes_by_id)
                if coords:
                    lines.append(coords)

            if lines:
                base = {**clean_osm_tags(tags), "_osm_type": "relation", "_osm_id": rel["id"]}
                features.append({
                    "type": "Feature",
                    "geometry": {"type": "MultiLineString", "coordinates": lines},
                    "properties": {
                        **base,
                        "kind": "route",
                        "title": (name or ref or f"rel/{rel['id']}") ,
                        "stroke": color,
                        "stroke-width": 4,
                        "stroke-opacity": 1.0,
                    },
                })

            # paradas si las hay
            features += stop_feats_from_relation(rel, nodes_by_id, color)

            # fila para CSV de mapeo
            palette_rows.append({
                "relation_id": rel["id"],
                "route_key": route_key_for(rel),
                "ref": ref,
                "name": name,
                "color": color,
                "n_segments": len(lines),
            })

        # ========= Guardar convertidos =========
        root = matcher.config_dir.parent
        out_dir = root / "data" / "raw" / "converted" / "transporte"
        out_dir.mkdir(parents=True, exist_ok=True)
        out_json = out_dir / "transporte.json"
        out_geo = out_dir / "transporte.geojson"
        out_csv = out_dir / "transporte_palette_map.csv"

        geojson = {"type": "FeatureCollection", "features": features}
        for out in (out_json, out_geo):
            out.write_text(json.dumps(geojson, ensure_ascii=False, indent=2), encoding="utf-8")
            print("✔ Guardado:", out)

        with out_csv.open("w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=["relation_id", "route_key", "ref", "name", "color", "n_segments"])
            w.writeheader(); w.writerows(palette_rows)
        print("✔ Guardado:", out_csv)

        # ========= Resumen =========
        n_routes = sum(1 for f in features if f.get("properties", {}).get("kind") == "route")
        n_stops = sum(1 for f in features if f.get("properties", {}).get("kind") == "stop")
        print(f"Resumen → rutas: {n_routes} | paradas: {n_stops}")
        print(f"Paleta usada: {len(PALETTE)} colores (módulo por índice estable)")
        print("Overrides por ID:", len(ROUTE_COLOR_OVERRIDES_RELID), "| overrides por ref:", len(ROUTE_COLOR_OVERRIDES_REF), "| reglas regex:", len(ROUTE_COLOR_RULES))

        # === Además: generar el enriquecido como antes ===
        fc_osm = build_geojson_from_osm_file(in_osm, matcher)
        out_geo_enr = args.out or (in_osm.parent / f"{in_osm.stem}.enriquecido.geojson")
        out_geo_enr.parent.mkdir(parents=True, exist_ok=True)
        out_geo_enr.write_text(json.dumps(fc_osm, ensure_ascii=False), encoding="utf-8")
        print(f"✔ GeoJSON enriquecido (desde OSM) → {out_geo_enr}")
        write_equivalences_csv(fc_osm, out_equiv)
        print(f"✔ Equivalencias → {out_equiv}")
        print_detected_routes(fc_osm)
        print_match_report(fc_osm)

        # (opcional) también dejo hechos los 3 GeoJSON particionados si existe converted:
        try:
            rutas_geo = args.in_rutas_geo if args.in_rutas_geo else None
            transporte_path = args.in_transporte if args.in_transporte else None
            partition_transporte_to_geojsons(matcher, rutas_geo=rutas_geo, transporte_path=transporte_path)
        except FileNotFoundError:
            pass
        return

    # --- 2) relations (lista) → GeoJSON (geometría tomada del índice) ---
    if in_rel:
        relations = json.loads(in_rel.read_text(encoding="utf-8"))
        if not isinstance(relations, list):
            raise SystemExit("--in-relations debe ser una lista JSON de objetos.")
        ref_fields = tuple([_clean(x) for x in args.ref_fields.split(",") if _clean(x)])
        fc = build_geojson_from_relations(relations, matcher, ref_fields=ref_fields, rutas_geo=args.in_rutas_geo)
        out_geo = args.out or (in_rel.parent / f"{in_rel.stem}.geojson")
        out_geo.parent.mkdir(parents=True, exist_ok=True)
        Path(out_geo).write_text(json.dumps(fc, ensure_ascii=False), encoding="utf-8")
        print(f"✔ GeoJSON desde relations (con geometría) → {out_geo}")
        write_equivalences_csv(fc, out_equiv)
        print(f"✔ Equivalencias → {out_equiv}")
        print_detected_routes(fc)
        print_match_report(fc)

    # --- 3) GeoJSON de entrada → enriquecido (sin tocar geometría) ---
    elif in_geo:
        ref_fields = tuple([_clean(x) for x in args.ref_fields.split(",") if _clean(x)])
        out_geo = args.out or (in_geo.parent / f"{in_geo.stem}.enriquecido.geojson")
        fc = enrich_geojson_file(in_geo, out_geo, matcher, ref_fields=ref_fields)
        print(f"✔ GeoJSON enriquecido → {out_geo}")
        write_equivalences_csv(fc, out_equiv)
        print(f"✔ Equivalencias → {out_equiv}")
        print_detected_routes(fc)
        print_match_report(fc)

    # --- 4) Fallback (si no hay nada) ---
    else:
        try:
            fc_rel = convert_osm_transporte_to_relations_geojson(matcher)
            rels = extract_relations_from_osm_transporte(json.loads((matcher.config_dir.parent / "data" / "raw" / "osm" / "transporte.json").read_text(encoding="utf-8")))
            fc_geo = build_geojson_from_relations(rels, matcher, rutas_geo=None)
            out_geo = Path("scripts/output/relations.enriquecido.geojson")
            out_geo.parent.mkdir(parents=True, exist_ok=True)
            out_geo.write_text(json.dumps(fc_geo, ensure_ascii=False), encoding="utf-8")
            print(f"✔ GeoJSON enriquecido (fallback) → {out_geo}")
            write_equivalences_csv(fc_geo, out_equiv)
            print(f"✔ Equivalencias → {out_equiv}")
            print_detected_routes(fc_geo)
            print_match_report(fc_geo)
        except FileNotFoundError:
            print("• No se detectó entrada (relations/geojson). Generando catálogo desde los CSV...")
            fc = build_index_geojson_from_catalog(matcher)
            out_geo = Path("scripts/output/rutas_index.geojson")
            out_geo.parent.mkdir(parents=True, exist_ok=True)
            out_geo.write_text(json.dumps(fc, ensure_ascii=False), encoding="utf-8")
            print(f"✔ Catálogo GeoJSON → {out_geo}")
            write_equivalences_csv(fc)
            print(f"✔ Equivalencias → {out_equiv}")
            print_detected_routes(fc)
            print_match_report(fc)

    # Particionado si hay converted
    try:
        rutas_geo = args.in_rutas_geo if args.in_rutas_geo else None
        transporte_path = args.in_transporte if args.in_transporte else None
        partition_transporte_to_geojsons(matcher, rutas_geo=rutas_geo, transporte_path=transporte_path)
    except FileNotFoundError:
        pass

if __name__ == "__main__":
    main()


• Usando config: D:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\config
• Entrada autodetectada (OSM): D:\ARCHIVOS\OneDrive\Documents\UNI\Cursos adicionales\!CTIC\JavaScript\Proyectos\Rutas\data\raw\osm\transporte.json
Total rutas: 207
- rel/3778984  | id=3778984  | color=#e377c2
- rel/4193367  | id=4193367  | color=#7f7f7f
- rel/4193500  | id=4193500  | color=#bcbd22
- rel/4252750  | id=4252750  | color=#17becf
- rel/4455259  | id=4455259  | color=#4e79a7
- rel/4455850  | id=4455850  | color=#f28e2b
- rel/4467480  | id=4467480  | color=#59a14f
- rel/4782800  | id=4782800  | color=#e15759
- rel/4787549  | id=4787549  | color=#76b7b2
- rel/4789418  | id=4789418  | color=#edc948
- rel/4820596  | id=4820596  | color=#b07aa1
- Airport Express Lima  | id=16812606  | color=#ff9da7
- CR13_I  | id=4511802  | color=#9c755f
- CR14_I  | id=4512067  | color=#bab0ab
- CR23_I  | id=4512296  | color=#8dd3c7
- IM49_I  | id=4521427  | color=#bebada
- IM50_I  | id=

# Wikiroutes

In [1]:
# -*- coding: utf-8 -*-

"""
Wikiroutes → GeoJSON (rutas por ciudad) vía API oficial (RapidAPI)
- Extrae listado de rutas para una ciudad (por slug, p.ej. "lima").
- Para cada ruta, intenta obtener geometría en GeoJSON o polyline y la decodifica.
- Exporta: routes.geojson y, opcionalmente, stops.geojson (si el endpoint lo permite).
NOTA: Ajusta los nombres exactos de endpoints/params según la consola de RapidAPI.
"""

import os, json, time, sys
from typing import Dict, Any, List, Optional
import requests

RAPIDAPI_KEY = os.getenv("RAPIDAPI_KEY") or "PON_AQUI_TU_RAPIDAPI_KEY"
RAPIDAPI_HOST = "wikiroutes-api.p.rapidapi.com"

# Configura tu ciudad
CITY_SLUG = "lima"         # slug visible en https://wikiroutes.info/en/lima/catalog
INCLUDE_STOPS = True       # si quieres exportar paraderos (si el endpoint lo provee)
PAGE_SIZE = 200            # ajusta si la API pagina resultados
SLEEP_BETWEEN = 0.15       # antiflood suave

# ====== Endpoints (ajusta según los nombres de la API en RapidAPI) ======
# En la lista de endpoints verás nombres muy parecidos a estos:
EP_LIST_ROUTES   = "/routes"            # p.ej. GET /routes?citySlug=lima&limit=...
EP_ROUTE_DETAILS = "/route"             # p.ej. GET /route?routeId=12345  (detalles + geometry/dirs)
EP_ROUTE_STOPS   = "/routeStops"        # p.ej. GET /routeStops?routeId=12345 (si existe)
# (Si tu versión de API ofrece otros nombres —/routesInCity, /line, /lineStops, etc.— cámbialos aquí.)

# ====== Utiles ======
def _headers() -> Dict[str, str]:
    return {
        "X-RapidAPI-Key": RAPIDAPI_KEY,
        "X-RapidAPI-Host": RAPIDAPI_HOST,
        "Accept": "application/json",
    }

def _get(path: str, params: Dict[str, Any]) -> Dict[str, Any]:
    url = f"https://{RAPIDAPI_HOST}{path}"
    r = requests.get(url, headers=_headers(), params=params, timeout=60)
    r.raise_for_status()
    return r.json()

# ---- polyline decoder (si la geometría viene codificada) ----
# Soporta polyline5/polyline6. Si tu endpoint devuelve GeoJSON directo, no se usa.
def decode_polyline(polyline: str, precision: int = 5) -> List[List[float]]:
    coords, index, lat, lon = [], 0, 0, 0
    factor = 10 ** precision
    length = len(polyline)

    def _decode():
        nonlocal index
        result, shift = 0, 0
        while True:
            if index >= length:
                raise ValueError("Polyline truncated")
            b = ord(polyline[index]) - 63
            index += 1
            result |= (b & 0x1f) << shift
            shift += 5
            if b < 0x20:
                break
        return ~(result >> 1) if (result & 1) else (result >> 1)

    while index < length:
        dlat = _decode()
        dlon = _decode()
        lat += dlat
        lon += dlon
        coords.append([lon / factor, lat / factor])
    return coords

# ---- GeoJSON helpers ----
def feature_line(coords: List[List[float]], props: Dict[str, Any]) -> Dict[str, Any]:
    return {"type":"Feature", "geometry":{"type":"LineString","coordinates":coords}, "properties":props}

def feature_point(coord: List[float], props: Dict[str, Any]) -> Dict[str, Any]:
    return {"type":"Feature", "geometry":{"type":"Point","coordinates":coord}, "properties":props}

# ====== Extracción ======
def list_routes(city_slug: str) -> List[Dict[str, Any]]:
    # Intenta paginar si la API lo requiere
    routes = []
    page, more = 1, True
    while more:
        params = {"citySlug": city_slug, "limit": PAGE_SIZE, "page": page}
        try:
            data = _get(EP_LIST_ROUTES, params)
        except requests.HTTPError as e:
            print(f"[WARN] Falló list_routes page={page}: {e}", file=sys.stderr)
            break

        # Ajusta estas claves según la respuesta real
        batch = data.get("routes") or data.get("items") or data.get("data") or []
        routes.extend(batch)

        total = (data.get("total") or len(batch))
        got   = page * PAGE_SIZE
        more  = len(batch) > 0 and got < total
        page += 1
        time.sleep(SLEEP_BETWEEN)
    return routes

def get_route_geo(route_id: Any) -> Dict[str, Any]:
    """
    Devuelve:
    {
      "features": [Feature, ...],   # una o dos (ida / vuelta)
      "stops": [FeaturePoint, ...]  # opcional
    }
    Interpreta varios formatos habituales: GeoJSON directo o polyline5/6.
    """
    out = {"features": [], "stops": []}
    data = _get(EP_ROUTE_DETAILS, {"routeId": route_id})

    # --- propiedades base ---
    meta = {
        "route_id": data.get("id") or route_id,
        "short_name": data.get("shortName") or data.get("code"),
        "long_name":  data.get("longName") or data.get("name"),
        "operator":   (data.get("operator") or {}).get("name"),
        "source":     "wikiroutes_api",
    }

    # --- geometría ---
    # Algunos APIs devuelven: {"directions": [{"name":"outbound","geometry":{...}}, ...]}
    # Otros: {"geometry":{"type":"LineString"...}} o {"polyline":"...","polylinePrecision":6}
    def _add_geom(geom: Any, sentido: str):
        if not geom: return
        if isinstance(geom, dict) and geom.get("type") == "LineString":
            out["features"].append(feature_line(geom["coordinates"], {**meta, "sentido": sentido}))
        elif isinstance(geom, dict) and geom.get("type") == "MultiLineString":
            # une como varias features
            for coords in geom["coordinates"]:
                out["features"].append(feature_line(coords, {**meta, "sentido": sentido}))
        elif isinstance(geom, str):
            # asumimos polyline; precision 6 si la respuesta lo indica
            prec = data.get("polylinePrecision") or 5
            coords = decode_polyline(geom, precision=prec)
            out["features"].append(feature_line(coords, {**meta, "sentido": sentido}))

    if "directions" in data:
        for d in data["directions"]:
            sentido = (d.get("name") or d.get("direction") or "ida").lower()
            g = d.get("geometry") or d.get("polyline")
            _add_geom(g, sentido)
    else:
        g = data.get("geometry") or data.get("polyline")
        _add_geom(g, "desconocido")

    # --- paraderos opcionales ---
    if INCLUDE_STOPS:
        try:
            sdata = _get(EP_ROUTE_STOPS, {"routeId": route_id})
            stops = sdata.get("stops") or sdata.get("items") or []
            for s in stops:
                # soporta distintos nombres de campos
                lon = s.get("lon") or s.get("lng") or (s.get("location") or {}).get("lon")
                lat = s.get("lat") or (s.get("location") or {}).get("lat")
                if lon is None or lat is None: 
                    continue
                props = {
                    "route_id": meta["route_id"],
                    "stop_id": s.get("id"),
                    "stop_name": s.get("name"),
                    "source": "wikiroutes_api"
                }
                out["stops"].append(feature_point([lon, lat], props))
        except requests.HTTPError:
            pass

    return out

def main():
    if RAPIDAPI_KEY.startswith("PON_AQUI"):
        print("Configura RAPIDAPI_KEY en tu entorno.", file=sys.stderr)
        sys.exit(1)

    print(f"Listando rutas de ciudad: {CITY_SLUG} …")
    routes = list_routes(CITY_SLUG)
    print(f"Rutas encontradas: {len(routes)}")

    fc_routes = {"type":"FeatureCollection","features":[]}
    fc_stops  = {"type":"FeatureCollection","features":[]}

    for r in routes:
        rid = r.get("id") or r.get("routeId")
        if rid is None:
            continue
        try:
            bundle = get_route_geo(rid)
        except requests.HTTPError as e:
            print(f"[WARN] Detalles fallidos route_id={rid}: {e}", file=sys.stderr)
            continue

        fc_routes["features"].extend(bundle["features"])
        if INCLUDE_STOPS and bundle["stops"]:
            fc_stops["features"].extend(bundle["stops"])

        time.sleep(SLEEP_BETWEEN)

    with open("routes.geojson","w",encoding="utf-8") as f:
        json.dump(fc_routes, f, ensure_ascii=False)
    if INCLUDE_STOPS:
        with open("stops.geojson","w",encoding="utf-8") as f:
            json.dump(fc_stops, f, ensure_ascii=False)

    print("Listo: routes.geojson", "y stops.geojson" if INCLUDE_STOPS else "")

if __name__ == "__main__":
    main()


Configura RAPIDAPI_KEY en tu entorno.


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
