In [1]:
import pickle
import numpy as np

# ============================================================
# 0) MASTER SANITY CHECK (la función)
# ============================================================
def sanity_check_tortuous_vs_nontortuous(
    edges,
    r_edge,
    edge_length,
    points_by_edge,
    r_point,
    lengths2_by_edge,
    tol=1e-6, n = 500
):

    edges_list = list(edges)
    total_available = len(edges_list) 

    if n and n < total_available:
        import random 
        selected_edges = random.sample(edges_list, n)
        print(f"Analyzing random set with {n} edges")
    else: 
        selected_edges = edges_list
        print(f"Analyzing random set with {total_available} edges")
        
    print("\n==============================")
    print(" SANITY CHECK: MAX vs MEAN vs CSV ")
    print("==============================\n")

    diff_r_max = []   # Para confirmar que r_edge es el máximo
    diff_r_mean = []  # Para ver cuánto perdemos al usar la media

    D_edge = []        # Diámetro del CSV (No tortuoso, suele ser el MAX)
    D_tort_mean = []   # Diámetro basado en la MEDIA real de puntos
    D_tort_max = []    # Diámetro basado en el MÁXIMO real de puntos

    L_edge_all = []
    L2_all = []

    for e in selected_edges:
        if e not in points_by_edge or e not in r_edge:
            continue

        pts = points_by_edge[e]
        if pts is None or len(pts) < 2:
            continue

        # 1. Radios de los puntos (geometría real)
        r_pts = np.array([r_point[p] for p in pts], dtype=float)

        r_e = float(r_edge[e])      # El del archivo reducido (CSV)
        r_max = float(np.max(r_pts)) # El máximo de los puntos
        r_mean = float(np.mean(r_pts)) # La media de los puntos

        # Guardamos diferencias
        diff_r_max.append(r_e - r_max)   # Debería ser ~0 si r_e es el máximo
        diff_r_mean.append(r_e - r_mean) # La "inflación" por usar el máximo

        L_edge = float(edge_length[e])
        L2 = float(np.sum(lengths2_by_edge[e]))

        D_edge.append(2.0 * r_e)
        D_tort_mean.append(2.0 * r_mean)
        D_tort_max.append(2.0 * r_max)
        L_edge_all.append(L_edge)
        L2_all.append(L2)

    # --- Cálculos de Medias Ponderadas (Weighted Mean) ---
    def weighted_mean(vals, weights):
        v = np.array(vals); w = np.array(weights)
        return np.sum(v * w) / np.sum(w) if np.sum(w) > 0 else np.nan

    D_csv_w  = weighted_mean(D_edge, L_edge_all)
    D_mean_w = weighted_mean(D_tort_mean, L2_all)
    D_max_w  = weighted_mean(D_tort_max, L2_all)

    print(":one: CONSISTENCIA DEL ALGORITMO (CSV vs Geometría)")
    print(f"Error (r_edge - max(r_pts)): mean={np.mean(diff_r_max):.6f} (Si es ~0, confirmamos MAX)")
    print(f"Sesgo (r_edge - mean(r_pts)): mean={np.mean(diff_r_mean):.6f} (Cuánto 'engorda' el vaso)\n")

    print(":two: COMPARATIVA DE DIÁMETROS REGIONALES (Ponderados por longitud)")
    print(f"DIÁMETRO CSV (Reduced/Max): {D_csv_w:.6f} µm")
    print(f"DIÁMETRO REAL (Mean/Tort): {D_mean_w:.6f} µm")
    print(f"DIÁMETRO REAL (Max/Tort):  {D_max_w:.6f} µm\n")

    print(":brain: NOTA PARA GAIA:")
    bias_pct = ((D_csv_w - D_mean_w) / D_mean_w) * 100
    print(f"Usar el grafo reducido (MAX) sobreestima el calibre en un {bias_pct:.2f}% frente a la media real.")

    return {"D_mean_w": D_mean_w, "D_max_w": D_max_w}


# ============================================================
# 1) Helpers: carga + inspección + selección automática attrs
# ============================================================
def load_pkl(path):
    with open(path, "rb") as f:
        return pickle.load(f)

def print_data_summary(name, data):
    print(f"\n================ {name} SUMMARY ================")
    print("Top-level keys:", sorted(list(data.keys())))

    G = data.get("graph", None)
    if G is None:
        print("❌ data['graph'] no existe")
        return

    print(f"Graph: v={G.vcount()}  e={G.ecount()}")
    print("Vertex attributes:", G.vs.attributes())
    print("Edge attributes  :", G.es.attributes())

def pick_first_existing_attr(attr_list, candidates):
    for c in candidates:
        if c in attr_list:
            return c
    return None

def build_edge_map_from_attr(G, attr_name):
    """Devuelve dict {edge_id: value} para un atributo de edge."""
    out = {}
    for e in G.es:
        out[e.index] = e[attr_name]
    return out

def ensure_edge_ids_consistent(keys_a, keys_b, label_a="A", label_b="B"):
    sa, sb = set(keys_a), set(keys_b)
    inter = sa & sb
    only_a = sa - sb
    only_b = sb - sa
    print(f"\n--- EdgeID consistency {label_a} vs {label_b} ---")
    print(f"Common edges: {len(inter)}")
    print(f"Only {label_a}: {len(only_a)}")
    print(f"Only {label_b}: {len(only_b)}")
    return inter, only_a, only_b


# ============================================================
# 2) CONFIG: pon aquí tus dos archivos
# ============================================================
p_tort = "/home/admin/Ana/MicroBrain/output/graph_18_OutGeom.pkl"
p_non  = "/home/admin/Ana/MicroBrain/18_igraph.pkl"  # <-- cambia al nombre real


# ============================================================
# 3) LOAD + INSPECT
# ============================================================
G_non  = load_pkl(p_non)
data_tort = load_pkl(p_tort)


# ============================================================
# 4) Extract graphs
# ============================================================

G_tort = data_tort["graph"]

# ============================================================
# 5) Detect edge radius / length attributes en NON (source of truth)
# ============================================================
edge_attrs_non = G_non.es.attributes()

radius_candidates = ["r_edge", "radius_edge", "radius", "r", "rad", "radius_um", "r_um", "diameter", "diam", "diam_um"]
length_candidates = ["length", "len", "length_um", "L", "edge_length", "length_geom", "length_eucl"]

rad_attr = pick_first_existing_attr(edge_attrs_non, radius_candidates)
len_attr = pick_first_existing_attr(edge_attrs_non, length_candidates)

print("\n================ ATTRIBUTE PICKING ================")
print("NON edge attrs:", edge_attrs_non)
print("Picked radius attr:", rad_attr)
print("Picked length attr:", len_attr)

if rad_attr is None:
    raise KeyError("No encuentro atributo de radio en edges del NON. Mira la lista 'Edge attributes' e indica el correcto.")
if len_attr is None:
    raise KeyError("No encuentro atributo de length en edges del NON. Mira la lista 'Edge attributes' e indica el correcto.")

# Si detectó 'diameter' como radio, lo convertimos a radio
is_diameter = rad_attr in ["diameter", "diam", "diam_um"]

r_edge = build_edge_map_from_attr(G_non, rad_attr)
if is_diameter:
    r_edge = {k: float(v) / 2.0 for k, v in r_edge.items()}

edge_length = build_edge_map_from_attr(G_non, len_attr)


# ============================================================
# 6) Detect tortuous structures: points_by_edge, radii_geom, lengths2_by_edge
# ============================================================
print("\n================ TORTUOUS KEYS PICKING ================")
tkeys = data_tort.keys()
print("TORT top keys:", sorted(list(tkeys)))

# candidatos típicos
pbe_candidates = ["points_by_edge", "geom", "points", "edge_points", "polyline_points_by_edge", "edge_to_points"]
rpt_candidates = ["radii_geom", "radius_point", "r_point", "radii_point", "radius_p", "r_geom"]
l2_candidates  = ["lengths2_by_edge", "lengths2", "seg_lengths2_by_edge", "lengths_tort_by_edge"]

pbe_key = pick_first_existing_attr(list(tkeys), pbe_candidates)
rpt_key = pick_first_existing_attr(list(tkeys), rpt_candidates)
l2_key  = pick_first_existing_attr(list(tkeys), l2_candidates)

print("Picked points_by_edge key:", pbe_key)
print("Picked r_point key      :", rpt_key)
print("Picked lengths2 key     :", l2_key)

if pbe_key is None:
    raise KeyError("No encuentro points_by_edge en TORT. Mira keys y pon el nombre correcto.")
if rpt_key is None:
    raise KeyError("No encuentro radii por punto (radii_geom) en TORT. Mira keys y pon el nombre correcto.")
if l2_key is None:
    raise KeyError("No encuentro lengths2_by_edge en TORT. Mira keys y pon el nombre correcto.")

points_by_edge = data_tort[pbe_key]
r_point = data_tort[rpt_key]
lengths2_by_edge = data_tort[l2_key]

# Normaliza: por si vienen como listas indexadas por edge_id en vez de dict
def normalize_edge_container(x, name):
    if isinstance(x, dict):
        return x
    if isinstance(x, (list, tuple)):
        # asumimos que x[edge_id] existe
        return {i: x[i] for i in range(len(x))}
    raise TypeError(f"{name} tiene tipo raro: {type(x)}")

points_by_edge = normalize_edge_container(points_by_edge, "points_by_edge")
lengths2_by_edge = normalize_edge_container(lengths2_by_edge, "lengths2_by_edge")

# ============================================================
# 7) Edge ID consistency check
# ============================================================
common_edges, only_non, only_tort = ensure_edge_ids_consistent(
    r_edge.keys(), points_by_edge.keys(), "NON(r_edge)", "TORT(points_by_edge)"
)
common_edges2, _, _ = ensure_edge_ids_consistent(
    edge_length.keys(), lengths2_by_edge.keys(), "NON(edge_length)", "TORT(lengths2_by_edge)"
)

edges = sorted(list(common_edges & common_edges2))
print(f"\n✅ Using {len(edges)} common edges for sanity check.")


# ============================================================
# 8) RUN SANITY CHECK
# ============================================================
out = sanity_check_tortuous_vs_nontortuous(
    edges=edges,
    r_edge=r_edge,
    edge_length=edge_length,
    points_by_edge=points_by_edge,
    r_point=r_point,
    lengths2_by_edge=lengths2_by_edge,
)

# Si quieres ver cuáles son los edges malos:
if len(out["bad_edges"]) > 0:
    print("First 20 bad_edges:", out["bad_edges"][:20])



NON edge attrs: ['connectivity', 'nkind', 'radius', 'diameter', 'length']
Picked radius attr: radius
Picked length attr: length

TORT top keys: ['geom', 'graph', 'vertex']
Picked points_by_edge key: None
Picked r_point key      : None
Picked lengths2 key     : None


KeyError: 'No encuentro points_by_edge en TORT. Mira keys y pon el nombre correcto.'