In [7]:
import numpy    as np
import pandas   as pd
import networkx as nx

import matplotlib.pyplot as plt

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors     import kneighbors_graph

In [9]:
# Load the CSV under ../data/processed into the notebook-wide frame `df`.
# (File name and the summary below suggest a Z < 2.5, SN_MEDIAN_ALL > 25 selection —
# presumably applied upstream; confirm against the preprocessing step.)
df = pd.read_csv(
    filepath_or_buffer="../data/processed/photometric_z2.5_sn_25.csv",
)

# Last expression of the cell: per-column summary statistics, rendered as the cell output.
df.describe()

Unnamed: 0,Z,PSFMAG_u,PSFMAG_g,PSFMAG_r,PSFMAG_i,EXTINCTION_u,EXTINCTION_g,EXTINCTION_r,EXTINCTION_i,SN_MEDIAN_ALL,...,K_g,K_r,K_i,M_u,M_g,M_r,M_i,u_g,g_r,r_i
count,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,...,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0,7298.0
mean,1.138728,18.144534,17.872423,17.695044,17.565628,0.156711,0.120526,0.084277,0.062922,32.255556,...,-0.408867,-0.173439,-0.142357,-25.439366,-25.686871,-26.06343,-26.202573,0.247505,0.376559,0.139144
std,0.66982,0.751794,0.662493,0.618687,0.636729,0.096885,0.074476,0.052091,0.038913,6.796239,...,0.200819,0.199274,0.194424,1.933796,1.888503,1.872146,1.879929,0.23075,0.165269,0.101036
min,0.004153,14.247252,14.388747,14.459476,14.0847,0.017132,0.013349,0.009235,0.006862,25.002308,...,-0.816864,-0.455711,-0.398128,-29.783957,-29.880597,-30.286828,-30.476412,-4.891333,-0.232115,-3.280621
25%,0.536489,17.688221,17.467079,17.315396,17.19043,0.085192,0.065432,0.045869,0.034211,27.191056,...,-0.570073,-0.324743,-0.346856,-26.827658,-27.102444,-27.480433,-27.632001,0.126207,0.285317,0.083524
50%,1.107271,18.161829,17.923318,17.74471,17.625888,0.130728,0.100506,0.070412,0.052516,30.148148,...,-0.409984,-0.293603,-0.141941,-25.977119,-26.207394,-26.555546,-26.694335,0.204084,0.349801,0.133505
75%,1.693,18.615182,18.306258,18.107947,17.985466,0.205886,0.158152,0.11076,0.08279,35.453175,...,-0.324973,0.042944,0.02017,-24.413951,-24.646785,-24.997003,-25.090485,0.311446,0.431586,0.187788
max,2.498,24.634121,25.070984,21.162504,23.304535,0.555323,0.432704,0.299343,0.223591,76.683449,...,0.008939,0.286896,0.249128,-13.053094,-13.338057,-13.50581,-13.797227,5.559997,5.470358,0.916723


In [None]:
# ------------ 1) Graph construction (from df) -----------------------------
def build_graph_from_df(
    df,
    feat_cols=("u_g","g_r","r_i"),
    k=10,
    metric="minkowski",  # euclidean = minkowski with p=2
    p=2,
    mutual=False,        # union kNN (False) or mutual kNN (True)
    weight_fn="inv1p",   # "inv1p" or "gauss"
    sigma=None,          # bandwidth, only used when weight_fn="gauss"
):
    """
    Build a weighted, undirected k-nearest-neighbour graph from columns of ``df``.

    The selected columns are standardised, a kNN graph with distances is
    computed, symmetrised, and every remaining distance ``d`` is turned into
    an edge ``weight``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table; only ``feat_cols`` are read.
    feat_cols : sequence of str
        Columns used as features.
    k : int
        Number of neighbours per row.
    metric, p :
        Passed through to ``kneighbors_graph``.
    mutual : bool
        False keeps an edge when either endpoint lists the other as a
        neighbour (element-wise maximum with the transpose); True keeps it
        only when both do (element-wise minimum).
    weight_fn : {"inv1p", "gauss"}
        ``"inv1p"``: ``1 / (1 + d)``.
        ``"gauss"``: ``exp(-d**2 / (2 * sigma**2))``.
    sigma : float or None
        Gaussian bandwidth. When None, the median over rows of the largest
        distance in each row of the directed kNN graph (i.e. the distance to
        the k-th neighbour) is used.

    Returns
    -------
    (G, H) : tuple of networkx.Graph
        ``G`` has one node per row of ``df``, labelled by row *position*
        (0 .. n-1), including rows left without any edge. ``H`` is an
        independent copy of the largest connected component of ``G``.

    Raises
    ------
    ValueError
        If ``weight_fn`` is not recognised, or if the Gaussian bandwidth is 0.

    Notes
    -----
    Distances are stored sparsely and explicit zeros are eliminated, so two
    rows at exactly zero distance do not get an edge.
    """
    # Fail fast on a bad option, before the comparatively expensive kNN search.
    if weight_fn not in ("inv1p", "gauss"):
        raise ValueError("weight_fn debe ser 'inv1p' o 'gauss'")

    # standardised features
    X = StandardScaler().fit_transform(df[list(feat_cols)].to_numpy())
    n_samples = X.shape[0]

    # directed kNN graph whose stored values are distances
    A_knn = kneighbors_graph(X, n_neighbors=k, mode="distance", metric=metric, p=p).tocsr()

    # symmetrisation
    A = A_knn.minimum(A_knn.T) if mutual else A_knn.maximum(A_knn.T)
    A.eliminate_zeros()

    # A is symmetric, so the strict upper triangle lists every undirected edge
    # exactly once; COO keeps row/col/data aligned by construction.
    coo = A.tocoo()
    upper = coo.row < coo.col
    rows = coo.row[upper].tolist()   # plain Python ints as node labels
    cols = coo.col[upper].tolist()
    d = coo.data[upper]

    # weights
    if weight_fn == "inv1p":
        w = 1.0 / (1.0 + d)
    else:  # "gauss"
        if sigma is None:
            # Median distance to the k-th neighbour as the scale. The row-wise
            # max of a sparse matrix is itself sparse, hence the densify step;
            # the directed graph computed above is reused instead of running
            # a second neighbour search.
            kth = np.asarray(A_knn.max(axis=1).todense()).ravel()
            sigma = np.median(kth)
        if sigma == 0:
            # Would otherwise divide by zero and yield NaN weights.
            raise ValueError("sigma debe ser distinto de cero")
        w = np.exp(-(d**2)/(2*sigma**2))

    G = nx.Graph()
    # Register every row first so rows without edges (possible with
    # mutual=True) are still present in G.
    G.add_nodes_from(range(n_samples))
    G.add_weighted_edges_from(zip(rows, cols, w.tolist()))

    # giant component
    giant_nodes = max(nx.connected_components(G), key=len)
    H = G.subgraph(giant_nodes).copy()

    return G, H

# ------------ 2) Selecting N nodes to visualise ---------------------------
def sample_subgraph(H, N=None, strategy="random", center=None, kcore_hint=None,
                    seed=None):
    """
    Return a subgraph of ``H`` with roughly ``N`` nodes, for plotting.

    Parameters
    ----------
    H : networkx graph
        Graph to sample from.
    N : int or None
        Target number of nodes. If None, or not smaller than the number of
        nodes in ``H``, ``H`` itself is returned (the same object, not a copy).
    strategy : str
      - 'random': uniform sample of N nodes without replacement
      - 'degree': the N nodes of highest degree
      - 'bfs': the first N nodes reached by a BFS from ``center``
        (useful for a local "zoom")
      - 'kcore': the largest k-core with at most N nodes; if no core within
        the searched range is that small, the deepest non-empty core found
        is trimmed to N nodes at random
    center : node, optional
        BFS start node; defaults to a node of maximum degree.
    kcore_hint : int, optional
        Upper bound on the k values tried by 'kcore' (at least 1). By default
        all core levels present in ``H`` are considered.
    seed : optional
        Forwarded to ``np.random.default_rng`` so that random draws can be
        reproduced. None (default) keeps the previous non-deterministic
        behaviour.

    Returns
    -------
    networkx graph
        ``H`` itself in the early-return case, otherwise an independent copy
        of the induced subgraph.

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the names above.
    """
    n = H.number_of_nodes()
    if (N is None) or (N >= n):
        return H

    rng = np.random.default_rng(seed)

    def _random_nodes(graph):
        # Draw positions rather than node objects: passing the labels to
        # rng.choice would coerce them into a numpy array (tuple labels would
        # become rows of a 2-D array and stop being usable as nodes).
        pool = list(graph.nodes)
        picked = rng.choice(len(pool), size=N, replace=False)
        return [pool[i] for i in picked]

    if strategy == "random":
        return H.subgraph(_random_nodes(H)).copy()

    if strategy == "degree":
        deg = sorted(H.degree(), key=lambda x: x[1], reverse=True)[:N]
        nodes = [u for u,_ in deg]
        return H.subgraph(nodes).copy()

    if strategy == "bfs":
        if center is None:
            # default centre: a node of highest degree
            center = max(H.degree, key=lambda x: x[1])[0]
        nodes = []
        for u in nx.bfs_tree(H, center):
            nodes.append(u)
            if len(nodes) >= N:
                break
        return H.subgraph(nodes).copy()

    if strategy == "kcore":
        # k-cores are nested and only shrink as k grows, so the first k whose
        # core fits within N is the largest admissible core. (Stopping as soon
        # as a core is still too big would always stop at k=1.)
        core_of = nx.core_number(H)          # computed once, reused for every k
        deepest = max(core_of.values())      # no core exists beyond this level
        if kcore_hint is None:
            k_high = deepest
        else:
            k_high = min(deepest, max(1, kcore_hint))
        best_core = H
        for k in range(1, k_high + 1):
            best_core = nx.k_core(H, k, core_number=core_of)
            if best_core.number_of_nodes() <= N:
                break
        # still more than N nodes: trim at random
        if best_core.number_of_nodes() > N:
            return best_core.subgraph(_random_nodes(best_core)).copy()
        return best_core.copy()

    raise ValueError("strategy no reconocido.")

# ------------ 3) Positioning layouts --------------------------------------
def compute_layout(G, method="spring", seed=42, weight="weight"):
    """
    Compute node positions for ``G`` with one of three networkx layouts.

    Parameters
    ----------
    G : networkx graph
    method : {"spring", "kk", "spectral"}
        "spring" -> ``nx.spring_layout`` (receives ``seed``),
        "kk" -> ``nx.kamada_kawai_layout``,
        "spectral" -> ``nx.spectral_layout``.
    seed :
        Only forwarded to the spring layout.
    weight :
        Edge-attribute name forwarded to the chosen layout.

    Returns
    -------
    Whatever the selected networkx layout function returns.

    Raises
    ------
    ValueError
        For any other ``method``.

    NOTE(review): networkx's Kamada-Kawai appears to use the edge attribute as
    a path *length* (larger = farther apart) — confirm before passing
    similarity-style weights with method="kk".
    """
    if method == "spring":
        positions = nx.spring_layout(G, seed=seed, weight=weight)
    elif method == "kk":
        positions = nx.kamada_kawai_layout(G, weight=weight)
    elif method == "spectral":
        positions = nx.spectral_layout(G, weight=weight)
    else:
        raise ValueError("method debe ser 'spring', 'kk' o 'spectral'")
    return positions

# ------------ 4) Dibujo compacto ------------------------------------------
import matplotlib.pyplot as plt

def draw_graph(G, pos, color_array=None, cmap="viridis", node_size=6,
               edge_alpha=0.06, edge_width=0.25, title=None,
               cbar_label="z", figsize=(9, 9)):
    """
    Draw ``G`` at the given positions on a new figure and show it.

    Parameters
    ----------
    G : networkx graph
    pos : mapping node -> position
        Positions handed to the networkx drawing functions.
    color_array : array-like or None
        Per-node values used to colour the nodes through ``cmap``; a colour
        bar is added when given. Passed as ``node_color`` without a
        ``nodelist`` — presumably it must follow the iteration order of
        ``G.nodes``; confirm against the networkx drawing docs.
    cmap : str
        Colormap used when ``color_array`` is given.
    node_size : number
        Marker size of the nodes.
    edge_alpha, edge_width : number
        Transparency and line width of the edges.
    title : str or None
        Figure title; skipped when falsy.
    cbar_label : str
        Label of the colour bar (default "z", the previously fixed label).
    figsize : tuple
        Figure size in inches (default (9, 9), the previously fixed size).

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=figsize)

    # Edges first so the node markers are painted on top of them.
    nx.draw_networkx_edges(G, pos, ax=ax, alpha=edge_alpha, width=edge_width)

    if color_array is None:
        nx.draw_networkx_nodes(G, pos, ax=ax, node_size=node_size)
    else:
        markers = nx.draw_networkx_nodes(G, pos, ax=ax, node_size=node_size,
                                         node_color=color_array, cmap=cmap)
        fig.colorbar(markers, ax=ax).set_label(cbar_label)

    if title:
        ax.set_title(title)
    ax.set_axis_off()
    fig.tight_layout()
    plt.show()


In [None]:
# 1) Build the graph once: G is the full kNN graph, H its giant component.
G, H = build_graph_from_df(
    df,
    feat_cols=("u_g", "g_r", "r_i"),
    k=10,
    mutual=False,
    weight_fn="gauss",
)

In [None]:
# 1) Once: positions for the whole giant component, reused by every sub-sample.
# NOTE(review): networkx's Kamada-Kawai appears to treat the edge attribute as a
# path length (larger = farther apart), whereas "weight" here is a similarity —
# confirm this is the intended use.
pos_full = nx.kamada_kawai_layout(H, weight="weight")

# 2) Every time N nodes are shown
S = sample_subgraph(H, N=1500, strategy="random")

nodes_S = list(S.nodes)
pos_S = {n: pos_full[n] for n in nodes_S}
# Node ids are row positions (the graph was built from df[...].to_numpy()),
# so Z is looked up positionally; a label-based lookup only matches while df
# keeps its default 0..n-1 index.
z_S = df["Z"].iloc[nodes_S].to_numpy()

draw_graph(S, pos_S, color_array=z_S, title="Subgrafo N=1500 (pos fijo)")
