In [1]:
from ReduMetrics.metrics.ulse import ulse_score_sklearn
from ReduMetrics.metrics.rta import rta_score
from ReduMetrics.metrics.spearman import spearman_correlation
from ReduMetrics.metrics.k_ncp import kncp_score
from ReduMetrics.metrics.cdc import cdc_score

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os 

from sklearn.datasets import make_blobs, make_swiss_roll, make_s_curve, make_moons, make_circles

# Parámetros globales
SEED = 1234
rng = np.random.default_rng(SEED)

# Tamaños por defecto 
M = 1500   # muestras
N = 20     # dimensión alta
C = 5      # clases (cuando aplica)
R = 2      # dimensión reducida

# Parámetros de métricas 
K_DEFAULT = 10        # ULSE / k-NCP
T_TRIPLETS = 10_000   # RTA
P_PAIRS = 10_000      # Spearman 

# Utilidad: formateo de tablas
pd.options.display.float_format = lambda v: f"{v:.4f}"

In [3]:
def standardize_columns(X: np.ndarray) -> np.ndarray:
    """Z-score por columna."""
    mu = X.mean(axis=0, keepdims=True)
    sd = X.std(axis=0, keepdims=True)
    sd[sd == 0] = 1.0
    return (X - mu) / sd

def random_orthogonal_matrix(d: int, seed: int) -> np.ndarray:
    """Matriz ortogonal Q vía QR a partir de gaussiana."""
    rng = np.random.default_rng(seed)
    A = rng.normal(size=(d, d))
    Q, _ = np.linalg.qr(A)
    return Q

def make_base_blobs(m=M, n=N, centers=C, cluster_std=1.2, seed=SEED):
    X, y = make_blobs(n_samples=m, n_features=n, centers=centers,
                      cluster_std=cluster_std, random_state=seed)
    return X.astype(float), y.astype(int)

def compute_all_metrics(X_high, X_low, labels, k=K_DEFAULT, T=T_TRIPLETS, P=P_PAIRS, seed=SEED):
    """Devuelve un dict con las cinco métricas sobre (X_high, X_low, labels)."""
    ulse = ulse_score_sklearn(X_high, X_low, k=k)
    rta  = rta_score(X_high, X_low, T=T, random_state=seed)
    spr  = spearman_correlation(X_high, X_low, P=P, random_state=seed)
    kncp = kncp_score(X_high, X_low, labels)
    cdc  = cdc_score(X_high, X_low, labels)
    return {"ULSE": float(ulse), "RTA": float(rta), "Spearman": float(spr),
            "k-NCP": float(kncp), "CDC": float(cdc)}

def barplot_metrics(df_row, title, fname=None, outdir=".", show=False, dpi=200):
    fig, ax = plt.subplots(figsize=(6, 4))
    names = df_row.index.tolist()
    vals  = df_row.values.astype(float)
    ax.bar(names, vals)
    ax.set_ylim(0, 1)
    ax.set_ylabel("score")
    ax.set_title(title)
    for i, v in enumerate(vals):
        ax.text(i, min(0.97, v + 0.02), f"{v:.3f}", ha="center", va="bottom", fontsize=9)

    if fname is not None:
        os.makedirs(outdir, exist_ok=True)
        path = os.path.join(outdir, f"{fname}.png")
        fig.savefig(path, dpi=dpi, bbox_inches="tight")

    if show:
        plt.show()
    else:
        plt.close(fig)

def lineplot_with_band(x, y_mean, y_std, title, xlabel="σ", ylabel="score",
                       ylim=(0,1), fname=None, outdir=".", show=False, dpi=300):
    """Curva con banda ±1 std; permite guardar a disco."""
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(x, y_mean, marker="o")
    ax.fill_between(x, y_mean - y_std, y_mean + y_std, alpha=0.2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.set_title(title)

    if fname is not None:
        os.makedirs(outdir, exist_ok=True)
        fig.savefig(os.path.join(outdir, f"{fname}.png"), dpi=dpi, bbox_inches="tight")

    if show:
        plt.show()
    else:
        plt.close(fig)


In [None]:
# Datos base con clases
Xh_raw, labels = make_blobs(n_samples=M, n_features=N, centers=C,
                            cluster_std=1.0, random_state=SEED)
Xh = standardize_columns(Xh_raw)  

# Isometrías
# Identidad
Xlow_ident = Xh.copy()

# Traslación: sumar vector constante
b = np.full((1, N), 0.5)
Xlow_trans = Xh + b

# Escala uniforme: multiplicar por escalar positivo
s = 2.0
Xlow_scale = s * Xh

# Rotación en el plano 
theta = np.deg2rad(30.0)  # 30 grados
R2 = np.array([[np.cos(theta), -np.sin(theta)],
               [np.sin(theta),  np.cos(theta)]], dtype=float)
Q = np.eye(N)
Q[:2, :2] = R2
Xlow_rot2d = Xh @ Q

# Evaluar métricas
results_A1 = {
    "Identidad":        compute_all_metrics(Xh, Xlow_ident,  labels, k=K_DEFAULT, T=T_TRIPLETS, P=P_PAIRS, seed=SEED),
    "Traslación":       compute_all_metrics(Xh, Xlow_trans,  labels, k=K_DEFAULT, T=T_TRIPLETS, P=P_PAIRS, seed=SEED),
    "Escala uniforme":  compute_all_metrics(Xh, Xlow_scale,  labels, k=K_DEFAULT, T=T_TRIPLETS, P=P_PAIRS, seed=SEED),
    "Rotación (plano)": compute_all_metrics(Xh, Xlow_rot2d,  labels, k=K_DEFAULT, T=T_TRIPLETS, P=P_PAIRS, seed=SEED),
}

df_A1 = pd.DataFrame(results_A1).T
df_A1


Unnamed: 0,ULSE,RTA,Spearman,k-NCP,CDC
Identidad,1.0,1.0,1.0,1.0,1.0
Traslación,1.0,1.0,1.0,1.0,1.0
Escala uniforme,1.0,1.0,1.0,1.0,1.0
Rotación (plano),1.0,1.0,1.0,1.0,1.0


In [None]:
outdir_A1 = "./figs_A1"

name_map = {
    "Identidad":        "A1_identidad",
    "Traslación":       "A1_traslacion",
    "Escala uniforme":  "A1_escala",
    "Rotación (plano)": "A1_rotacion",
}

for caso in df_A1.index:
    fname = name_map.get(caso, f"A1_{caso.lower().replace(' ', '_')}")
    barplot_metrics(
        df_row=df_A1.loc[caso],
        title=f"A1 · {caso}",
        fname=fname,
        outdir=outdir_A1,
        show=False,   
        dpi=300       
    )


In [None]:

# ## 6.1 · A2 — Permutación de correspondencias


m = Xh.shape[0]
perm = rng.permutation(m)

Xlow_perm = Xh[perm]

# Métricas observadas
obs = compute_all_metrics(Xh, Xlow_perm, labels, k=K_DEFAULT, T=T_TRIPLETS, P=P_PAIRS, seed=SEED)

# Valores de referencia 
k = K_DEFAULT
C_ = np.unique(labels).size
expected = {
    "ULSE": k / (m - 1),
    "RTA": 0.5,
    "Spearman": 0.0,
    "k-NCP": k / (C_ - 1),
    "CDC": 0.0
}

df_A2 = pd.DataFrame({"Observado": obs, "Esperado (azar)": expected})
df_A2


Unnamed: 0,Observado,Esperado (azar)
ULSE,0.0065,0.0067
RTA,0.5028,0.5
Spearman,0.0012,0.0
k-NCP,0.0,2.5
CDC,0.3091,0.0


In [7]:
outdir_A2 = "./figs_A2"
os.makedirs(outdir_A2, exist_ok=True)

name_map = {
    "ULSE": "A2_ulse",
    "RTA": "A2_rta",
    "Spearman": "A2_spearman",
    "k-NCP": "A2_kncp",
    "CDC": "A2_cdc",
}

for metric in df_A2.index:
    vals = df_A2.loc[metric]
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(["Observado", "Esperado"], [vals["Observado"], vals["Esperado (azar)"]])
    ax.set_ylim(0, 1)
    ax.set_title(f"A2 · {metric}")
    for i, v in enumerate([vals["Observado"], vals["Esperado (azar)"]]):
        ax.text(i, min(0.97, v + 0.02), f"{v:.3f}", ha="center", va="bottom", fontsize=9)

    # guardar y cerrar
    fname = name_map.get(metric, f"A2_{metric.lower().replace(' ', '_')}")
    fig.savefig(os.path.join(outdir_A2, f"{fname}.png"), dpi=300, bbox_inches="tight")
    plt.close(fig)


In [None]:
# ## 6.1 · A3 — Ruido controlado en el espacio reducido


sigmas = [0.0, 0.05, 0.10, 0.20, 0.50]
REPEATS = 5  

base_scale = Xh.std()

records = []
for sigma in sigmas:
    vals_rep = []
    for rep in range(REPEATS):
        eps = np.random.default_rng(SEED + 200 + rep).normal(scale=sigma * base_scale, size=Xh.shape)
        Xlow_noisy = Xh + eps
        vals_rep.append(compute_all_metrics(Xh, Xlow_noisy, labels,
                                            k=K_DEFAULT, T=T_TRIPLETS, P=P_PAIRS,
                                            seed=SEED + rep))
    mean = {f"{k}_mean": float(np.mean([v[k] for v in vals_rep])) for k in vals_rep[0]}
    std  = {f"{k}_std": float(np.std([v[k] for v in vals_rep], ddof=1)) for k in vals_rep[0]}
    records.append({"sigma": sigma, **mean, **std})

df_A3 = pd.DataFrame(records).set_index("sigma")
df_A3


Unnamed: 0_level_0,ULSE_mean,RTA_mean,Spearman_mean,k-NCP_mean,CDC_mean,ULSE_std,RTA_std,Spearman_std,k-NCP_std,CDC_std
sigma,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
0.05,0.7531,0.979,0.998,1.0,1.0,0.003,0.0006,0.0001,0.0,0.0
0.1,0.5507,0.9585,0.9922,1.0,1.0,0.0068,0.0017,0.0002,0.0,0.0
0.2,0.2832,0.9226,0.971,1.0,0.9976,0.006,0.0021,0.0008,0.0,0.0054
0.5,0.0807,0.8372,0.8663,1.0,0.9952,0.0018,0.0045,0.0029,0.0,0.0066


In [None]:
outdir_A3 = "./figuras"  
name_map = {
    "ULSE":     "A3_ulse_sigma",
    "RTA":      "A3_rta_sigma",
    "Spearman": "A3_spearman_sigma",
    "k-NCP":    "A3_kncp_sigma",
    "CDC":      "A3_cdc_sigma",
}

for metric in ["ULSE", "RTA", "Spearman", "k-NCP", "CDC"]:
    y_mean = df_A3[f"{metric}_mean"].values
    y_std  = df_A3[f"{metric}_std"].values
    lineplot_with_band(
        df_A3.index.values, y_mean, y_std,
        title=f"A3 · {metric} vs σ",
        xlabel="σ", ylabel=metric, ylim=(0,1),
        fname=name_map[metric], outdir=outdir_A3, show=False, dpi=300
    )

