In [1]:
from pathlib import Path
import sys
import numpy as np
import pandas as pd

# Put the parent of the current working directory at the front of the import
# path (only if it is not already listed) so the `apclust` import below can be
# resolved from there. Re-running this cell does not add a duplicate entry.
REPO_ROOT = Path("..").resolve()
repo_root_entry = str(REPO_ROOT)
if repo_root_entry not in sys.path:
    sys.path.insert(0, repo_root_entry)

from apclust import run_mcss_ap

# --- Input -----------------------------------------------------------------
# The CSV is read with header=None, i.e. every row (including the first) is
# data. The file name suggests a 4-component Gaussian mixture with ~1K samples
# in ~2K dimensions -- TODO confirm against the data generator.
DATA_ROOT = REPO_ROOT.joinpath("Data")
synthetic_path = DATA_ROOT.joinpath(
    "samples_original_1_for_GaussMix4_DiagCovar_highVar_2Kdim_n1K.csv"
)
synthetic = pd.read_csv(synthetic_path, header=None)
# Dense float64 matrix handed to the clustering runs.
X = synthetic.to_numpy(dtype=np.float64)

# --- Output ----------------------------------------------------------------
# All results of this experiment go under one directory; it is created
# together with any missing parents, and an existing directory is not an error.
OUTPUT_BASE = REPO_ROOT.joinpath("results", "mcss", "synthetic_gaussmix4_highvar")
OUTPUT_BASE.mkdir(parents=True, exist_ok=True)

# Run the MCSS affinity-propagation experiment once per damping factor.
# Everything except `damping` (and the dataset name derived from it) is held
# fixed, so the two runs differ only in that one setting.
# NOTE(review): run_mcss_ap presumably writes its results under
# out_dir/dataset_name -- a later cell reads "mcss_summary.csv" from there;
# confirm against apclust.
for damping in (0.50, 0.85):
    # Settings passed through as `ap_params`; key order matches the original call.
    ap_settings = {
        "damping": damping,
        "max_iter": 400,
        "convergence_iter": 50,
        "random_state": 0,
        "affinity": "euclidean",
    }
    run_label = f"damping{damping:.2f}"

    run_mcss_ap(
        X,
        dataset_name=run_label,
        out_dir=OUTPUT_BASE,
        b=200,
        train_frac=0.80,
        random_seed=0,
        ap_params=ap_settings,
        metrics=("ari", "ami"),
    )



In [2]:
import pandas as pd

# Report, for each damping factor, the distribution of the number of clusters
# found on the training splits and the share of runs flagged as converged.
for damping in (0.50, 0.85):
    summary_csv = OUTPUT_BASE / f"damping{damping:.2f}" / "mcss_summary.csv"
    summary = pd.read_csv(summary_csv)
    k_values = summary["train_n_clusters"]

    mean_k = k_values.mean()
    # pandas' default variance is the sample variance (ddof=1).
    var_k = k_values.var()
    # mode() returns the modal values sorted, so on a tie this is the smallest.
    mode_k = k_values.mode()[0]
    # Mean of a boolean mask = fraction of rows; scaled to a percentage.
    mode_pct = (k_values == mode_k).mean() * 100
    conv_pct = summary["train_converged"].mean() * 100

    # One print for the whole report; the trailing "\n" on the last line keeps
    # the blank separator between damping settings.
    report_lines = [
        f"damping={damping:.2f}",
        f"  E[K]      : {mean_k:.3f}",
        f"  Var[K]    : {var_k:.3f}",
        f"  Mode[K]   : {mode_k} ({mode_pct:.1f} %)",
        f"  %Converged: {conv_pct:.1f} %\n",
    ]
    print("\n".join(report_lines))

damping=0.50
  E[K]      : 14.095
  Var[K]    : 0.840
  Mode[K]   : 14 (46.0 %)
  %Converged: 99.0 %

damping=0.85
  E[K]      : 14.140
  Var[K]    : 0.935
  Mode[K]   : 14 (46.0 %)
  %Converged: 100.0 %

