In [1]:
import sys
from pathlib import Path

# Resolve the parent of the current working directory to an absolute path and
# use it as the repository root.
# NOTE(review): assumes the kernel's working directory is a direct child of
# the repository root — confirm before running from elsewhere.
repo_root = Path("..").resolve()
# Prepend the root to sys.path only if it is not already listed, so re-running
# this cell does not add duplicate entries. Presumably this is what makes the
# `apclust` import later in this cell resolve — verify.
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

import numpy as np
from sklearn.metrics import adjusted_rand_score

from apclust import (
    load_study_panels,
    StudyMatrices,
    run_ap_grid,
)

In [2]:
# Study dictionaries are read from <repo_root>/Data, one .npy file per study.
data_dir = repo_root.joinpath("Data")

mainz_panels = load_study_panels(data_dir.joinpath("mainz_dict.npy"))
trans_panels = load_study_panels(data_dir.joinpath("transbig_dict.npy"))
vdx_panels = load_study_panels(data_dir.joinpath("vdx_dict.npy"))

# Take the "HK_3" entry from each study's panels and bundle the three into a
# single StudyMatrices, one field per study.
hk_studies = StudyMatrices(
    mainz=mainz_panels["HK_3"],
    transbig=trans_panels["HK_3"],
    vdx=vdx_panels["HK_3"],
)

In [None]:
from apclust import run_multiple_ap, summarize_ap_runs

In [5]:
# Mainz: call run_multiple_ap three times' worth (n_runs=3) on the Mainz entry
# of hk_studies with preference=None, damping=0.5 and random_state=None, then
# summarise the runs and show the summary as the cell output.
runs = run_multiple_ap(
    hk_studies.mainz, n_runs=3, preference=None, damping=0.5, random_state=None
)
summary = summarize_ap_runs(runs)
summary

Unnamed: 0,run,input_preference,preference,damping,max_iter,convergence_iter,n_clusters,converged,n_iter,exemplar_count,runtime,labels_equal_ref,ari_to_ref
0,1,,-6.394521,0.5,200,15,14,True,26,14,0.015854,True,1.0
1,2,,-6.394521,0.5,200,15,14,True,26,14,0.008797,True,1.0
2,3,,-6.394521,0.5,200,15,14,True,26,14,0.007559,True,1.0


In [7]:
# TransBIG: same settings as the Mainz cell (n_runs=3, preference=None,
# damping=0.5, random_state=None), applied to the transbig entry of hk_studies.
runs = run_multiple_ap(
    hk_studies.transbig, n_runs=3, preference=None, damping=0.5, random_state=None
)
summary = summarize_ap_runs(runs)
summary

Unnamed: 0,run,input_preference,preference,damping,max_iter,convergence_iter,n_clusters,converged,n_iter,exemplar_count,runtime,labels_equal_ref,ari_to_ref
0,1,,-12.579091,0.5,200,15,14,True,30,14,0.017635,True,1.0
1,2,,-12.579091,0.5,200,15,14,True,30,14,0.009874,True,1.0
2,3,,-12.579091,0.5,200,15,14,True,30,14,0.008641,True,1.0


In [8]:
# VDX: same settings as the Mainz cell (n_runs=3, preference=None,
# damping=0.5, random_state=None), applied to the vdx entry of hk_studies.
runs = run_multiple_ap(
    hk_studies.vdx, n_runs=3, preference=None, damping=0.5, random_state=None
)
summary = summarize_ap_runs(runs)
summary

Unnamed: 0,run,input_preference,preference,damping,max_iter,convergence_iter,n_clusters,converged,n_iter,exemplar_count,runtime,labels_equal_ref,ari_to_ref
0,1,,-15.048569,0.5,200,15,16,True,31,16,0.038113,True,1.0
1,2,,-15.048569,0.5,200,15,16,True,31,16,0.019656,True,1.0
2,3,,-15.048569,0.5,200,15,16,True,31,16,0.015386,True,1.0


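## Random state

Repeat the Mainz run with a fixed `random_state` to compare against the runs above, which used `random_state=None`.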

In [13]:
# Mainz with an explicit seed: identical to the earlier Mainz call except that
# random_state is 5 instead of None (n_runs=3, preference=None, damping=0.5).
runs = run_multiple_ap(
    hk_studies.mainz, n_runs=3, preference=None, damping=0.5, random_state=5
)
summary = summarize_ap_runs(runs)
summary

Unnamed: 0,run,input_preference,preference,damping,max_iter,convergence_iter,n_clusters,converged,n_iter,exemplar_count,runtime,labels_equal_ref,ari_to_ref
0,1,,-6.394521,0.5,200,15,14,True,26,14,0.022289,True,1.0
1,2,,-6.394521,0.5,200,15,14,True,26,14,0.015816,True,1.0
2,3,,-6.394521,0.5,200,15,14,True,26,14,0.008215,True,1.0
