In [2]:
import sys
import os

# Make the parent of the current working directory importable so that
# `src.*` modules resolve when the notebook is run from its own folder.
# Guarded so that re-running this cell does not keep appending duplicates
# of the same entry to sys.path.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
from sklearn.metrics import davies_bouldin_score, calinski_harabasz_score, silhouette_score
from src.clust_utils import dunn_score
import numpy as np
from tqdm import tqdm


# Load the test features and the per-algorithm cluster predictions.
# NOTE(review): allow_pickle=True makes NumPy unpickle object arrays, which can
# execute arbitrary code; only load these files from a trusted source.
# Presumably needed because the arrays hold per-sample objects; confirm.
features_path = "../data/test/features.npy"
predictions_path = "../data/test/algorithm_predictions.npy"

dataset = np.load(features_path, allow_pickle=True)
predictions = np.load(predictions_path, allow_pickle=True)


def _safe_score(metric, data, labels):
    """Evaluate ``metric(data, labels)``, returning NaN if the metric fails.

    Parameters
    ----------
    metric : callable
        Clustering validity index called as ``metric(data, labels)``.
    data : array-like
        Feature matrix of a single sample/dataset.
    labels : array-like
        Cluster labels produced by one algorithm for ``data``.

    Returns
    -------
    The value returned by ``metric``, or ``np.nan`` when it raises.

    Notes
    -----
    Only ``Exception`` subclasses are swallowed. ``KeyboardInterrupt`` and
    ``SystemExit`` propagate, so this long-running cell can still be
    interrupted instead of silently recording NaN and carrying on.
    """
    try:
        return metric(data, labels)
    except Exception:
        # Best-effort: a labeling the metric rejects is recorded as missing
        # rather than aborting the whole run.
        return np.nan


# Prepare containers: one entry per sample, each entry an array holding
# one score per algorithm prediction.
db_scores = []
ch_scores = []
sil_scores = []
dunn_scores = []
# Iterate through every sample in the dataset
for i in tqdm(range(len(dataset))):
    data_i = dataset[i]
    preds_i = predictions[i]

    db_i, ch_i, sil_i, dunn_i = [], [], [], []

    # Score each algorithm's labeling of this sample with all four indices.
    for algo_pred in preds_i:
        db_i.append(_safe_score(davies_bouldin_score, data_i, algo_pred))
        ch_i.append(_safe_score(calinski_harabasz_score, data_i, algo_pred))
        sil_i.append(_safe_score(silhouette_score, data_i, algo_pred))
        dunn_i.append(_safe_score(dunn_score, data_i, algo_pred))

    db_scores.append(np.array(db_i))
    ch_scores.append(np.array(ch_i))
    sil_scores.append(np.array(sil_i))
    dunn_scores.append(np.array(dunn_i))

100%|██████████| 3400/3400 [20:00<00:00,  2.83it/s] 


In [5]:
# Destination directory and the collected score lists, keyed by the
# file stem each one is saved under.
save_dir = "../data/test/"
score_arrays = dict(
    db_scores=db_scores,
    ch_scores=ch_scores,
    sil_scores=sil_scores,
    dunn_scores=dunn_scores,
)

# Write each metric to "<save_dir>/<key>.npy", converting the list of
# per-sample arrays into a single NumPy array first.
for metric_name in score_arrays:
    target_path = os.path.join(save_dir, f"{metric_name}.npy")
    stacked_scores = np.array(score_arrays[metric_name])
    np.save(target_path, stacked_scores)