In [1]:
import os
import numpy as np
from tqdm import tqdm
import pickle
from sklearn.metrics import davies_bouldin_score, calinski_harabasz_score
import plotly.graph_objects as go
from random import choices
from sklearn.manifold import TSNE

# from evaluate import evaluate
from utils.io import load_file

In [2]:
# Input directories with the clustering artefacts. Both are resolved against
# os.getcwd(), so they depend on the directory the kernel was started in.
labels_dir = os.path.join(os.getcwd(), '../result/clustering/cluster_result/labels')
latent_space_dir = os.path.join(os.getcwd(), '../result/clustering/cluster_result/latent_space')

# Directory of the pickled evaluation results. The default is a machine-specific
# absolute path; set the ABC2_EVALUATION_DIR environment variable to point the
# notebook somewhere else without editing this cell.
evaluation_dir = os.environ.get('ABC2_EVALUATION_DIR') or '/scratch/yk1962/ABC2/result/evaluation'

# Metric name -> scikit-learn scoring callable. The evaluation loop calls each
# one as metric(latent_space, labels).
methods_dict = {
    'davies_bouldin_score': davies_bouldin_score,
    'calinski_harabasz_score': calinski_harabasz_score,
}
# Metric names in evaluation order, derived from the mapping so the two can
# never drift apart.
methods = list(methods_dict)
# result_dict = {'byol_latent_space.npy': 'Byol-k=32.npy', 'atlasnet_latent_space.npy': 'AtlasNet-k=32.npy'}

In [3]:
def rename(directory=None):
    """Append a ``.npy`` suffix to the IIC files of a directory that lack one.

    Every entry whose name contains ``'IIC'`` and does not already end in
    ``.npy`` is renamed to ``<name>.npy``. Entries that already carry the
    suffix are left alone, so calling the function repeatedly is safe.

    Args:
        directory: Directory to process. Defaults to the module-level
            ``latent_space_dir`` when omitted.
    """
    if directory is None:
        directory = latent_space_dir
    for name in sorted(os.listdir(directory)):
        if 'IIC' not in name or name.endswith('.npy'):
            continue
        path = os.path.join(directory, name)
        # os.replace avoids spawning a shell, so names with spaces or shell
        # metacharacters are handled correctly; like `mv`, it overwrites an
        # existing destination file.
        os.replace(path, path + '.npy')

In [4]:
# Every entry of the latent-space directory, in lexicographic order.
latent_space_list = os.listdir(latent_space_dir)
latent_space_list.sort()
latent_space_list

['AtlasNet_latent_space.npy',
 'BYOL_latent_space.npy',
 'DEC_latent_space-k=1024.npy',
 'DEC_latent_space-k=128.npy',
 'DEC_latent_space-k=2000.npy',
 'DEC_latent_space-k=256.npy',
 'DEC_latent_space-k=32.npy',
 'DEC_latent_space-k=512.npy',
 'DEC_latent_space-k=64.npy',
 'DeepCluster_latent_space-k=1024.npy',
 'DeepCluster_latent_space-k=128.npy',
 'DeepCluster_latent_space-k=2000.npy',
 'DeepCluster_latent_space-k=256.npy',
 'DeepCluster_latent_space-k=32.npy',
 'DeepCluster_latent_space-k=512.npy',
 'DeepCluster_latent_space-k=64.npy',
 'IIC_latent_space-k=1024.npy',
 'IIC_latent_space-k=128.npy',
 'IIC_latent_space-k=2000.npy',
 'IIC_latent_space-k=256.npy',
 'IIC_latent_space-k=32.npy',
 'IIC_latent_space-k=512.npy',
 'IIC_latent_space-k=64.npy',
 'MVCNN_latent_space.npy',
 'SCAN_latent_space-k=1024.npy',
 'SCAN_latent_space-k=128.npy',
 'SCAN_latent_space-k=2000.npy',
 'SCAN_latent_space-k=256.npy',
 'SCAN_latent_space-k=32.npy',
 'SCAN_latent_space-k=512.npy',
 'SCAN_latent_spa

In [5]:
# Cluster-label files: entries of labels_dir whose name contains '.npy',
# sorted lexicographically.
labels_list = sorted(name for name in os.listdir(labels_dir) if '.npy' in name)
labels_list

['AtlasNet-k=1024.npy',
 'AtlasNet-k=128.npy',
 'AtlasNet-k=2000.npy',
 'AtlasNet-k=256.npy',
 'AtlasNet-k=32.npy',
 'AtlasNet-k=512.npy',
 'AtlasNet-k=64.npy',
 'BYOL-k=1024.npy',
 'BYOL-k=128.npy',
 'BYOL-k=2000.npy',
 'BYOL-k=256.npy',
 'BYOL-k=32.npy',
 'BYOL-k=512.npy',
 'BYOL-k=64.npy',
 'DEC-k=1024.npy',
 'DEC-k=128.npy',
 'DEC-k=2000.npy',
 'DEC-k=256.npy',
 'DEC-k=32.npy',
 'DEC-k=512.npy',
 'DEC-k=64.npy',
 'DeepCluster-k=1024.npy',
 'DeepCluster-k=128.npy',
 'DeepCluster-k=2000.npy',
 'DeepCluster-k=256.npy',
 'DeepCluster-k=32.npy',
 'DeepCluster-k=512.npy',
 'DeepCluster-k=64.npy',
 'IIC-k=1024.npy',
 'IIC-k=128.npy',
 'IIC-k=2000.npy',
 'IIC-k=256.npy',
 'IIC-k=32.npy',
 'IIC-k=512.npy',
 'IIC-k=64.npy',
 'MVCNN-k=1024.npy',
 'MVCNN-k=128.npy',
 'MVCNN-k=2000.npy',
 'MVCNN-k=256.npy',
 'MVCNN-k=32.npy',
 'MVCNN-k=512.npy',
 'MVCNN-k=64.npy',
 'SCAN-k=1024.npy',
 'SCAN-k=128.npy',
 'SCAN-k=2000.npy',
 'SCAN-k=256.npy',
 'SCAN-k=32.npy',
 'SCAN-k=512.npy',
 'SCAN-k=64.npy']

In [6]:
def save_obj(obj, name, obj_dir, postfix=True):
    """Pickle ``obj`` into ``obj_dir`` under the file name ``name``.

    Args:
        obj: Object to serialise.
        name: Base file name inside ``obj_dir``.
        obj_dir: Directory the file is written to.
        postfix: When truthy, ``'.pkl'`` is appended to the file name.
    """
    suffix = '.pkl' if postfix else ''
    target = os.path.join(obj_dir, name) + suffix
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
# Start from the results previously stored as 'extra_evaluation_result' in
# evaluation_dir, so the evaluation loop can skip every subject whose key is
# already present in the dictionary.
# NOTE(review): load_file is a project helper not shown in this notebook;
# presumably load_type='pkl' unpickles the file and it fails when the file is
# missing -- confirm, and only load pickles from a trusted location.
# Alternative starting points used in earlier runs:
# evaluation_dict = load_file('evaluation_result', evaluation_dir, load_type='pkl')
# evaluation_dict = {}
evaluation_dict = load_file('extra_evaluation_result', evaluation_dir, load_type='pkl')

In [8]:
# Score every latent space against its matching cluster-label files with each
# metric in `methods`, reusing any score already present in `evaluation_dict`.
for latent_space_name in tqdm(latent_space_list):
    latent_space = np.load(os.path.join(latent_space_dir, latent_space_name))

    # Latent files are named '<Method>_latent_space[-k=<K>].npy' and label files
    # '<Method>-k=<K>.npy'. The parsed method and k fields are compared for
    # equality: a substring test would let e.g. k=12 match k=128 and k=512, or
    # a method name match any longer name that contains it.
    method_name = latent_space_name.split('_')[0]
    relevant_labels = [name for name in labels_list if name.split('-k=')[0] == method_name]
    if 'k=' in latent_space_name:
        # The latent space was produced for one specific k: keep only that file.
        k_value = latent_space_name.split('k=')[-1].split('.')[0]
        relevant_labels = [
            name for name in relevant_labels
            if name.split('k=')[-1].split('.')[0] == k_value
        ]

    # Read each label file once per latent space instead of once per metric.
    labels_by_name = {
        name: np.load(os.path.join(labels_dir, name)) for name in relevant_labels
    }

    for method in methods:
        for labels_name in relevant_labels:
            labels = labels_by_name[labels_name]
            subject_name = 'cluster result name: {}, evaluation method: {}, labels={}'.format(latent_space_name, method, labels_name)
            if subject_name in evaluation_dict:
                # Already computed in a previous run.
                continue
            score = methods_dict[method](latent_space, labels)
            evaluation_dict[subject_name] = score

# Nothing is written to disk here; the results are persisted by the separate
# save_obj(...) cell further down.

100%|██████████| 31/31 [00:15<00:00,  1.99it/s]


In [9]:
# Show all scores. Keys written by the evaluation loop have the form
# 'cluster result name: <latent file>, evaluation method: <metric>, labels=<label file>'.
evaluation_dict

{'cluster result name: AtlasNet_latent_space.npy, evaluation method: davies_bouldin_score, labels=AtlasNet-k=1024.npy': 8.670681520858725,
 'cluster result name: AtlasNet_latent_space.npy, evaluation method: davies_bouldin_score, labels=AtlasNet-k=128.npy': 24.282790220985977,
 'cluster result name: AtlasNet_latent_space.npy, evaluation method: davies_bouldin_score, labels=AtlasNet-k=2000.npy': 5.914476572752382,
 'cluster result name: AtlasNet_latent_space.npy, evaluation method: davies_bouldin_score, labels=AtlasNet-k=256.npy': 17.311879688688958,
 'cluster result name: AtlasNet_latent_space.npy, evaluation method: davies_bouldin_score, labels=AtlasNet-k=32.npy': 44.76687261624034,
 'cluster result name: AtlasNet_latent_space.npy, evaluation method: davies_bouldin_score, labels=AtlasNet-k=512.npy': 12.419137068955035,
 'cluster result name: AtlasNet_latent_space.npy, evaluation method: davies_bouldin_score, labels=AtlasNet-k=64.npy': 32.81692879989689,
 'cluster result name: AtlasNet

In [10]:
# Persist the merged scores; save_obj appends '.pkl' to the name by default.
save_obj(
    obj=evaluation_dict,
    name='extra_evaluation_result',
    obj_dir=evaluation_dir,
)

In [None]:
keys = 
for key in 