In [5]:
from pathlib import Path
import shutil as sh
import json
import pandas as pd
import numpy as np

name="3-2-abs_mag"
models_path = Path(f'../runs/{name}/models')
clusters_path = Path(f'../runs/{name}/clusters')
stats_path = Path(f'../runs/{name}/stats')

if not stats_path.exists():
    stats_path.mkdir()


for model in models_path.iterdir():
    if not model.is_dir():
        continue
    sh.copy(str(model / 'stats.json') , str(stats_path / f'{model.stem}.json') )
    
stats_files = list(stats_path.iterdir())

In [6]:
stats_df = pd.DataFrame()
for stat_f in stats_files:
    print("========= " + stat_f.stem + " ==========")
    
    with open(stat_f, 'r') as f:
        stats = json.load(f)
        
        for k, run in stats.items():
            print(f'--- {k}: {len(run)} ---') 
            
            # print(k, run)
            
            loss = 0
            cluster_sizes = []
            for cluster in run:
                loss += cluster['loss']*cluster['cluster_size']
                cluster_sizes.append(cluster['cluster_size'])
            loss /= sum(cluster_sizes) 
            
            stats_df = pd.concat([stats_df, pd.DataFrame([
                    {'model': stat_f.stem, 
                    'run': k, 
                    'cluster sizes':cluster_sizes, 
                    'nclusters': len(run),
                    'total_length': sum([c['cluster_size'] for c in run]),
                    'avg loss': loss,
                    'min loss' : min([c['loss'] for c in run]),
                    'max loss': max([c['loss'] for c in run]),
                    'std loss': np.std([c['loss'] for c in run]),}
                ])
            ])
                
            print("Average Cluster Loss: ", loss)
stats_df.reset_index(drop=True, inplace=True)
stats_df[['run', 'nclusters']] = stats_df[['run', 'nclusters']].astype(int)
        
    

--- 0: 4 ---
Average Cluster Loss:  0.0802650625021767
--- 4: 3 ---
Average Cluster Loss:  0.08084889762492828
--- 5: 3 ---
Average Cluster Loss:  0.08096273578959752
--- 6: 3 ---
Average Cluster Loss:  0.08187170039915247
--- 0: 2 ---
Average Cluster Loss:  0.08026963639784443
--- 1: 3 ---
Average Cluster Loss:  0.07952267485690921
--- 2: 4 ---
Average Cluster Loss:  0.07768291501446555
--- 3: 5 ---
Average Cluster Loss:  0.07678782324609776
--- 4: 6 ---
Average Cluster Loss:  0.0756994840313104
--- 5: 7 ---
Average Cluster Loss:  0.0714240249234649
--- 0: 2 ---
Average Cluster Loss:  0.06164542422592311
--- 1: 3 ---
Average Cluster Loss:  0.05367243460878551
--- 2: 4 ---
Average Cluster Loss:  0.04942258907033101
--- 3: 5 ---
Average Cluster Loss:  0.048821621746705894
--- 4: 6 ---
Average Cluster Loss:  0.04906956942154014
--- 0: 2 ---
Average Cluster Loss:  0.08173391563423366
--- 1: 3 ---
Average Cluster Loss:  0.08068000560238857
--- 2: 4 ---
Average Cluster Loss:  0.079500426613

# Model Results

In [7]:
stats_df

Unnamed: 0,model,run,cluster sizes,nclusters,total_length,avg loss,min loss,max loss,std loss
0,minisom_alpha,0,"[3874, 3334, 2460, 2103]",4,11771,0.080265,0.079247,0.082735,0.001379
1,quantile_pca_dbscan,4,"[7168, 4521, 82]",3,11771,0.080849,0.077315,0.111069,0.014801
2,quantile_pca_dbscan,5,"[7183, 4530, 58]",3,11771,0.080963,0.080413,0.117962,0.017482
3,quantile_pca_dbscan,6,"[7187, 4540, 44]",3,11771,0.081872,0.079627,0.135266,0.025478
4,maxabs_tsne_kmeans,0,"[4763, 7008]",2,11771,0.080270,0.078874,0.081218,0.001172
...,...,...,...,...,...,...,...,...,...
135,pca_kmeans_mag,2,"[3672, 2473, 1845, 3781]",4,11771,0.048840,0.039608,0.071897,0.012346
136,pca_kmeans_mag,3,"[2977, 2316, 1628, 3258, 1592]",5,11771,0.052145,0.043489,0.091126,0.018149
137,pca_kmeans_mag,4,"[1555, 1984, 1371, 2814, 1210, 2837]",6,11771,0.047438,0.035457,0.077219,0.013490
138,quantile_pca_agg,0,"[7208, 4563]",2,11771,0.081674,0.080651,0.082321,0.000835


## Best Models

In [8]:
top10 = list(stats_df['avg loss'].nsmallest(10).index)
stats_df.loc[top10]

Unnamed: 0,model,run,cluster sizes,nclusters,total_length,avg loss,min loss,max loss,std loss
137,pca_kmeans_mag,4,"[1555, 1984, 1371, 2814, 1210, 2837]",6,11771,0.047438,0.035457,0.077219,0.01349
70,quantile_tsne_kmeans,7,"[1052, 1360, 1468, 1282, 1388, 1514, 1474, 108...",9,11771,0.048017,0.038904,0.076967,0.011158
76,quantile_tsne_agg,5,"[2242, 2027, 1612, 1037, 1504, 1914, 1435]",7,11771,0.048802,0.038029,0.073685,0.011037
13,tsne_kmeans_mag,3,"[2977, 2316, 1628, 3258, 1592]",5,11771,0.048822,0.040434,0.075762,0.013049
135,pca_kmeans_mag,2,"[3672, 2473, 1845, 3781]",4,11771,0.04884,0.039608,0.071897,0.012346
14,tsne_kmeans_mag,4,"[1555, 1984, 1371, 2814, 1210, 2837]",6,11771,0.04907,0.035604,0.087748,0.017211
12,tsne_kmeans_mag,2,"[3672, 2473, 1845, 3781]",4,11771,0.049423,0.040598,0.072607,0.012156
23,tsne_agg_mag,3,"[2466, 3115, 3213, 1485, 1492]",5,11771,0.049901,0.039285,0.078845,0.013916
77,quantile_tsne_agg,6,"[1612, 2027, 1914, 1037, 1504, 1143, 1435, 1099]",8,11771,0.049963,0.035505,0.081437,0.013708
114,minisom_mag,0,"[3443, 2772, 3760, 1796]",4,11771,0.050588,0.041635,0.08041,0.015499
