In [3]:
import numpy as np
import pandas as pd

import glob
from pprint import pprint
import pathlib
import yaml

In [4]:
# Glob patterns (relative to this notebook) that match the individual run
# directories, two levels below each dated multirun folder.
roots = [
    f"../multirun/{run_date}/*/*"
    for run_date in (
        "2024-09-26",
        "2024-09-27",
        "2024-09-28",
        "2024-09-29",
        "2024-10-24",
        "2024-10-25",
    )
]

In [5]:
# Expand every glob pattern relative to the current working directory and
# keep only the matches that are directories, in pattern order.
directory_paths = [
    candidate
    for pattern in roots
    for candidate in pathlib.Path('.').glob(pattern)
    if candidate.is_dir()
]

In [6]:
def extract_config_from_path(p: pathlib.Path) -> dict[str, str | int]:
    """Load a run's saved config and keep only the fields used to label it.

    Reads ``<p>/.hydra/config.yaml``. When the config path contains the
    substring "exp2", the "metric" entry is set to "all". The returned dict
    holds whichever of "projection", "dataset", "k" and "metric" are present,
    in the order they appear in the loaded config.
    """
    wanted_keys = ("projection", "dataset", "k", "metric")
    config_path = p.joinpath(".hydra", "config.yaml")

    # NOTE(review): yaml.full_load resolves more tags than yaml.safe_load;
    # if these configs only ever contain plain YAML, safe_load may be the
    # safer choice — confirm before switching.
    with config_path.open("r") as handle:
        loaded = yaml.full_load(handle)

    if "exp2" in str(config_path):
        # exp2 tries to fool all metrics at once
        loaded["metric"] = "all"

    selected = {}
    for key, value in loaded.items():
        if key in wanted_keys:
            selected[key] = value
    return selected


# Map every discovered run directory to its reduced config dict.
directory_to_config = {
    run_dir: extract_config_from_path(run_dir)
    for run_dir in directory_paths
}

In [7]:
def read_metrics_per_epoch(p: pathlib.Path, config: dict) -> pd.DataFrame:
    """Load a run's per-epoch metrics and tag every row with the run's config.

    Args:
        p: Run directory containing ``metrics_per_epoch.csv``.
        config: Mapping of column name to the value that is broadcast into
            that column for every row. A key that matches an existing CSV
            column overwrites that column.

    Returns:
        The CSV contents (first line used as the header, first column kept as
        data rather than used as the index) with one extra column per config
        entry. A header-only CSV yields an empty frame that still carries the
        config columns.
    """
    fpath = p / "metrics_per_epoch.csv"
    df = pd.read_csv(fpath, header=0, index_col=False)

    # Plain column assignment instead of ``df.loc[:, k] = v``: the ``.loc``
    # form raises "cannot set a frame with no defined index and a scalar"
    # when the CSV holds only a header row, whereas this form simply adds an
    # empty column. For non-empty frames both forms add the same column.
    for column, value in config.items():
        df[column] = value
    return df


def read_postprocess_metrics(p: pathlib.Path, config: dict) -> pd.DataFrame:
    """Load a run's post-processing metrics and tag every row with the run's config.

    Args:
        p: Run directory containing ``postprocess_metrics.csv``.
        config: Mapping of column name to the value that is broadcast into
            that column for every row. A key that matches an existing CSV
            column overwrites that column.

    Returns:
        The CSV contents (first line used as the header) with one extra
        column per config entry. A header-only CSV yields an empty frame that
        still carries the config columns.
    """
    fpath = p / "postprocess_metrics.csv"
    df = pd.read_csv(fpath, header=0)

    # Plain column assignment instead of ``df.loc[:, k] = v``: the ``.loc``
    # form raises "cannot set a frame with no defined index and a scalar"
    # when the CSV holds only a header row, whereas this form simply adds an
    # empty column. For non-empty frames both forms add the same column.
    for column, value in config.items():
        df[column] = value
    return df

In [8]:
# Stack the per-epoch metrics of every run into one long table; each run's
# original row index is kept, so index values repeat across runs.
per_epoch_frames = [
    read_metrics_per_epoch(run_dir, run_config)
    for run_dir, run_config in directory_to_config.items()
]
per_epoch_df = pd.concat(per_epoch_frames)


In [9]:
# Stack the post-processing metrics of every run into one long table; each
# run's original row index is kept, so index values repeat across runs.
postprocess_frames = [
    read_postprocess_metrics(run_dir, run_config)
    for run_dir, run_config in directory_to_config.items()
]
postprocess_df = pd.concat(postprocess_frames)

In [10]:
# Preview the combined per-epoch metrics table (one row per run and logged epoch).
per_epoch_df

Unnamed: 0,epoch,average_local_error,class_aware_continuity,class_aware_trustworthiness,continuity,distance_consistency,false_neighbors,jaccard,missing_neighbors,mrre_data,...,pearson_correlation,procrustes,scale_normalized_stress,shepard_goodness,true_neighbors,trustworthiness,projection,dataset,k,metric
0,1,0.197,0.949,0.941,0.943,0.655,0.951,0.028,0.951,0.116,...,0.654,0.999,0.149,0.598,0.049,0.885,umap,har,7,jaccard
1,100,0.100,0.976,0.974,0.972,0.708,0.903,0.057,0.903,0.056,...,0.726,0.997,0.112,0.729,0.097,0.944,umap,har,7,jaccard
2,200,0.097,0.978,0.981,0.974,0.694,0.887,0.068,0.887,0.046,...,0.758,0.996,0.097,0.768,0.113,0.954,umap,har,7,jaccard
3,300,0.096,0.978,0.986,0.974,0.705,0.869,0.079,0.869,0.039,...,0.751,0.996,0.098,0.761,0.131,0.961,umap,har,7,jaccard
4,400,0.090,0.979,0.989,0.974,0.700,0.848,0.092,0.848,0.034,...,0.750,0.995,0.097,0.760,0.152,0.966,umap,har,7,jaccard
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,700,0.366,0.874,0.838,0.682,0.585,0.918,0.082,0.918,0.327,...,-0.244,0.000,0.461,-0.318,0.082,0.673,mds,reuters,1,all
8,800,0.354,0.873,0.844,0.682,0.599,0.916,0.084,0.916,0.317,...,-0.240,0.000,0.441,-0.313,0.084,0.683,mds,reuters,1,all
9,900,0.361,0.873,0.858,0.680,0.619,0.916,0.084,0.916,0.314,...,-0.235,0.000,0.430,-0.303,0.084,0.686,mds,reuters,1,all
10,1000,0.362,0.873,0.861,0.681,0.626,0.914,0.086,0.914,0.309,...,-0.229,0.000,0.424,-0.298,0.086,0.690,mds,reuters,1,all


> - For each D: show a table with P rows and M columns containing the final metric values, both numerically and color-coded (for the color coding, see e.g. the style of Tab. 2 in https://webspace.science.uu.nl/~telea001/uploads/PAPERS/EuroVis20/paper1.pdf).

In [10]:
# Keep only the rows logged with the sentinel epoch value -1.
# NOTE(review): presumably -1 marks each run's final metric values — confirm.
per_epoch_df.loc[per_epoch_df["epoch"] == -1]

Unnamed: 0,epoch,average_local_error,class_aware_continuity,class_aware_trustworthiness,continuity,distance_consistency,false_neighbors,jaccard,missing_neighbors,mrre_data,...,pearson_correlation,procrustes,scale_normalized_stress,shepard_goodness,true_neighbors,trustworthiness,projection,dataset,k,metric
11,-1,0.159,0.991,0.994,0.988,0.696,0.686,0.209,0.686,0.021,...,0.597,0.999,0.161,0.535,0.314,0.978,umap,har,7,jaccard
11,-1,0.025,0.884,0.901,0.852,0.782,0.824,0.116,0.824,0.285,...,0.900,0.998,0.069,0.879,0.176,0.705,mds,spambase,51,jaccard
11,-1,0.152,0.990,0.994,0.962,0.948,0.514,0.336,0.514,0.019,...,0.496,0.989,0.140,0.454,0.486,0.967,tsne,usps,51,jaccard
11,-1,0.134,0.937,0.964,0.910,0.790,0.556,0.312,0.556,0.066,...,0.369,0.985,0.241,0.394,0.444,0.873,tsne,spambase,51,jaccard
11,-1,0.244,0.985,0.984,0.934,0.804,0.556,0.301,0.556,0.027,...,0.422,0.988,0.158,0.393,0.444,0.951,tsne,mnist,51,jaccard
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11,-1,0.030,0.906,0.917,0.872,0.804,0.809,0.124,0.809,0.260,...,0.634,0.983,0.230,0.535,0.191,0.709,isomap,spambase,51,all
11,-1,0.061,0.955,0.962,0.937,0.747,0.782,0.132,0.782,0.081,...,0.784,0.996,0.088,0.812,0.218,0.912,isomap,har,51,all
11,-1,0.270,0.878,0.703,0.749,0.448,0.950,0.030,0.950,0.440,...,0.563,0.996,0.144,0.535,0.050,0.565,mds,reuters,51,all
11,-1,0.286,0.937,0.810,0.822,0.600,0.833,0.102,0.833,0.278,...,0.041,0.989,0.219,0.005,0.167,0.665,tsne,reuters,51,all


In [None]:
# Export both combined tables as CSV (header row, no index column).
# NOTE(review): the "_for_d3" suffix suggests a D3 visualization reads these — confirm.
output_dir = pathlib.Path("./outputs")
# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing (no-op when it is already there).
output_dir.mkdir(parents=True, exist_ok=True)
per_epoch_df.to_csv(output_dir / "per_epoch_for_d3.csv", header=True, index=False)
postprocess_df.to_csv(output_dir / "postprocess_for_d3.csv", header=True, index=False)