In [12]:
# imports
import collections
from collections import defaultdict
from pathlib import Path
import yaml
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import tensorboard_reducer as tbr

## Save TensorBoard logs as CSV files

In [13]:
def flatten(d, parent_key="", sep="_"):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with `sep`, so
    ``{"a": {"b": 1}}`` becomes ``{"a_b": 1}``.

    Args:
        d: Mapping to flatten. Values that are mutable mappings are recursed
            into; every other value is copied over unchanged.
        parent_key: Prefix for every key produced from `d`. An empty prefix
            (the default) leaves top-level keys as they are.
        sep: String placed between a parent key and a child key.

    Returns:
        A new flat ``dict``.
    """
    # `collections.MutableMapping` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the function does not rely on an
    # additional top-level import.
    from collections.abc import MutableMapping

    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            flat[new_key] = v
    return flat

In [14]:
# Root directory that `output_df` searches recursively for `version_*` run
# folders and into which it writes the aggregated CSV files.
root_path = Path("../experiments/2021_baseline/")

In [19]:
def output_df(df_name, condition):
    """Export test metrics and hyper-parameters of every run under `root_path`.

    Every ``version_*`` directory found recursively under the module-level
    `root_path` is processed if it contains an ``hparams.yaml`` and its
    TensorBoard events can be loaded. For each such run the function

    * writes ``test_metrics.csv`` inside the run directory, holding the
      metrics whose tag satisfies `condition` (the file is overwritten on
      every call);
    * adds one row to an aggregated metrics table and one row to an
      aggregated hyper-parameter table, both indexed by the run's
      ``experiment_name``.

    The aggregated tables are sorted by index and written to
    ``<root_path>/<df_name>_metrics.csv`` and ``<root_path>/params.csv``.

    Args:
        df_name: Prefix of the aggregated metrics file name.
        condition: Callable that receives an event tag and returns a truthy
            value for the tags to keep.

    Raises:
        KeyError: If a run's ``hparams.yaml`` has no ``experiment_name`` entry.
    """
    # Collect per-run frames and concatenate once at the end instead of
    # growing a DataFrame inside the loop.
    metric_frames = []
    param_frames = []

    for dir_path in root_path.rglob("version_*"):
        csv_path = dir_path / "test_metrics.csv"
        hparams_path = dir_path / "hparams.yaml"

        if not hparams_path.exists():
            continue
        with open(hparams_path) as f:
            conf = yaml.safe_load(f)

        # Take the id from the directory name ("version_<id>") rather than
        # from the last two characters of the path, which breaks for ids that
        # are not exactly two characters long.
        conf.update({"a version id": dir_path.name.partition("_")[2]})
        flatten_conf = {
            k: str(v) for k, v in flatten(conf).items() if not k.startswith("data")
        }

        try:
            events_dict = tbr.load_tb_events(
                [str(dir_path)], handle_dup_steps="keep-first"
            )
        except AssertionError:
            # Best effort: presumably raised when the directory holds no
            # usable event data -- TODO confirm against tensorboard_reducer.
            continue

        test_metrics = {}
        for k, events_df in events_dict.items():
            if not condition(k):
                continue
            # Tags containing "obj_metric" or "loss" are kept as logged; all
            # other tags are multiplied by 100.
            scale_factor = 1 if ("obj_metric" in k or "loss" in k) else 100

            # NOTE(review): `rsplit("/")` without a maxsplit behaves like
            # `split("/")`, so this uses the *second* "/"-separated component
            # of the tag and raises IndexError for tags without "/".
            # Confirm this matches the tag layout being logged.
            test_metrics[k.rsplit("/")[1].lower()] = (
                events_df.to_numpy().squeeze() * scale_factor
            )

        run_index = [conf["experiment_name"]]
        metrics_df = pd.DataFrame(test_metrics, index=run_index)
        # Same two-decimal format as the aggregated metrics file below.
        metrics_df.to_csv(csv_path, float_format="%.2f")

        metric_frames.append(metrics_df)
        param_frames.append(pd.DataFrame(flatten_conf, index=run_index))

    # `pd.concat` rejects an empty list, so fall back to an empty frame when
    # no run was found.
    global_metric_df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()
    params_df = pd.concat(param_frames) if param_frames else pd.DataFrame()

    global_metric_df = global_metric_df.sort_index()
    params_df = params_df.sort_index()
    global_metric_df.to_csv(root_path / (df_name + "_metrics.csv"), float_format="%.2f")
    params_df.to_csv(root_path / "params.csv")

In [20]:
def _test_tag_filter(*needles):
    """Build a predicate over event tags.

    The returned callable accepts a tag only if it starts with "Test" and,
    when `needles` are given, contains at least one of them as a substring.
    """

    def accepts(tag):
        if not tag.startswith("Test"):
            return False
        if not needles:
            return True
        return any(needle in tag for needle in needles)

    return accepts


# (label, predicate) pairs; the label becomes the prefix of the aggregated
# metrics file and the predicate selects the event tags to export.
# NOTE(review): "AP__micro" has a double underscore, unlike "AP_macro" --
# confirm whether that file name is intentional.
condition_name = [
    ("all", _test_tag_filter()),
    ("AP", _test_tag_filter("ap")),
    ("AP__micro", _test_tag_filter("ap_micro")),
    ("AP_macro", _test_tag_filter("ap_macro")),
    ("micro", _test_tag_filter("micro")),
    ("macro", _test_tag_filter("macro")),
    ("weak", _test_tag_filter("weak")),
    ("strong", _test_tag_filter("strong")),
    ("monoph", _test_tag_filter("monoph")),
    ("lowpolyph", _test_tag_filter("lowpolyph")),
    ("highpolyph", _test_tag_filter("highpolyph")),
    ("near", _test_tag_filter("near")),
    ("far", _test_tag_filter("far")),
    ("proximity", _test_tag_filter("near", "far")),
]

In [21]:
# Write one aggregated `<name>_metrics.csv` per (name, predicate) pair.
# Every call to `output_df` also overwrites each run's `test_metrics.csv` and
# the shared `params.csv`, so after the loop each run's `test_metrics.csv`
# holds only the metrics selected by the last pair in `condition_name`.
for name, condition in condition_name:
    output_df(name, condition)