In [3]:
import wandb
from pathlib import Path
import pandas as pd
from tqdm import tqdm

# --- Configuration -----------------------------------------------------------
env_name = "CARLPendulumEnv"
experiment = "benchmarking"
eval_data_fn = "tmp/evaldata.pickle"

# `reload` forces a fresh download from W&B even if a cached pickle exists.
# A value set in an earlier cell is respected; otherwise default to False so
# this cell also runs on a fresh kernel (reading an undefined `reload` used to
# raise NameError whenever the cache file was already present).
reload = globals().get("reload", False)

# TODO: wrap this cell into
#   load_runs_from_wandb(env_name, eval_data_fn, reload=False,
#                        experiment="benchmarking") -> pd.DataFrame
eval_data_fn = Path(eval_data_fn)

# Decide per execution instead of overwriting `reload`: a missing cache must
# not leave `reload = True` behind in the kernel, which would make every later
# re-run of this cell download everything again.
must_download = reload or not eval_data_fn.is_file()

if must_download:
    api = wandb.Api()

    # Server-side filters: only finished training runs of this env/experiment.
    filters = {
        "config.env": env_name,
        "config.experiment": experiment,
        "config.wandb.job_type": "train",
        # "config.context_sampler.n_samples": 1000,
        "state": "finished",
    }
    # Metric keys of interest. Per-step histories (run.history(keys=metrics))
    # are not collected yet, so `metrics_list` stays empty.
    metrics = ["eval/return", "train/global_step"]

    # Project is specified by <entity/project-name>
    runs = api.runs("tnt/carl-tmlr", filters=filters)

    summary_list, config_list, name_list, metrics_list = [], [], [], []
    for run in tqdm(runs, total=len(runs)):
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith("_")}
        )

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    # One row per run; "summary" and "config" hold plain dicts.
    runs_df = pd.DataFrame({
        "summary": summary_list,
        "config": config_list,
        "name": name_list,
        # "metrics": metrics_list,
    })

    # Make sure the cache directory exists before writing the pickle.
    eval_data_fn.parent.mkdir(parents=True, exist_ok=True)
    runs_df.to_pickle(eval_data_fn)
else:
    # NOTE: unpickling executes arbitrary code; only load cache files that
    # were written by this notebook.
    runs_df = pd.read_pickle(eval_data_fn)

100%|██████████| 1043/1043 [00:11<00:00, 87.76it/s] 


In [35]:
import numpy as np
from rich import print as printr

# Local root directory under which the result directories are resolved.
rootdir = Path("/home/benjamin/Dokumente/code/tmp/tntcomp/CARL")

# Fixed sub-folder used for every run. The per-run config["output_dir"] is
# not used: it was previously read and then immediately overwritten with this
# value. NOTE(review): presumably intentional — confirm.
outdir = "exp_sweep"

# Number of trailing characters cut from the second part of the wandb id to
# obtain the directory name. NOTE(review): presumably a fixed-width per-run
# suffix — confirm against how the ids are generated.
id_suffix_len = 7

result_dirs = []
for config in runs_df["config"]:
    # slurm_id = config["slurm_id"]
    wandb_id = config["wandb"]["id"]
    # Split at the first underscore only: dirs[0] is the first path component,
    # dirs[1] (minus its suffix) the second.
    dirs = wandb_id.split("_", maxsplit=1)
    result_dir = rootdir / outdir / dirs[0] / dirs[1][:-id_suffix_len]
    result_dirs.append(result_dir)

# Several runs can map to the same directory; keep each one once, sorted.
result_dirs = np.unique(result_dirs)

# One evaluation command per unique result directory, printed for copy/paste.
command_template = "python experiments/evaluation/run_evaluation.py --result_dir '{result_dir}' -m"
commands = [command_template.format(result_dir=p) for p in result_dirs]
for command in commands:
    print(command)

python experiments/evaluation/run_evaluation.py --result_dir '/home/benjamin/Dokumente/code/tmp/tntcomp/CARL/exp_sweep/2022-09-12/08-35-57_benchmark_train' -m
python experiments/evaluation/run_evaluation.py --result_dir '/home/benjamin/Dokumente/code/tmp/tntcomp/CARL/exp_sweep/2022-09-12/12-02-53_benchmark_train' -m
