In [34]:
import glob
import os
from pathlib import Path
from collections import defaultdict

import numpy as np
import pandas as pd

In [None]:
def find_csv_files(log_dir, algos, filename) -> dict[str, dict[str, list[str]]]:
    """Locate the CSV logs of every (algorithm, environment) experiment.

    For each entry of ``algos`` the glob pattern
    ``<log_dir>/<algo>/<exp_log_dir>/*/debug/**/<filename>`` is expanded.
    The ``*`` matches exactly one directory level (one directory per run,
    presumably the random seed) and ``**`` matches any depth below ``debug``,
    including zero intermediate directories.

    Args:
        log_dir: Root directory that holds the experiment logs.
        algos: Iterable of ``(algo, env_name, exp_log_dir)`` tuples.
        filename: Name of the CSV file to search for.

    Returns:
        Nested dict ``{env_name: {algo: [path, ...]}}`` whose paths are strings
        in sorted order. An experiment without any match maps to an empty list.
    """
    csv_files = {}
    for algo, env_name, exp_log_dir in algos:
        pattern = os.path.join(log_dir, algo, exp_log_dir, "*", "debug", "**", filename)
        # glob yields matches in arbitrary, filesystem-dependent order; sort so
        # the row order of everything derived from this list is reproducible.
        matches = sorted(glob.glob(pattern, recursive=True))
        csv_files.setdefault(env_name, {})[algo] = matches
    return csv_files


# Root directory of the experiment logs.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
log_dir = "/n/fs/prl-chongyiz/exp_logs/fdrl_logs"
# Experiments to aggregate. Each entry is an (algo, env_name, exp_log_dir) tuple,
# unpacked in exactly this order by find_csv_files; exp_log_dir is the directory
# name under <log_dir>/<algo>/ and encodes the date and hyper-parameters of the run.
algos = [
    # antsoccer-arena-navigate-singletask
    # ("fdrl", "antsoccer-arena-navigate-singletask-task1-v0", "20250718_fdrl_antsoccer-arena-navigate-singletask-task1-v0_discount=0.995_alpha_critic=3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    # ("fdrl", "antsoccer-arena-navigate-singletask-task2-v0", "20250718_fdrl_antsoccer-arena-navigate-singletask-task2-v0_discount=0.995_alpha_critic=3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    # ("fdrl", "antsoccer-arena-navigate-singletask-task3-v0", "20250718_fdrl_antsoccer-arena-navigate-singletask-task3-v0_discount=0.995_alpha_critic=3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    # ("fdrl", "antsoccer-arena-navigate-singletask-task4-v0", "20250718_fdrl_antsoccer-arena-navigate-singletask-task4-v0_discount=0.995_alpha_critic=3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    # ("fdrl", "antsoccer-arena-navigate-singletask-task5-v0", "20250718_fdrl_antsoccer-arena-navigate-singletask-task5-v0_discount=0.995_alpha_critic=3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    ("fdrl", "antsoccer-arena-navigate-singletask-task1-v0", "20250721_fdrl_antsoccer-arena-navigate-singletask-task1-v0_discount=0.995_alpha_critic=0.3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "antsoccer-arena-navigate-singletask-task2-v0", "20250721_fdrl_antsoccer-arena-navigate-singletask-task2-v0_discount=0.995_alpha_critic=0.3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "antsoccer-arena-navigate-singletask-task3-v0", "20250721_fdrl_antsoccer-arena-navigate-singletask-task3-v0_discount=0.995_alpha_critic=0.3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "antsoccer-arena-navigate-singletask-task4-v0", "20250721_fdrl_antsoccer-arena-navigate-singletask-task4-v0_discount=0.995_alpha_critic=0.3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "antsoccer-arena-navigate-singletask-task5-v0", "20250721_fdrl_antsoccer-arena-navigate-singletask-task5-v0_discount=0.995_alpha_critic=0.3_alpha_actor=10_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    
    # humanoidmaze-medium-navigate-singletask
    ("fdrl", "humanoidmaze-medium-navigate-singletask-task1-v0", "20250718_fdrl_humanoidmaze-medium-navigate-singletask-task1-v0_discount=0.995_alpha_critic=1_alpha_actor=30_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "humanoidmaze-medium-navigate-singletask-task2-v0", "20250718_fdrl_humanoidmaze-medium-navigate-singletask-task2-v0_discount=0.995_alpha_critic=1_alpha_actor=30_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "humanoidmaze-medium-navigate-singletask-task3-v0", "20250718_fdrl_humanoidmaze-medium-navigate-singletask-task3-v0_discount=0.995_alpha_critic=1_alpha_actor=30_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "humanoidmaze-medium-navigate-singletask-task4-v0", "20250718_fdrl_humanoidmaze-medium-navigate-singletask-task4-v0_discount=0.995_alpha_critic=1_alpha_actor=30_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "humanoidmaze-medium-navigate-singletask-task5-v0", "20250718_fdrl_humanoidmaze-medium-navigate-singletask-task5-v0_discount=0.995_alpha_critic=1_alpha_actor=30_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    
    # cube-double-play-singletask
    ("fdrl", "cube-double-play-singletask-task1-v0", "20250721_fdrl_cube-double-play-singletask-task1-v0_discount=0.995_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    ("fdrl", "cube-double-play-singletask-task2-v0", "20250721_fdrl_cube-double-play-singletask-task2-v0_discount=0.995_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    ("fdrl", "cube-double-play-singletask-task3-v0", "20250721_fdrl_cube-double-play-singletask-task3-v0_discount=0.995_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    ("fdrl", "cube-double-play-singletask-task4-v0", "20250721_fdrl_cube-double-play-singletask-task4-v0_discount=0.995_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),
    ("fdrl", "cube-double-play-singletask-task5-v0", "20250721_fdrl_cube-double-play-singletask-task5-v0_discount=0.995_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=False_actor_layer_norm=True_single_noises"),

    # puzzle-3x3-play-singletask
    ("fdrl", "puzzle-3x3-play-singletask-task1-v0", "20250721_fdrl_puzzle-3x3-play-singletask-task1-v0_discount=0.99_alpha_critic=2_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "puzzle-3x3-play-singletask-task2-v0", "20250721_fdrl_puzzle-3x3-play-singletask-task2-v0_discount=0.99_alpha_critic=2_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "puzzle-3x3-play-singletask-task3-v0", "20250722_fdrl_puzzle-3x3-play-singletask-task3-v0_discount=0.99_alpha_critic=2_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "puzzle-3x3-play-singletask-task4-v0", "20250722_fdrl_puzzle-3x3-play-singletask-task4-v0_discount=0.99_alpha_critic=2_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "puzzle-3x3-play-singletask-task5-v0", "20250722_fdrl_puzzle-3x3-play-singletask-task5-v0_discount=0.99_alpha_critic=2_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),

    # scene-play-singletask
    ("fdrl", "scene-play-singletask-task1-v0", "20250722_fdrl_scene-play-singletask-task1-v0_discount=0.99_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "scene-play-singletask-task2-v0", "20250722_fdrl_scene-play-singletask-task2-v0_discount=0.99_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "scene-play-singletask-task3-v0", "20250722_fdrl_scene-play-singletask-task3-v0_discount=0.99_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "scene-play-singletask-task4-v0", "20250722_fdrl_scene-play-singletask-task4-v0_discount=0.99_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
    ("fdrl", "scene-play-singletask-task5-v0", "20250722_fdrl_scene-play-singletask-task5-v0_discount=0.99_alpha_critic=1_alpha_actor=300_critic_loss_type=q-learning_value_layer_norm=True_actor_layer_norm=True_single_noises"),
]

# Display-name maps kept as commented-out code; nothing in the visible cells
# references them. NOTE(review): they look like leftovers — confirm and remove.
# env_name_maps = {
#     'cheetah_run': 'cheetah run',
#     'quadruped_jump': 'quadruped jump',
#     'walker_walk': 'walker walk',
#     'walker_flip': 'walker flip',
#     'cube-double-play-singletask-task1-v0': 'cube double task 1',
#     'scene-play-singletask-task1-v0': 'scene task 1',
# }
# algo_name_maps = {
#     'sarsa_ifql_vib_gpi_offline2offline': 'InFOM (Ours)',
#     'fb_repr_fom_offline2offline': 'FB + FOM',
#     'hilp_fom_offline2offline': 'HILP + FOM',
# }


# Resolve every experiment above to its "eval.csv" files; the result is grouped
# as {env_name: {algo: [csv paths]}}.
csv_files = find_csv_files(log_dir, algos, filename="eval.csv")

In [36]:
def load_data(csv_path, stat_name, step_name) -> dict[str, np.ndarray]:
    """Read one CSV log and extract a step column and a statistic column.

    Args:
        csv_path: Path of the CSV file to read.
        stat_name: Name of the column holding the statistic of interest.
        step_name: Name of the column holding the step values.

    Returns:
        Dict with two arrays: ``"x"`` (the ``step_name`` column) and ``"y"``
        (the ``stat_name`` column).

    Raises:
        KeyError: If ``stat_name`` or ``step_name`` is not a column of the CSV.
    """
    df = pd.read_csv(csv_path)
    # Validate both columns up front so a missing step column reports the file
    # it came from, just like a missing statistic column does.
    missing = [col for col in (stat_name, step_name) if col not in df.columns]
    if missing:
        raise KeyError(f"{csv_path} doesn't contain {', '.join(missing)}")
    x = df[step_name].values
    y = df[stat_name].values
    return dict(x=x, y=y)

# Build, for every (environment, algorithm) pair, a 2-D array whose first row
# is the shared step axis and whose remaining rows are the per-file
# "evaluation/success" curves (one row per CSV file found for that pair).
algo_data = defaultdict(dict)
for env_name, env_csv_files in csv_files.items():
    # The inner loop variable must not be named `csv_files`: that would rebind
    # the notebook-level dict to a list and break re-running this cell.
    for algo, seed_csv_files in env_csv_files.items():
        if len(seed_csv_files) == 0:
            # Without this guard an empty array would be stored and the 2-D
            # indexing done on it later would fail.
            print(f"Warning: no CSV files found for env = {env_name}, algo = {algo}; skipping.")
            continue

        seed_data = []
        for csv_file in seed_csv_files:
            data = load_data(csv_file, "evaluation/success", "step")
            if len(seed_data) == 0:
                # Row 0 stores the step axis taken from the first file.
                seed_data.append(data["x"])
            else:
                # array_equal also returns False on a length mismatch instead of
                # relying on element-wise broadcasting.
                assert np.array_equal(data["x"], seed_data[0]), (
                    f"{csv_file} has a different step axis than {seed_csv_files[0]}"
                )
            seed_data.append(data["y"])

        seed_data = np.asarray(seed_data)

        algo_data[env_name][algo] = seed_data
        # seed_data also holds the step row, so count the files rather than its rows.
        if len(seed_csv_files) == 1:
            print("Warning: only one random seed!")

In [37]:
# Sanity check of the stored layout: row 0 is the step axis, rows 1: are the per-seed curves.
print(np.shape(algo_data['cube-double-play-singletask-task1-v0']['fdrl'][0]))
print(np.shape(algo_data['cube-double-play-singletask-task1-v0']['fdrl'][1:]))

(11,)
(2, 11)


In [38]:
# Final-performance summary: for every seed average the last NUM_FINAL_EVALS
# evaluation points, then report mean and sample standard deviation across seeds.
NUM_FINAL_EVALS = 3

for env_name, env_data in algo_data.items():
    for algo, data in env_data.items():
        # Row 0 is the step axis; at least one more row (a seed) is required.
        if data.ndim != 2 or data.shape[0] < 2:
            print(f"env = {env_name}, {algo}: no seed data, skipping")
            continue

        y = data[1:, -NUM_FINAL_EVALS:].mean(axis=-1)
        y_mean = np.mean(y, axis=0)
        # The sample std (ddof=1) is undefined for a single seed; report nan
        # directly instead of triggering numpy's degrees-of-freedom RuntimeWarning.
        y_std = np.std(y, axis=0, ddof=1) if len(y) > 1 else float("nan")

        print(f"env = {env_name}, {algo}: mean = {y_mean:.4f}, std = {y_std:.4f}")

env = antsoccer-arena-navigate-singletask-task1-v0, fdrl: mean = 0.8633, std = 0.0330
env = antsoccer-arena-navigate-singletask-task2-v0, fdrl: mean = 0.8733, std = 0.0189
env = antsoccer-arena-navigate-singletask-task3-v0, fdrl: mean = 0.5233, std = 0.0141
env = antsoccer-arena-navigate-singletask-task4-v0, fdrl: mean = 0.3767, std = 0.0990
env = antsoccer-arena-navigate-singletask-task5-v0, fdrl: mean = 0.2167, std = 0.1179
env = humanoidmaze-medium-navigate-singletask-task1-v0, fdrl: mean = 0.1956, std = 0.0559
env = humanoidmaze-medium-navigate-singletask-task2-v0, fdrl: mean = 0.4778, std = 0.2247
env = humanoidmaze-medium-navigate-singletask-task3-v0, fdrl: mean = 0.5889, std = 0.1118
env = humanoidmaze-medium-navigate-singletask-task4-v0, fdrl: mean = 0.0022, std = 0.0038
env = humanoidmaze-medium-navigate-singletask-task5-v0, fdrl: mean = 0.4067, std = 0.1867
env = cube-double-play-singletask-task1-v0, fdrl: mean = 0.8633, std = 0.0424
env = cube-double-play-singletask-task2-v0