In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import matplotlib.pyplot as plt
import pickle

In [2]:
# Directory holding one sub-directory of pickled cross-evaluation results per trained policy.
eval_results_dir = Path("../cross_eval_results")

In [3]:
# Training environment -> name of the run directory (under eval_results_dir)
# that holds the evaluation results of the policy trained on that environment.
train_env_to_run_map = dict(
    trunk="final_trunk_thesis_training_42_policy_final",
    stairs="final_stairs_from_scratch_thesis_16_policy_final",
    barrier="final_barrier_thesis_42_policy_final",
    gauss_fine="final_fine_gaussian_terrain_thesis_98_policy_final",
    gauss_coarse="final_coarse_gaussian_terrain_thesis_98_policy_final",
)

In [4]:
def list_of_dicts_to_dict_of_lists(list_of_dicts):
    """
    Transpose a sequence of dictionaries into one dictionary of lists.

    Every key seen in any input dictionary maps to the list of values recorded
    for it, in input order. A dictionary that lacks a key contributes nothing
    to that key's list, so the lists can differ in length for ragged inputs.
    An empty input yields an empty dictionary.
    """
    merged = {}
    for record in list_of_dicts:
        for key, value in record.items():
            # setdefault creates the list the first time a key is encountered.
            merged.setdefault(key, []).append(value)
    return merged

In [29]:
from collections import defaultdict

# results_per_env_<difficulty>[train_env][eval_env] -> dict of lists of logged metrics,
# where eval_env is the stem of the pickle file the results were read from.
results_per_env_easy = defaultdict(dict)
results_per_env_hard = defaultdict(dict)

# Tags are tested in this order, so a file stem containing both "easy" and "hard"
# is filed under "easy" only; stems containing neither tag are skipped.
results_by_difficulty = {"easy": results_per_env_easy, "hard": results_per_env_hard}

for train_env, run_name in train_env_to_run_map.items():
    run_dir = eval_results_dir / run_name
    # glob() yields files in arbitrary order; sort so the load order is reproducible.
    result_paths = sorted(run_dir.glob("*.pkl"))
    if not result_paths:
        # Fail fast: an empty/missing run directory would otherwise silently drop this
        # policy from every table computed from these results.
        raise FileNotFoundError(f"No evaluation results (*.pkl) found in {run_dir}")
    for result_path in result_paths:
        eval_env = result_path.stem
        difficulty = next((tag for tag in results_by_difficulty if tag in eval_env), None)
        if difficulty is None:
            continue
        # NOTE: pickle.load can execute arbitrary code; only load result files you produced yourself.
        with result_path.open("rb") as f:
            results_by_difficulty[difficulty][train_env][eval_env] = list_of_dicts_to_dict_of_lists(pickle.load(f))

results_per_env_easy.keys(), results_per_env_hard.keys()

(dict_keys(['trunk', 'stairs', 'barrier', 'gauss_fine', 'gauss_coarse']),
 dict_keys(['trunk', 'stairs', 'barrier', 'gauss_fine', 'gauss_coarse']))

In [30]:
# Metric key read from every loaded evaluation result; its values are averaged
# per (train env, eval env) pair when building the success tables.
success_key = "eval/pct_succeeded"

In [33]:
import pandas as pd
import numpy as np


def _mean_success_by_eval_env(results_per_env, difficulty):
    """
    Build {train_env: {eval_env: mean of the `success_key` metric}}.

    The evaluation envs looked up for every policy are the training env names
    with the suffix `_<difficulty>`; a missing result raises KeyError.
    """
    eval_envs = [f"{name}_{difficulty}" for name in results_per_env.keys()]
    return {
        train_env: {
            eval_env: np.mean(results_per_env[train_env][eval_env][success_key])
            for eval_env in eval_envs
        }
        for train_env in results_per_env.keys()
    }


mean_success_per_env_easy = _mean_success_by_eval_env(results_per_env_easy, "easy")
mean_success_per_env_hard = _mean_success_by_eval_env(results_per_env_hard, "hard")

# The outer dict keys become columns, so transpose to get one row per training env.
mean_success_per_env_easy_df = pd.DataFrame(mean_success_per_env_easy).T
mean_success_per_env_hard_df = pd.DataFrame(mean_success_per_env_hard).T

In [48]:
# Rows: env the policy was trained on; columns: easy env it was evaluated on;
# values: mean of the `eval/pct_succeeded` metric for that pair.
mean_success_per_env_easy_df  

Unnamed: 0,trunk_easy,stairs_easy,barrier_easy,gauss_fine_easy,gauss_coarse_easy
trunk,0.995833,0.85,0.7625,0.379167,0.516667
stairs,0.0,0.35625,0.027083,0.022917,0.064583
barrier,0.014583,0.460417,0.98125,0.35625,0.277083
gauss_fine,0.88125,0.666667,0.764583,0.902083,0.75625
gauss_coarse,0.977083,0.847917,0.8375,0.839583,0.94375


In [56]:
# Rows: env the policy was trained on; columns: hard env it was evaluated on;
# values: mean of the `eval/pct_succeeded` metric for that pair.
mean_success_per_env_hard_df    

Unnamed: 0,trunk_hard,stairs_hard,barrier_hard,gauss_fine_hard,gauss_coarse_hard
trunk,0.895833,0.245833,0.291667,0.0375,0.189583
stairs,0.0,0.189583,0.041667,0.0,0.029167
barrier,0.0,0.227083,0.504167,0.008333,0.2125
gauss_fine,0.322917,0.214583,0.164583,0.1375,0.53125
gauss_coarse,0.729167,0.375,0.335417,0.116667,0.7375


In [53]:
# process the easy dataframe
# Rows are training envs (policies), columns are evaluation envs. The diagonal is masked
# by position below, which is only correct if column i is the env that row i was trained on.
expected_eval_envs_easy = [f"{env}_easy" for env in mean_success_per_env_easy_df.index]
assert list(mean_success_per_env_easy_df.columns) == expected_eval_envs_easy, (
    "rows (train envs) and columns (eval envs) must be listed in the same order"
)
# 1. for each policy (row), compute its mean success rate over the envs it was NOT trained on:
#    zero out the specialist entry on the diagonal and divide the row sum by the number of other envs
# NOTE(review): the previous comment described this as the mean over *policies not trained on
# this env* (a per-column average); the code averages per row — confirm which one is intended.
non_specialist_succ = mean_success_per_env_easy_df.copy()
for i in range(len(non_specialist_succ)):
    non_specialist_succ.iloc[i, i] = 0.0
non_specialist_rates = non_specialist_succ.sum(axis=1) / (len(non_specialist_succ.columns) - 1)
# 2. normalize the non-specialist success rates so they sum to 1 (the sampling rates)
sampling_rates_easy = non_specialist_rates / non_specialist_rates.sum()
sampling_rates_easy

trunk           0.243478
stairs          0.011122
barrier         0.107583
gauss_fine      0.297877
gauss_coarse    0.339939
dtype: float64

In [55]:
# process the hard dataframe
# Rows are training envs (policies), columns are evaluation envs. The diagonal is masked
# by position below, which is only correct if column i is the env that row i was trained on.
expected_eval_envs_hard = [f"{env}_hard" for env in mean_success_per_env_hard_df.index]
assert list(mean_success_per_env_hard_df.columns) == expected_eval_envs_hard, (
    "rows (train envs) and columns (eval envs) must be listed in the same order"
)
# 1. for each policy (row), compute its mean success rate over the envs it was NOT trained on:
#    zero out the specialist entry on the diagonal and divide the row sum by the number of other envs
# NOTE(review): the previous comment described this as the mean over *policies not trained on
# this env* (a per-column average); the code averages per row — confirm which one is intended.
non_specialist_succ_hard = mean_success_per_env_hard_df.copy()
for i in range(len(non_specialist_succ_hard)):
    non_specialist_succ_hard.iloc[i, i] = 0.0
non_specialist_rates_hard = non_specialist_succ_hard.sum(axis=1) / (len(non_specialist_succ_hard.columns) - 1)
# 2. normalize the non-specialist success rates so they sum to 1 (the sampling rates)
sampling_rates_hard = non_specialist_rates_hard / non_specialist_rates_hard.sum()
sampling_rates_hard

trunk           0.187724
stairs          0.017391
barrier         0.109974
gauss_fine      0.302813
gauss_coarse    0.382097
dtype: float64

In [57]:
# Equal-weight average of the easy and hard sampling rates; the two Series are
# aligned on their index (the training env names) before being added.
final_sampling_rates = sampling_rates_easy.add(sampling_rates_hard).div(2)
final_sampling_rates

trunk           0.215601
stairs          0.014257
barrier         0.108779
gauss_fine      0.300345
gauss_coarse    0.361018
dtype: float64