In [1]:
import os
import pandas as pd
from definitions import ROOT_DIR

# Performance analysis
In this notebook we organize the results of the evaluation of the trained agents in tables, to facilitate the comparison between them. The tables are the same ones that are presented in the main paper and in the appendix.

We first define some variables and postprocessing functions.

In [22]:
# Environments to report, in the order in which their results are loaded and concatenated.
env_names = ["ant", "half_cheetah", "walker", "hopper"]
# Training perturbation levels, encoded as "sigma_XY" (the last two characters are read as X.Y).
train_sigma_list = ("sigma_01", "sigma_03", "sigma_05")
# Algorithm identifiers used in the result dataframes -> names shown in the tables.
new_cols_dict = {
    "adapted": "RMA",
    "attention": "DMAP",
    "attention_no_encoding": "DMAP-ne",
    "convnet": "TCN",
    "oracle": "Oracle",
    "simple": "Simple",
}
# Environment identifiers -> names shown in the tables.
env_name_dict = {
    "ant": "Ant",
    "hopper": "Hopper",
    "half_cheetah": "Half Cheetah",
    "walker": "Walker"
}


def index_rename(x):
    r"""Turn a row label such as ``"ant_sigma_01"`` into ``"Ant $\sigma=0.1$"``.

    Args:
        x: Label containing one of the keys of ``env_name_dict`` and ending
            with the two digits of the sigma value (``"01"`` means 0.1).

    Returns:
        The display name of the environment followed by the sigma value as a
        LaTeX math expression.

    Raises:
        ValueError: If no key of ``env_name_dict`` occurs in ``x``.
    """
    matches = [label for key, label in env_name_dict.items() if key in x]
    if not matches:
        # Fail with an explicit message instead of hitting an unbound local below.
        raise ValueError(f"No known environment name found in label {x!r}")
    # Keep the last matching key, like a plain loop that overwrites its result would.
    env_name = matches[-1]
    sigma = ".".join((x[-2], x[-1]))
    # The backslash is escaped so that "\s" is not parsed as an (invalid) escape sequence.
    return f"{env_name} $\\sigma={sigma}$"


Load the dataframes containing the evaluation results of the trained agents (one dataframe per environment).

In [13]:
# One evaluation dataframe per environment, stored in the same order as `env_names`.
# NOTE: read_pickle can execute arbitrary code; only load files from a trusted source.
summary_df_list = [
    pd.read_pickle(os.path.join(ROOT_DIR, "data", f"{env_name}_experiments_df.pkl"))
    for env_name in env_names
]


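## Comparisons 1:1
Pairwise comparisons in the IID setting, i.e. the agents are evaluated at the same $\sigma$ they were trained with:
* Simple vs. Oracle IID
* RMA vs. Oracle IID
* TCN vs. DMAP IID
* DMAP vs. DMAP-ne IID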

In [51]:
def algo_comparison_table(summary_df_list, algo_1, algo_2):
    r"""Build a side-by-side reward table for two algorithms in every environment.

    Only IID evaluations are used, i.e. rows whose ``train_sigma`` equals
    ``test_sigma``. Each cell is the string ``$<mean> \pm <sem>$`` of the
    rewards, with both numbers rounded to the nearest integer.

    Args:
        summary_df_list: Per-environment dataframes, indexed in the same order
            as the notebook-level ``env_names``. Each one needs the columns
            ``algorithm``, ``train_sigma``, ``test_sigma`` and ``reward``.
        algo_1: Identifier (value of the ``algorithm`` column) shown first.
        algo_2: Identifier (value of the ``algorithm`` column) shown second.

    Returns:
        A dataframe indexed by sigma (as a float) whose columns are a two-level
        index ("Env", "Algo") holding the display names of the environments
        and of the two algorithms.
    """
    df_list = []
    for idx, env_name in enumerate(env_names):
        env_df = summary_df_list[idx]
        pair_df = env_df[env_df.algorithm.isin([algo_1, algo_2])]
        iid_df = pair_df[pair_df.train_sigma == pair_df.test_sigma]
        # The group keys are placed in the index and turned back into columns
        # by reset_index(); no as_index=False, which would contradict that step.
        stats = (
            iid_df.groupby(["train_sigma", "algorithm"])
            .reward.agg(["mean", "sem"])
            .reset_index()
        )
        # "sigma_01" -> 0.1: the last two characters encode the value.
        stats["train_sigma"] = stats.train_sigma.apply(lambda x: float(f"{x[-2]}.{x[-1]}"))
        stats["env"] = env_name
        stats = stats.set_index("train_sigma").sort_values("algorithm", ascending=False)
        # Backslashes are escaped so that "\p" is not parsed as an (invalid) escape sequence.
        stats["reward"] = stats.apply(
            lambda x: f"${int(round(x['mean']))} \\pm {int(round(x['sem']))}$", axis=1
        )
        env_table = stats.pivot_table(
            values="reward", index=stats.index, columns=["env", "algorithm"], aggfunc="first"
        )
        df_list.append(env_table)

    performance_table = pd.concat(df_list, axis=1)
    # Put the algorithms in the order requested by the caller within each environment.
    order = [algo_1, algo_2]
    performance_table = performance_table.reindex(order, axis=1, level=1)
    performance_table = performance_table.rename(columns=new_cols_dict)
    performance_table = performance_table.rename(columns=env_name_dict)
    performance_table = performance_table.rename_axis(index="$\\sigma$", columns=["Env", "Algo"])
    return performance_table

In [52]:
# IID comparison (test sigma equal to train sigma): Simple vs. Oracle
algo_comparison_table(summary_df_list, algo_1="simple", algo_2="oracle")

Env,Ant,Ant,Half Cheetah,Half Cheetah,Walker,Walker,Hopper,Hopper
Algo,Simple,Oracle,Simple,Oracle,Simple,Oracle,Simple,Oracle
$\sigma$,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0.1,$2164 \pm 17$,$2148 \pm 16$,$1633 \pm 25$,$2203 \pm 17$,$1909 \pm 17$,$1780 \pm 26$,$1807 \pm 25$,$1859 \pm 18$
0.3,$1270 \pm 20$,$1723 \pm 18$,$1412 \pm 22$,$1469 \pm 24$,$691 \pm 17$,$908 \pm 23$,$1121 \pm 18$,$1296 \pm 26$
0.5,$391 \pm 14$,$974 \pm 18$,$482 \pm 19$,$668 \pm 24$,$397 \pm 14$,$625 \pm 17$,$863 \pm 19$,$919 \pm 18$


In [53]:
# IID comparison (test sigma equal to train sigma): RMA vs. Oracle
algo_comparison_table(summary_df_list, algo_1="adapted", algo_2="oracle")

Env,Ant,Ant,Half Cheetah,Half Cheetah,Walker,Walker,Hopper,Hopper
Algo,RMA,Oracle,RMA,Oracle,RMA,Oracle,RMA,Oracle
$\sigma$,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0.1,$2138 \pm 16$,$2148 \pm 16$,$2197 \pm 16$,$2203 \pm 17$,$1750 \pm 28$,$1780 \pm 26$,$1859 \pm 19$,$1859 \pm 18$
0.3,$1700 \pm 17$,$1723 \pm 18$,$1402 \pm 27$,$1469 \pm 24$,$836 \pm 23$,$908 \pm 23$,$1267 \pm 27$,$1296 \pm 26$
0.5,$966 \pm 16$,$974 \pm 18$,$595 \pm 26$,$668 \pm 24$,$579 \pm 17$,$625 \pm 17$,$730 \pm 21$,$919 \pm 18$


In [54]:
# IID comparison (test sigma equal to train sigma): DMAP vs. TCN
algo_comparison_table(summary_df_list, algo_1="attention", algo_2="convnet")

Env,Ant,Ant,Half Cheetah,Half Cheetah,Walker,Walker,Hopper,Hopper
Algo,DMAP,TCN,DMAP,TCN,DMAP,TCN,DMAP,TCN
$\sigma$,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0.1,$2240 \pm 11$,$932 \pm 45$,$2261 \pm 11$,$2278 \pm 10$,$1229 \pm 24$,$1060 \pm 42$,$1842 \pm 18$,$1762 \pm 16$
0.3,$1623 \pm 19$,$251 \pm 11$,$1577 \pm 22$,$1540 \pm 20$,$893 \pm 11$,$518 \pm 19$,$1316 \pm 24$,$1368 \pm 20$
0.5,$960 \pm 14$,$481 \pm 20$,$669 \pm 23$,$553 \pm 27$,$743 \pm 15$,$584 \pm 18$,$953 \pm 15$,$1017 \pm 18$


In [55]:
# IID comparison (test sigma equal to train sigma): DMAP vs. DMAP-ne
algo_comparison_table(summary_df_list, algo_1="attention", algo_2="attention_no_encoding")

Env,Ant,Ant,Half Cheetah,Half Cheetah,Walker,Walker,Hopper,Hopper
Algo,DMAP,DMAP-ne,DMAP,DMAP-ne,DMAP,DMAP-ne,DMAP,DMAP-ne
$\sigma$,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0.1,$2240 \pm 11$,$1966 \pm 18$,$2261 \pm 11$,$1493 \pm 15$,$1229 \pm 24$,$337 \pm 16$,$1842 \pm 18$,$984 \pm 33$
0.3,$1623 \pm 19$,$1542 \pm 19$,$1577 \pm 22$,$1132 \pm 29$,$893 \pm 11$,$470 \pm 16$,$1316 \pm 24$,$748 \pm 23$
0.5,$960 \pm 14$,$881 \pm 12$,$669 \pm 23$,$507 \pm 29$,$743 \pm 15$,$341 \pm 17$,$953 \pm 15$,$482 \pm 20$


In [56]:
# IID performance: every algorithm evaluated at the sigma it was trained with.
df_list = []
for idx, env_name in enumerate(env_names):
    df = summary_df_list[idx]
    # The group keys are placed in the index and turned back into columns by
    # reset_index(); no as_index=False, which would contradict that step.
    df = (
        df[df.train_sigma == df.test_sigma]
        .groupby(["train_sigma", "algorithm"])
        .reward.agg(["mean", "sem"])
        .reset_index()
    )
    # Prefix the sigma label with the environment (e.g. "ant_sigma_01") so the
    # row labels stay distinct once all environments are stacked.
    df["train_sigma"] = df.train_sigma.apply(lambda x: "_".join((env_name, x)))
    df = df.set_index("train_sigma")
    # Backslashes are escaped so that "\p" is not parsed as an (invalid) escape sequence.
    df["reward"] = df.apply(lambda x: f"${int(round(x['mean']))} \\pm {int(round(x['sem']))}$", axis=1)
    df = df.pivot_table(values="reward", index=df.index, columns="algorithm", aggfunc="first")
    df_list.append(df)

performance_table = pd.concat(df_list)
performance_table = performance_table.rename(columns=new_cols_dict, index=index_rename)
performance_table = performance_table.rename_axis(index="$\\sigma$", columns="Algorithm")
performance_table

Algorithm,RMA,DMAP,DMAP-ne,TCN,Oracle,Simple
$\sigma$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ant $\sigma=0.1$,$2138 \pm 16$,$2240 \pm 11$,$1966 \pm 18$,$932 \pm 45$,$2148 \pm 16$,$2164 \pm 17$
Ant $\sigma=0.3$,$1700 \pm 17$,$1623 \pm 19$,$1542 \pm 19$,$251 \pm 11$,$1723 \pm 18$,$1270 \pm 20$
Ant $\sigma=0.5$,$966 \pm 16$,$960 \pm 14$,$881 \pm 12$,$481 \pm 20$,$974 \pm 18$,$391 \pm 14$
Half Cheetah $\sigma=0.1$,$2197 \pm 16$,$2261 \pm 11$,$1493 \pm 15$,$2278 \pm 10$,$2203 \pm 17$,$1633 \pm 25$
Half Cheetah $\sigma=0.3$,$1402 \pm 27$,$1577 \pm 22$,$1132 \pm 29$,$1540 \pm 20$,$1469 \pm 24$,$1412 \pm 22$
Half Cheetah $\sigma=0.5$,$595 \pm 26$,$669 \pm 23$,$507 \pm 29$,$553 \pm 27$,$668 \pm 24$,$482 \pm 19$
Walker $\sigma=0.1$,$1750 \pm 28$,$1229 \pm 24$,$337 \pm 16$,$1060 \pm 42$,$1780 \pm 26$,$1909 \pm 17$
Walker $\sigma=0.3$,$836 \pm 23$,$893 \pm 11$,$470 \pm 16$,$518 \pm 19$,$908 \pm 23$,$691 \pm 17$
Walker $\sigma=0.5$,$579 \pm 17$,$743 \pm 15$,$341 \pm 17$,$584 \pm 18$,$625 \pm 17$,$397 \pm 14$
Hopper $\sigma=0.1$,$1859 \pm 19$,$1842 \pm 18$,$984 \pm 33$,$1762 \pm 16$,$1859 \pm 18$,$1807 \pm 25$


In [58]:
# OOD performance: for each training sigma, one table with the rewards
# obtained at every test sigma (rows) by every algorithm (columns).
performance_table_list = []
for train_sigma in train_sigma_list:
    df_list = []
    for idx, env_name in enumerate(env_names):
        df = summary_df_list[idx]
        # The group keys are placed in the index and turned back into columns by
        # reset_index(); no as_index=False, which would contradict that step.
        df = (
            df[df.train_sigma == train_sigma]
            .groupby(["test_sigma", "algorithm"])
            .reward.agg(["mean", "sem"])
            .reset_index()
        )
        # Prefix the sigma label with the environment (e.g. "ant_sigma_01") so the
        # row labels stay distinct once all environments are stacked.
        df["test_sigma"] = df.test_sigma.apply(lambda x: "_".join((env_name, x)))
        # Backslashes are escaped so that "\p" is not parsed as an (invalid) escape sequence.
        df["reward"] = df.apply(lambda x: f"${int(round(x['mean']))} \\pm {int(round(x['sem']))}$", axis=1)
        df = df.set_index("test_sigma")
        df = df.pivot_table(values="reward", index=df.index, columns="algorithm", aggfunc="first")
        df_list.append(df)
    performance_table = pd.concat(df_list)
    performance_table = performance_table.rename(columns=new_cols_dict, index=index_rename)
    performance_table = performance_table.rename_axis(index="$\\sigma$", columns="Algorithm")
    # Appended in the order of `train_sigma_list`.
    performance_table_list.append(performance_table)

In [59]:
# The tables are stored in the order of `train_sigma_list`, so look the position up by label.
print("OOD performance train sigma 0.1")
performance_table_list[train_sigma_list.index("sigma_01")]

OOD performance train sigma 0.1


Algorithm,RMA,DMAP,DMAP-ne,TCN,Oracle,Simple
$\sigma$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ant $\sigma=0.1$,$2138 \pm 16$,$2240 \pm 11$,$1966 \pm 18$,$932 \pm 45$,$2148 \pm 16$,$2164 \pm 17$
Ant $\sigma=0.3$,$1124 \pm 26$,$1105 \pm 26$,$1004 \pm 22$,$479 \pm 26$,$1078 \pm 27$,$1034 \pm 28$
Ant $\sigma=0.5$,$604 \pm 17$,$579 \pm 18$,$536 \pm 16$,$289 \pm 15$,$598 \pm 16$,$572 \pm 19$
Ant $\sigma=0.7$,$145 \pm 30$,$129 \pm 30$,$139 \pm 28$,$162 \pm 16$,$154 \pm 30$,$169 \pm 26$
Half Cheetah $\sigma=0.1$,$2197 \pm 16$,$2261 \pm 11$,$1493 \pm 15$,$2278 \pm 10$,$2203 \pm 17$,$1633 \pm 25$
Half Cheetah $\sigma=0.3$,$1159 \pm 38$,$1369 \pm 38$,$877 \pm 36$,$1426 \pm 36$,$966 \pm 43$,$825 \pm 36$
Half Cheetah $\sigma=0.5$,$216 \pm 49$,$374 \pm 52$,$137 \pm 46$,$475 \pm 51$,$100 \pm 48$,$208 \pm 41$
Half Cheetah $\sigma=0.7$,$-307 \pm 45$,$-142 \pm 48$,$-205 \pm 44$,$-147 \pm 48$,$-375 \pm 44$,$-225 \pm 42$
Walker $\sigma=0.1$,$1750 \pm 28$,$1229 \pm 24$,$337 \pm 16$,$1060 \pm 42$,$1780 \pm 26$,$1909 \pm 17$
Walker $\sigma=0.3$,$877 \pm 37$,$666 \pm 25$,$283 \pm 15$,$504 \pm 31$,$622 \pm 34$,$900 \pm 37$


In [60]:
# The tables are stored in the order of `train_sigma_list`, so look the position up by label.
print("OOD performance train sigma 0.3")
performance_table_list[train_sigma_list.index("sigma_03")]

OOD performance train sigma 0.3


Algorithm,RMA,DMAP,DMAP-ne,TCN,Oracle,Simple
$\sigma$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ant $\sigma=0.1$,$2103 \pm 6$,$2051 \pm 7$,$2015 \pm 6$,$238 \pm 12$,$2124 \pm 5$,$1671 \pm 20$
Ant $\sigma=0.3$,$1700 \pm 17$,$1623 \pm 19$,$1542 \pm 19$,$251 \pm 11$,$1723 \pm 18$,$1270 \pm 20$
Ant $\sigma=0.5$,$924 \pm 22$,$856 \pm 22$,$833 \pm 20$,$227 \pm 9$,$900 \pm 24$,$796 \pm 18$
Ant $\sigma=0.7$,$306 \pm 32$,$228 \pm 33$,$230 \pm 32$,$185 \pm 12$,$261 \pm 30$,$283 \pm 28$
Half Cheetah $\sigma=0.1$,$1795 \pm 11$,$1900 \pm 9$,$1578 \pm 15$,$1719 \pm 8$,$1824 \pm 9$,$1704 \pm 11$
Half Cheetah $\sigma=0.3$,$1402 \pm 27$,$1577 \pm 22$,$1132 \pm 29$,$1540 \pm 20$,$1469 \pm 24$,$1412 \pm 22$
Half Cheetah $\sigma=0.5$,$822 \pm 42$,$1033 \pm 38$,$552 \pm 42$,$1117 \pm 35$,$892 \pm 43$,$787 \pm 40$
Half Cheetah $\sigma=0.7$,$350 \pm 48$,$453 \pm 47$,$151 \pm 45$,$462 \pm 45$,$250 \pm 50$,$267 \pm 45$
Walker $\sigma=0.1$,$906 \pm 22$,$943 \pm 9$,$482 \pm 15$,$574 \pm 19$,$1028 \pm 21$,$846 \pm 12$
Walker $\sigma=0.3$,$836 \pm 23$,$893 \pm 11$,$470 \pm 16$,$518 \pm 19$,$908 \pm 23$,$691 \pm 17$


In [61]:
# The tables are stored in the order of `train_sigma_list`, so look the position up by label.
print("OOD performance train sigma 0.5")
performance_table_list[train_sigma_list.index("sigma_05")]

OOD performance train sigma 0.5


Algorithm,RMA,DMAP,DMAP-ne,TCN,Oracle,Simple
$\sigma$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ant $\sigma=0.1$,$1667 \pm 5$,$1477 \pm 5$,$1347 \pm 7$,$692 \pm 30$,$1665 \pm 6$,$358 \pm 13$
Ant $\sigma=0.3$,$1428 \pm 11$,$1280 \pm 11$,$1133 \pm 11$,$620 \pm 26$,$1454 \pm 11$,$444 \pm 14$
Ant $\sigma=0.5$,$966 \pm 16$,$960 \pm 14$,$881 \pm 12$,$481 \pm 20$,$974 \pm 18$,$391 \pm 14$
Ant $\sigma=0.7$,$409 \pm 27$,$504 \pm 22$,$364 \pm 31$,$273 \pm 21$,$479 \pm 21$,$236 \pm 20$
Half Cheetah $\sigma=0.1$,$748 \pm 23$,$816 \pm 12$,$718 \pm 18$,$466 \pm 33$,$834 \pm 17$,$601 \pm 7$
Half Cheetah $\sigma=0.3$,$697 \pm 23$,$788 \pm 15$,$660 \pm 22$,$603 \pm 27$,$766 \pm 21$,$585 \pm 9$
Half Cheetah $\sigma=0.5$,$595 \pm 26$,$669 \pm 23$,$507 \pm 29$,$553 \pm 27$,$668 \pm 24$,$482 \pm 19$
Half Cheetah $\sigma=0.7$,$399 \pm 30$,$443 \pm 30$,$303 \pm 33$,$373 \pm 31$,$364 \pm 33$,$312 \pm 27$
Walker $\sigma=0.1$,$746 \pm 13$,$949 \pm 7$,$533 \pm 17$,$702 \pm 16$,$783 \pm 14$,$468 \pm 12$
Walker $\sigma=0.3$,$722 \pm 14$,$887 \pm 11$,$490 \pm 17$,$716 \pm 16$,$787 \pm 13$,$484 \pm 13$
