In [1]:
import os
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
import matplotlib.pyplot as plt

ENVS = [
    ("CartPole-v1", 5e5),
    ("LunarLander-v2", 5e5),
    ("Swimmer-v4", 5e5),
    ("HalfCheetah-v4", 3e6),
    ("Boxing-v5", 1e8),
    ("SpaceInvaders-v5", 1e8),
    
    ("Acrobot-v1", 5e5),
    ("Pendulum-v1", 5e5),
    ("BipedalWalker-v3", 2e6),
    
    ("Hopper-v4", 1e6),
    ("Walker2d-v4", 2e6),
    ("Ant-v4", 1e7),
    ("Humanoid-v4", 1e7),
    
    ("Atlantis-v5", 2e7),
    ("BeamRider-v5", 2e7),
    ("Pong-v5", 2e7),
    ("CrazyClimber-v5", 2e7),
    ("Enduro-v5",  2e7),
    
    ("Qbert-v5", 2e7),
    ("Seaquest-v5", 2e7),
]

ATARI_ENVS = [
    ("Atlantis-v5", 2e7),
    ("BeamRider-v5", 2e7),
    ("Boxing-v5", 1e8),
    ("CrazyClimber-v5", 2e7),
    ("Enduro-v5",  2e7),
    ("Pong-v5", 2e7),
    ("Qbert-v5", 2e7),
    ("Seaquest-v5", 2e7),
    ("SpaceInvaders-v5", 1e8),
]


ALL_GYM_ENVS = [
    ("CartPole-v1", 0.1, 4, 475,),
    ("Acrobot-v1", 0.05, 4, -100,),
    ("Pendulum-v1", .1, 32, -100,),
    ("LunarLander-v2", .1, 32, 200,),
    ("BipedalWalker-v3", .1, 48, 300),
    ("Swimmer-v4", .1, 4, 360),
    ("HalfCheetah-v4", 0.05, 17,  4800,),
    ("Hopper-v4", 0.05, 32,  3000,),
    ("Walker2d-v4", 0.05, 51,  3000,),
    ("Ant-v4", 0.05, 108,  5000,),
    ("Humanoid-v4", 0.01, 128, 6000,),
]

def load_data(env_folder):
    stats_list = []
    for strat in os.listdir(env_folder):
        path = os.path.join(env_folder, strat)
        if not os.path.isdir(path): continue
        for i, (run) in enumerate(os.listdir(path)):
            path = os.path.join(env_folder, strat, run)
            if not os.path.isdir(path): continue
            stats = pd.read_csv(os.path.join(path, "stats.csv"), skipinitialspace=True)
            stats['run'] = i
            stats['folder'] = run
            stats['strat'] = strat
            sig, lamb = strat.split("sigma-")[1].split("-lambda-")
            stats['method'] = stats.strat.str.split("-norm-").str[0]
            stats['sigma0'] = float(sig)
            stats['lambda'] = int(lamb)
            stats['train'] = stats[['best', 'current']].max(axis=1)
            stats['expected_test'] = stats[['best_median', 'current_median']].max(axis=1)
            stats_list.append(stats)
    return pd.concat(stats_list, ignore_index=True)


In [4]:
rl_data = pd.read_pickle("../data2/rl_data3.pkl")
es_data = pd.DataFrame()
for env_name, time in ENVS:
    if not env_name.endswith("v5"):
        env_data = pd.read_pickle(f"../data3/{env_name}/data_hyp.pkl")
    else:
        env_data = pd.read_pickle(f"../data3/{env_name}/data.pkl")
        print(env_name)
    env_data = env_data[
        ["method", "run", "n_train_episodes", "n_train_timesteps", "test", "train", "lambda", "sigma0"]
    ]
    env_data['env'] = env_name
    es_data = pd.concat([es_data, env_data])

data = pd.concat([es_data, rl_data]).reset_index(drop=True)

Boxing-v5
SpaceInvaders-v5
Atlantis-v5
BeamRider-v5
Pong-v5
CrazyClimber-v5
Enduro-v5
Qbert-v5
Seaquest-v5


In [10]:
titles = "CSA-ES", "CMA-ES", "sep-CMA-ES", 'ARS', "DQN", "PPO", "SAC", "DQN*", "PPO*", "SAC*",
keys =  'csa','cma-es', 'sep-cma-es', 'ars', 'dqn_large', 'ppo_large', 'sac_large', 'dqn_small', 'ppo_small', 'sac_small',
    
max_reward_table = ' & Timesteps & ' + ' & '.join(titles) + ' \\\\ \n'
min_timesteps_table = ' & Threshold & ' + ' & '.join(titles) + ' \\\\ \n'

max_reward_table_rows = []
min_timesteps_table_rows = []


for env_name, *_, threshold in ALL_GYM_ENVS:
    env_data = data[(data.env == env_name)]
    max_time = dict(ENVS).get(env_name)
    mr_row = env_data[(env_data.n_train_timesteps < max_time)].groupby(["method", "run"])['test'].max().groupby("method").mean("test").astype(int)
    max_reward_dict = dict(mr_row)
    mr_row.name = env_name
    max_reward_table_rows.append(mr_row)
    
    min_time_dict = dict.fromkeys(set(env_data.method), 0)
    min_time_dict.update(**dict(env_data[env_data.test >= threshold].groupby(["method", "run"])['n_train_timesteps'].min().groupby("method").min().astype(int)))
    mt_row = pd.Series(min_time_dict)
    mt_row.name = env_name
    min_timesteps_table_rows.append(mt_row) 
    max_reward_table += env_name + ' & ' +  f'${max_time:.0e}$ & '.replace("e+0", "\cdot 10^") + \
        ' & '.join([str(max_reward_dict.get(k) or ' - ') for k in keys]) + ' \\\\ \n'
    
    min_timesteps_table += env_name + ' & ' +  f'{threshold} & ' + \
        ' & '.join([f"${v:.0e}$".replace("e+0", "\cdot 10^").replace("0\cdot 10^0", "\infty") if (v:=min_time_dict.get(k)) is not None else ' - ' for k in keys]) + ' \\\\ \n'

# print(max_reward_table)

pd.DataFrame(min_timesteps_table_rows)[['csa', 'cma-es', 'sep-cma-es', 'ars', 'ars-v2', 'sac_large']]

Unnamed: 0,csa,cma-es,sep-cma-es,ars,ars-v2,sac_large
CartPole-v1,3041.0,1642.0,2751.0,1060.0,560.0,
Acrobot-v1,4361.0,4828.0,3982.0,6720.0,12840.0,
Pendulum-v1,0.0,0.0,0.0,0.0,0.0,0.0
LunarLander-v2,52527.0,74259.0,59815.0,183790.0,193600.0,
BipedalWalker-v3,1751718.0,1809878.0,5093805.0,0.0,0.0,170169.0
Swimmer-v4,400000.0,320000.0,660000.0,0.0,0.0,0.0
HalfCheetah-v4,1620000.0,1170000.0,1620000.0,0.0,2432000.0,49999.0
Hopper-v4,437510.0,300257.0,1019185.0,0.0,824830.0,114798.0
Walker2d-v4,6242793.0,906492.0,1178517.0,0.0,1858660.0,
Ant-v4,25723035.0,25475306.0,24740792.0,0.0,0.0,386421.0


In [11]:
titles = "CSA-ES", "CMA-ES", "sep-CMA-ES", 'ARS', "DQN", "PPO", "SAC", "DQN*", 
keys =  'csa','cma-es', 'sep-cma-es', 'ars', 'dqn_large', 'ppo_large', 'sac_large', 'dqn_small',
    
max_reward_table = ' & Timesteps & ' + ' & '.join(titles) + ' \\\\ \n'
max_reward_table_rows = []

for env_name, *_,  max_time in ALL_GYM_ENVS:
    env_data = data[(data.env == env_name)]
    max_reward_table_row = env_data[(env_data.n_train_timesteps < max_time)].groupby(["method", "run"])['test'].max().groupby("method").mean("test").astype(int)
    max_reward_dict = dict(max_reward_table_row)
    max_reward_table += env_name + ' & ' +  f'${max_time:.0e}$ & '.replace("e+0", "\cdot 10^") + \
        ' & '.join([str(max_reward_dict.get(k) or ' - ') for k in keys]) + ' \\\\ \n'
    
    max_reward_table_row.name = env_name
    max_reward_table_rows.append(max_reward_table_row)
    

# from IPython.display import display, Latex
# display(Latex(f"{max_reward_table}"))
# print(max_reward_table)
pd.DataFrame(max_reward_table_rows)[['csa', 'cma-es', 'sep-cma-es', 'ars']]

KeyError: "['csa'] not in index"

In [15]:
rows = []
for env_name, max_time in ATARI_ENVS:
    env_data = data[(data.env == env_name)]
    row = env_data.groupby(["method", "run"]).max().groupby("method")['test'].std().round(1)
    row.name = env_name
    rows.append(row)
    
    
pd.DataFrame(rows)[['csa', 'cma-es', 'sep-cma-es', 'ars', 'ars-v2']]

method,csa,cma-es,sep-cma-es,ars,ars-v2
Atlantis-v5,13325.7,10928.1,9115.9,3505.6,5745.3
BeamRider-v5,1088.3,581.9,720.6,216.2,263.9
Boxing-v5,3.8,3.2,4.3,4.9,1.8
CrazyClimber-v5,10446.4,6595.2,7893.6,3202.9,5511.5
Enduro-v5,22.1,17.8,22.9,32.1,33.5
Pong-v5,3.8,10.3,9.4,2.2,2.4
Qbert-v5,4037.3,2339.2,3384.3,323.8,2190.7
Seaquest-v5,204.4,116.7,142.7,198.8,19.5
SpaceInvaders-v5,566.6,331.7,190.9,214.0,210.3


Unnamed: 0,env,method,run,n_train_timesteps,test
0,HalfCheetah-v3,ars,1,16000,-0.247008
1,HalfCheetah-v3,ars,1,32000,-0.182667
2,HalfCheetah-v3,ars,1,48000,-0.370420
3,HalfCheetah-v3,ars,1,64000,-0.316714
4,HalfCheetah-v3,ars,1,80000,-0.564291
...,...,...,...,...,...
74,Walker2d-v4,ars-v2,5,1761060,2099.485668
75,Walker2d-v4,ars-v2,5,1818120,2081.993894
76,Walker2d-v4,ars-v2,5,1878380,2205.783239
77,Walker2d-v4,ars-v2,5,1935930,1984.569454
