In [1]:
from plotly.subplots import make_subplots
from plotting import plot_values, update_layout
import glob
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px
import json
import os
import matplotlib.pyplot as plt

### Get the names of all files we want to read 

In [2]:
def get_best_reward(dir_path):
    """Return the highest ``AverageReward`` logged for one experiment run.

    The run's log is the tab-separated file ``log.txt`` located at
    ``os.path.join("..", dir_path, "log.txt")``.

    Parameters
    ----------
    dir_path : str
        Directory of the run, joined onto ``".."`` to locate ``log.txt``.

    Returns
    -------
    number
        Maximum of the ``AverageReward`` column, or the sentinel ``-1`` when
        the log cannot be used (missing file, unparsable table, missing
        column, invalid ``dir_path``, ...).
    """
    try:
        log = pd.read_csv(os.path.join("..", dir_path, "log.txt"), sep="\t")
        return log["AverageReward"].max()
    except Exception as exc:
        # Best-effort on purpose: one broken run must not abort the whole scan.
        # Catching Exception (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate, so a long scan can still be
        # interrupted. The exception type is printed so the cause is visible.
        print("?", dir_path, type(exc).__name__)
        return -1

def label_algorithm(filter):
    """Map an observation-filter name to its ARS variant label.

    Returns ``"ARS-v2"`` when ``filter`` equals ``"MeanStdFilter"`` and
    ``"ARS-v1"`` for every other value.
    """
    uses_mean_std = filter == "MeanStdFilter"
    return "ARS-v2" if uses_mean_std else "ARS-v1"

In [3]:
# Load the hyper-parameters (params.json) of every experiment run, one record
# per run. glob returns paths in arbitrary, filesystem-dependent order, so the
# paths are sorted to make the row order (and any order-dependent step further
# down, such as keeping the first of several tied rows) reproducible.
records = []
for experiment_file in sorted(glob.glob("../data/*/*/params.json")):
    with open(experiment_file, "r", encoding="utf-8") as file:
        record = json.load(file)
    records.append(record)

all_data = pd.DataFrame.from_records(records)

# Short task name, e.g. "SafetyHalfCheetahVelocity-v1" -> "HalfCheetah".
# regex=False: both patterns are literal substrings, not regular expressions.
all_data["task"] = (
    all_data["env_name"]
    .str.replace("Safety", "", regex=False)
    .str.replace("Velocity-v1", "", regex=False)
)

# "ARS-v2" for runs using MeanStdFilter, "ARS-v1" otherwise.
all_data["algo"] = all_data["filter"].apply(label_algorithm)

# Best AverageReward found in each run's log; get_best_reward returns -1 for
# runs whose log could not be read.
all_data["reward"] = all_data["dir_path"].apply(get_best_reward)

# Display only: hide the path column in the rendered table. all_data itself
# keeps dir_path.
all_data.drop(columns="dir_path")

Unnamed: 0,delta_std,deltas_used,env_name,filter,n_directions,n_iter,n_workers,policy_type,rollout_length,seed,shift,step_size,transform,task,algo,reward
0,0.02,40,SafetyHalfCheetahVelocity-v1,NoFilter,40,1000,10,linear,1000,1,0,0.02,norm_clip:1,HalfCheetah,ARS-v1,3253.751523
1,0.02,20,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,1,0,0.02,component_clip:0.5,HalfCheetah,ARS-v2,6209.822468
2,0.02,20,SafetyHalfCheetahVelocity-v1,NoFilter,40,1000,10,linear,1000,5,0,0.02,norm_clip:1,HalfCheetah,ARS-v1,3838.236406
3,0.02,20,SafetyHalfCheetahVelocity-v1,MeanStdFilter,40,1000,10,linear,1000,5,0,0.02,component_clip:0.5,HalfCheetah,ARS-v2,4221.828595
4,0.02,40,SafetyHalfCheetahVelocity-v1,MeanStdFilter,40,1000,10,linear,1000,1,0,0.02,signed,HalfCheetah,ARS-v2,3371.637195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857,0.01,10,SafetySwimmerVelocity-v1,MeanStdFilter,30,1000,10,linear,1000,3,0,0.02,norm_clip:1,Swimmer,ARS-v2,355.178504
858,0.01,30,SafetySwimmerVelocity-v1,MeanStdFilter,50,1000,10,linear,1000,3,0,0.02,component_clip:0.5,Swimmer,ARS-v2,355.259914
859,0.02,10,SafetySwimmerVelocity-v1,MeanStdFilter,30,1000,10,linear,1000,1,0,0.02,norm_clip:1,Swimmer,ARS-v2,354.633987
860,0.02,10,SafetySwimmerVelocity-v1,MeanStdFilter,30,1000,10,linear,1000,1,0,0.02,none,Swimmer,ARS-v2,356.009718


For each (task, algorithm, transform) combination, the table below lists the hyperparameters of the run that achieved the highest reward.

In [4]:
# Columns that identify one (task, algorithm, transform) combination.
group_keys = ["task", "algo", "transform"]

# Broadcast each group's maximum reward onto every row of that group.
all_data["best_reward"] = all_data.groupby(group_keys)["reward"].transform("max")

# Keep the rows that reach their group's maximum; when several rows tie, only
# the first one (in all_data order) survives. The per-run columns are then
# dropped so that only the hyper-parameters remain.
is_best = all_data["reward"].eq(all_data["best_reward"])
best_data = (
    all_data.loc[is_best]
    .drop_duplicates(subset=group_keys, keep="first", ignore_index=True)
    .drop(columns=["reward", "best_reward", "dir_path", "seed"])
)
best_data

Unnamed: 0,delta_std,deltas_used,env_name,filter,n_directions,n_iter,n_workers,policy_type,rollout_length,shift,step_size,transform,task,algo
0,0.02,20,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,0,0.02,component_clip:0.5,HalfCheetah,ARS-v2
1,0.02,20,SafetyHalfCheetahVelocity-v1,NoFilter,120,1000,10,linear,1000,0,0.02,none,HalfCheetah,ARS-v1
2,0.02,20,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,0,0.02,none,HalfCheetah,ARS-v2
3,0.02,20,SafetyHalfCheetahVelocity-v1,NoFilter,80,1000,10,linear,1000,0,0.02,signed,HalfCheetah,ARS-v1
4,0.02,20,SafetyHalfCheetahVelocity-v1,NoFilter,40,1000,10,linear,1000,0,0.02,norm_clip:1,HalfCheetah,ARS-v1
5,0.02,20,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,0,0.02,norm_clip:1,HalfCheetah,ARS-v2
6,0.02,20,SafetyHalfCheetahVelocity-v1,NoFilter,120,1000,10,linear,1000,0,0.02,component_clip:0.5,HalfCheetah,ARS-v1
7,0.02,20,SafetyHalfCheetahVelocity-v1,MeanStdFilter,40,1000,10,linear,1000,0,0.02,signed,HalfCheetah,ARS-v2
8,0.025,40,SafetyWalker2dVelocity-v1,NoFilter,80,1000,10,linear,1000,1,0.02,none,Walker2d,ARS-v1
9,0.025,40,SafetyWalker2dVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,1,0.03,none,Walker2d,ARS-v2


In [5]:
# Size check: (number of rows, number of columns) of best_data.
best_data.shape

(48, 14)

In [6]:
def to_run(seed_list):
    """Return the seeds still to be run, given the seeds already present.

    Candidates are the integers 0..5. Every candidate found in ``seed_list``
    is discarded, and of the remaining ones the first (smallest) is skipped
    as well.

    NOTE(review): why the first missing seed is skipped is not visible in this
    notebook -- confirm the intent before changing it.
    """
    missing = []
    for candidate in range(6):
        if candidate in seed_list:
            continue
        missing.append(candidate)
    return missing[1:]

def label_filter(algo):
    """Map an ARS variant label to its observation-filter name.

    Returns ``"NoFilter"`` when ``algo`` equals ``"ARS-v1"`` and
    ``"MeanStdFilter"`` for every other value.
    """
    return "NoFilter" if algo == "ARS-v1" else "MeanStdFilter"

# All columns of best_data serve as the join / grouping key.
param_columns = list(best_data)

# For every best configuration, collect the seeds of all runs in all_data that
# share exactly the same values in those columns.
seeds_done = (
    best_data.merge(all_data, how="inner", on=param_columns)
    .groupby(param_columns, as_index=False)[["seed"]]
    .agg(list)
)

# Restrict to the two transforms of interest and replace the list of seeds
# already present by the list of seeds still to be run.
wanted_transforms = ["none", "component_clip:0.5"]
pending = (
    seeds_done[seeds_done["transform"].isin(wanted_transforms)]
    .reset_index(drop=True)
    .assign(seed=lambda frame: frame["seed"].apply(to_run))
)

# Discard configurations with nothing left to run, then report the total
# number of runs that remain.
pending = pending[pending.seed.apply(len) > 0]
print(pending.seed.apply(len).sum())

# One row per (configuration, seed); env_name and filter are rebuilt from the
# task / algo labels, which are dropped afterwards.
df = (
    pending.explode("seed", ignore_index=True)
    .assign(
        env_name=lambda frame: frame.task.apply(lambda x: f"Safety{x}Velocity-v1"),
        filter=lambda frame: frame["algo"].apply(label_filter),
    )
    .drop(columns=["task", "algo"])
)
print(df.shape)
df.env_name.value_counts()

0
(0, 13)


Series([], Name: count, dtype: int64)

In [7]:
# Write the remaining (configuration, seed) rows to ../to_run.json as a JSON
# list with one object per row (orient="records"), indented by 4 spaces.
df.to_json("../to_run.json", orient="records", indent=4)