In [9]:
from plotly.subplots import make_subplots
from plotting import plot_values, update_layout
import glob
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px
import json
import os
import matplotlib.pyplot as plt

### Get the names of all files we want to read 

In [2]:
records = []
for experiment_file in glob.glob("../data/*/*/params.json", recursive=True):
    with open(experiment_file, "r", encoding="utf-8") as file:
        record = json.load(file)
        record["one_sided"] = record.get("one_sided", False)
        records.append(record)

all_data = pd.DataFrame.from_records(records)
def get_best_reward(dir_path):
    try:
        x = pd.read_csv("../"+dir_path+"/log.txt", sep="\t")
    except:
        print(dir_path)
        return -1
    return x["AverageReward"].max()

all_data['reward'] = all_data.dir_path.apply(get_best_reward)
all_data = all_data[all_data.reward > 0]
all_data['task'] = all_data.env_name.str.replace("Safety", '').str.replace("Velocity", '')
all_data['best_env_reward'] = all_data.groupby(["task", "one_sided"]).reward.transform('max')
data = (
    all_data[all_data.reward == all_data.best_env_reward]
    .reset_index(drop=True)
    .drop(["filter", "policy_type", "rollout_length", "shift", "best_env_reward", "env_name"], axis=1)
)

print(data.columns)
print(data.shape)

data1, data2 = data[data.one_sided].reset_index(drop=True), data[~data.one_sided].reset_index(drop=True)
display(data2)
data1

Index(['delta_std', 'deltas_used', 'dir_path', 'n_directions', 'n_iter',
       'n_workers', 'seed', 'step_size', 'one_sided', 'reward', 'task'],
      dtype='object')
(10, 11)


Unnamed: 0,delta_std,deltas_used,dir_path,n_directions,n_iter,n_workers,seed,step_size,one_sided,reward,task
0,0.03,8,data/SafetyHopperVelocity-v1/1725646921.2394385,32,1000,10,0,0.02,False,3759.856103,Hopper-v1
1,0.025,8,data/SafetyHalfCheetahVelocity-v1/1725699845.6...,32,1000,10,1,0.02,False,6579.093664,HalfCheetah-v1
2,0.0075,230,data/SafetyHumanoidVelocity-v1/1724932285.1594813,230,1000,10,1,0.02,False,8343.450275,Humanoid-v1
3,0.02,3,data/SafetySwimmerVelocity-v1/1725708292.1058798,10,1000,10,1,0.01,False,360.476544,Swimmer-v1
4,0.02,40,data/SafetyAntVelocity-v1/1725884544.292244,80,1000,10,1,0.02,False,5405.538917,Ant-v1
5,0.025,30,data/SafetyWalker2dVelocity-v1/1725652020.0178647,80,1000,10,0,0.02,False,5931.48521,Walker2d-v1


Unnamed: 0,delta_std,deltas_used,dir_path,n_directions,n_iter,n_workers,seed,step_size,one_sided,reward,task
0,0.02,32,data/SafetyHopperVelocity-v1/1726902256.4911687,32,1000,10,0,0.02,True,3187.735834,Hopper-v1
1,0.01,230,data/SafetyHumanoidVelocity-v1/1726900585.92097,350,1000,10,1,0.01,True,4273.545104,Humanoid-v1
2,0.025,60,data/SafetyAntVelocity-v1/1726938583.4316018,80,1000,10,1,0.015,True,3291.96547,Ant-v1
3,0.025,30,data/SafetyWalker2dVelocity-v1/1726892910.1610074,100,1000,10,0,0.02,True,1479.499932,Walker2d-v1


In [3]:
x = data1.drop(["dir_path", 'seed', 'n_iter', 'n_workers', 'reward'], axis=1)
x = x[['task'] + [col for col in x if col not in ("env_name", "task")]].sort_values('task', ignore_index=True)
print(x.to_latex(
    index=False,
    caption="Optimal hyperparameters from search for ARS-1",
    escape=True,
    float_format="{:.2f}".format
))

x = data2.drop(["dir_path", 'seed', 'n_iter', 'n_workers', 'reward'], axis=1)
x = x[['task'] + [col for col in x if col not in ("env_name", "task")]].sort_values('task', ignore_index=True)
print(x.to_latex(
    index=False,
    caption="Optimal hyperparameters from search for ARS-2",
    escape=True,
    float_format="{:.2f}".format
))

\begin{table}
\caption{Optimal hyperparameters from search for ARS-1}
\begin{tabular}{lrrrrr}
\toprule
task & delta\_std & deltas\_used & n\_directions & step\_size & one\_sided \\
\midrule
Ant-v1 & 0.03 & 60 & 80 & 0.01 & True \\
Hopper-v1 & 0.02 & 32 & 32 & 0.02 & True \\
Humanoid-v1 & 0.01 & 230 & 350 & 0.01 & True \\
Walker2d-v1 & 0.03 & 30 & 100 & 0.02 & True \\
\bottomrule
\end{tabular}
\end{table}

\begin{table}
\caption{Optimal hyperparameters from search for ARS-2}
\begin{tabular}{lrrrrr}
\toprule
task & delta\_std & deltas\_used & n\_directions & step\_size & one\_sided \\
\midrule
Ant-v1 & 0.02 & 40 & 80 & 0.02 & False \\
HalfCheetah-v1 & 0.03 & 8 & 32 & 0.02 & False \\
Hopper-v1 & 0.03 & 8 & 32 & 0.02 & False \\
Humanoid-v1 & 0.01 & 230 & 230 & 0.02 & False \\
Swimmer-v1 & 0.02 & 3 & 10 & 0.01 & False \\
Walker2d-v1 & 0.03 & 30 & 80 & 0.02 & False \\
\bottomrule
\end{tabular}
\end{table}



In [22]:
def plot_env(env_name, data):
    df = []
    for filename, one_sided in zip(data.dir_path, data.one_sided):
        x = pd.read_csv("../"+filename+"/log.txt", sep="\t")
        x["algorithm"] = "ARS-1" if one_sided else "ARS-2"
        df.append(x)
    df = pd.concat(df, axis=0).groupby(["Iteration", "algorithm"])[["timesteps", "AverageReward"]].mean().reset_index()
    # df.drop("Iteration", axis=1, inplace=True)
    df.rename({
        'timesteps': "steps",
        'AverageReward': "reward"
    }, axis=1, inplace=True)
    df.steps = df.steps.astype(int)
    df["env_name"] = env_name
    fig = px.line(data_frame=df, x="Iteration", y="reward", color="algorithm")
    update_layout(fig, env_name.replace("Safety","").replace("Velocity", ""), "Iteration", "Reward", row=1, col=1, upkwargs=dict(width=500, height=400))
    fig.write_image(f"../images/{env_name}.png", scale=3)
    fig.show()
    return df

In [23]:
for env_name, df in data.groupby("task"):
    plot_env(env_name, df)


### Unstability of SFR-1 wrt hyperparams.

In [18]:
sfr1 = []
for path, one_sided in zip(all_data.dir_path, all_data.one_sided):
    x = pd.read_csv(os.path.join("..", path, "log.txt"), sep="\t")
    _, x['task'], x['dir_path'] = path.split("/")
    x["one_sided"] = one_sided
    sfr1.append(x)
sfr1 = pd.concat(sfr1)
sfr1.head()

Unnamed: 0,Time,Iteration,AverageReward,StdRewards,MaxRewardRollout,MinRewardRollout,timesteps,task,dir_path,one_sided
0,3.120859,10,85.013145,8.373057,113.143287,70.084741,9039,SafetyHopperVelocity-v1,1724913801.39389,False
1,3.893283,20,116.756961,3.622604,127.02975,109.790013,19099,SafetyHopperVelocity-v1,1724913801.39389,False
2,4.887338,30,253.983855,6.214386,264.590033,237.759234,31264,SafetyHopperVelocity-v1,1724913801.39389,False
3,6.534218,40,699.049306,58.457038,871.497197,641.05983,51083,SafetyHopperVelocity-v1,1724913801.39389,False
4,8.626153,50,827.564412,5.561508,872.085952,820.293742,78719,SafetyHopperVelocity-v1,1724913801.39389,False


In [19]:
std1 = sfr1.groupby(["task","Iteration", "one_sided"]).AverageReward.std().reset_index()
std1

Unnamed: 0,task,Iteration,one_sided,AverageReward
0,SafetyAntVelocity-v1,10,False,262.374173
1,SafetyAntVelocity-v1,10,True,176.683966
2,SafetyAntVelocity-v1,20,False,377.357544
3,SafetyAntVelocity-v1,20,True,307.475426
4,SafetyAntVelocity-v1,30,False,481.286315
...,...,...,...,...
995,SafetyWalker2dVelocity-v1,980,True,471.524010
996,SafetyWalker2dVelocity-v1,990,False,1498.642665
997,SafetyWalker2dVelocity-v1,990,True,458.538249
998,SafetyWalker2dVelocity-v1,1000,False,1493.380902


In [21]:
for env_name, df in std1.groupby("task"):
    fig = px.line(data_frame=df, x="Iteration", y="AverageReward", color="one_sided")
    update_layout(fig, env_name.replace("Safety","").replace("Velocity", ""), "Iteration", "reward stddev", row=1, col=1, upkwargs=dict(width=500, height=400))
    fig.show()