In [1]:
from plotly.subplots import make_subplots
from plotting import plot_values, update_layout
import glob
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px
import json
import os
import matplotlib.pyplot as plt
import numpy as np

In [2]:
import webcolors
def to_opacity(name, opacity):
    r, g, b = webcolors.hex_to_rgb(name)
    return f"rgba({r}, {g}, {b}, {opacity})"

### Get the names of all files we want to read 

In [3]:
records = []
for experiment_file in glob.glob("../data/*/*/params.json", recursive=True):
    with open(experiment_file, "r", encoding="utf-8") as file:
        record = json.load(file)
        records.append(record)

all_data = pd.DataFrame.from_records(records)
all_data.head()

Unnamed: 0,delta_std,deltas_used,dir_path,env_name,filter,n_directions,n_iter,n_workers,policy_type,rollout_length,seed,shift,step_size,transform
0,0.02,40,data/SafetyHalfCheetahVelocity-v1/1730565654.9...,SafetyHalfCheetahVelocity-v1,NoFilter,40,1000,10,linear,1000,1,0,0.02,norm_clip:1
1,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730500751.4...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,1,0,0.02,component_clip:0.5
2,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730895627.8...,SafetyHalfCheetahVelocity-v1,NoFilter,40,1000,10,linear,1000,5,0,0.02,norm_clip:1
3,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730821001.9...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,40,1000,10,linear,1000,5,0,0.02,component_clip:0.5
4,0.02,40,data/SafetyHalfCheetahVelocity-v1/1730536163.2...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,40,1000,10,linear,1000,1,0,0.02,signed


In [4]:
def get_best_reward(dir_path):
    try:
        x = pd.read_csv(os.path.join("..", dir_path, "log.txt"), sep="\t")
        return x["AverageReward"].max()
    except:
        print("?", dir_path)
        return -1

def label_algorithm(filter):
    if filter == "MeanStdFilter":
        return "ARS-v2"
    return "ARS-v1"

In [5]:
all_data["reward"] = all_data.dir_path.apply(get_best_reward)
# all_data = all_data[all_data.reward > 0]
all_data["task"] = all_data.env_name.str.replace("Safety", "").str.replace(
    "Velocity-v1", ""
)
all_data["algo"] = all_data["filter"].apply(label_algorithm)
all_data["transform"] = all_data["transform"].apply(lambda x: x.split(":")[0])
all_data["best_env_reward"] = all_data.groupby(
    ["task", "algo", "transform"]
).reward.transform("max")
data = (
    all_data[all_data.reward == all_data.best_env_reward]
    .reset_index(drop=True)
    .drop(
        [
            "filter",
            "policy_type",
            "rollout_length",
            "shift",
            "best_env_reward",
            "env_name",
        ],
        axis=1,
    )
)

data["best_reward"] = data.groupby(["task", "algo", "transform"]).reward.transform(
    "max"
)
best_data = (
    data[data.reward == data.best_reward]
    .reset_index(drop=True)
    .drop_duplicates(["task", "algo", "transform"], keep="first")
)
params = [
    "delta_std",
    "deltas_used",
    "n_directions",
    "n_iter",
    "n_workers",
    "step_size",
    "transform",
    "task",
    "algo",
]
best_data = all_data.merge(best_data[params], 'inner', params)
print(best_data.shape)
best_data.head()

(215, 18)


Unnamed: 0,delta_std,deltas_used,dir_path,env_name,filter,n_directions,n_iter,n_workers,policy_type,rollout_length,seed,shift,step_size,transform,reward,task,algo,best_env_reward
0,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730500751.4...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,1,0,0.02,component_clip,6209.822468,HalfCheetah,ARS-v2,6209.822468
1,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730840572.3...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,4,0,0.02,component_clip,5835.287921,HalfCheetah,ARS-v2,6209.822468
2,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730847035.8...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,5,0,0.02,component_clip,4362.630043,HalfCheetah,ARS-v2,6209.822468
3,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730827593.8...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,2,0,0.02,component_clip,3955.95568,HalfCheetah,ARS-v2,6209.822468
4,0.02,20,data/SafetyHalfCheetahVelocity-v1/1730834168.8...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,80,1000,10,linear,1000,3,0,0.02,component_clip,3882.183609,HalfCheetah,ARS-v2,6209.822468


In [6]:
best_data[['task', 'algo', 'transform']].value_counts()

task         algo    transform     
Ant          ARS-v1  component_clip    5
Swimmer      ARS-v1  signed            5
Hopper       ARS-v2  signed            5
Ant          ARS-v1  none              5
Humanoid     ARS-v1  none              5
             ARS-v2  component_clip    5
                     none              5
Swimmer      ARS-v1  component_clip    5
                     none              5
                     norm_clip         5
             ARS-v2  component_clip    5
Hopper       ARS-v2  none              5
Swimmer      ARS-v2  none              5
                     norm_clip         5
                     signed            5
Walker2d     ARS-v1  component_clip    5
                     none              5
             ARS-v2  component_clip    5
                     none              5
                     norm_clip         5
Hopper       ARS-v2  norm_clip         5
Humanoid     ARS-v1  component_clip    5
Hopper       ARS-v2  component_clip    5
HalfCheetah  ARS-v2  

In [7]:
def to_paper(x: pd.DataFrame, caption, index=False) -> str:
    return (x.to_latex(
    index=index,
    caption=caption,
    escape=True,
    float_format="{:.2f}".format
)
.replace("\\toprule", "\\hline")
.replace("\\midrule", "\\hline")
.replace("\\bottomrule", "\\hline"))

In [8]:
def f(x):
    return f"{x['mean']:.2f}\\pm {x['std']:.2f}"
out = best_data.groupby(["task", "algo", "transform"]).reward.agg(['mean', 'std','min', 'max'])#
out['f'] = out.apply(f, axis=1)
display(out)

best_perf = pd.pivot_table(
    out.reset_index(),
    index=["task", "algo"],
    columns=["transform"],
    values="f",
    aggfunc='first'
).round(2)
display(best_perf)
print(to_paper(best_perf, "Performance on each task", index=True))

best_perf = pd.pivot_table(
    out.reset_index(),
    index=["task", "algo"],
    columns=["transform"],
    values="max",
    aggfunc='first'
).round(2)
display(best_perf)
print(to_paper(best_perf, "Best perf on each task", index=True))


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std,min,max,f
task,algo,transform,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Ant,ARS-v1,component_clip,2929.676539,219.383197,2694.460346,3256.608653,2929.68\pm 219.38
Ant,ARS-v1,none,3095.53587,131.375235,2910.784365,3241.832802,3095.54\pm 131.38
Ant,ARS-v1,norm_clip,2357.141203,167.930101,2074.215075,2517.563398,2357.14\pm 167.93
Ant,ARS-v1,signed,2330.583398,56.48567,2246.023757,2393.792193,2330.58\pm 56.49
Ant,ARS-v2,component_clip,4682.744543,683.344924,3475.257392,5145.876784,4682.74\pm 683.34
Ant,ARS-v2,none,4344.523844,651.678055,3598.612299,5167.538895,4344.52\pm 651.68
Ant,ARS-v2,norm_clip,4243.698876,739.214567,3010.497857,4769.367497,4243.70\pm 739.21
Ant,ARS-v2,signed,4251.799917,505.480325,3391.426548,4629.304931,4251.80\pm 505.48
HalfCheetah,ARS-v1,component_clip,4097.170105,156.326185,3918.683155,4301.485767,4097.17\pm 156.33
HalfCheetah,ARS-v1,none,3889.746609,601.838226,2822.411604,4289.968569,3889.75\pm 601.84


Unnamed: 0_level_0,transform,component_clip,none,norm_clip,signed
task,algo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ant,ARS-v1,2929.68\pm 219.38,3095.54\pm 131.38,2357.14\pm 167.93,2330.58\pm 56.49
Ant,ARS-v2,4682.74\pm 683.34,4344.52\pm 651.68,4243.70\pm 739.21,4251.80\pm 505.48
HalfCheetah,ARS-v1,4097.17\pm 156.33,3889.75\pm 601.84,3786.04\pm 140.15,3345.06\pm 518.20
HalfCheetah,ARS-v2,4849.18\pm 1094.69,4621.09\pm 908.41,4377.46\pm 724.93,4110.96\pm 846.69
Hopper,ARS-v1,2312.74\pm 817.09,2603.87\pm 591.21,3055.70\pm nan,2313.28\pm 271.81
Hopper,ARS-v2,3639.82\pm 52.60,3719.36\pm 110.70,3511.56\pm 151.94,3518.42\pm 103.51
Humanoid,ARS-v1,207.57\pm 0.29,207.57\pm 0.29,207.76\pm nan,207.76\pm nan
Humanoid,ARS-v2,7474.23\pm 363.71,7393.55\pm 308.10,1149.50\pm 195.47,1003.92\pm 123.59
Swimmer,ARS-v1,356.83\pm 0.35,356.82\pm 0.80,356.52\pm 0.54,356.10\pm 0.56
Swimmer,ARS-v2,345.57\pm 27.36,357.60\pm 1.17,356.54\pm 0.55,354.95\pm 2.39


\begin{table}
\caption{Performance on each task}
\begin{tabular}{llllll}
\hline
 & transform & component\_clip & none & norm\_clip & signed \\
task & algo &  &  &  &  \\
\hline
\multirow[t]{2}{*}{Ant} & ARS-v1 & 2929.68\textbackslash pm 219.38 & 3095.54\textbackslash pm 131.38 & 2357.14\textbackslash pm 167.93 & 2330.58\textbackslash pm 56.49 \\
 & ARS-v2 & 4682.74\textbackslash pm 683.34 & 4344.52\textbackslash pm 651.68 & 4243.70\textbackslash pm 739.21 & 4251.80\textbackslash pm 505.48 \\
\cline{1-6}
\multirow[t]{2}{*}{HalfCheetah} & ARS-v1 & 4097.17\textbackslash pm 156.33 & 3889.75\textbackslash pm 601.84 & 3786.04\textbackslash pm 140.15 & 3345.06\textbackslash pm 518.20 \\
 & ARS-v2 & 4849.18\textbackslash pm 1094.69 & 4621.09\textbackslash pm 908.41 & 4377.46\textbackslash pm 724.93 & 4110.96\textbackslash pm 846.69 \\
\cline{1-6}
\multirow[t]{2}{*}{Hopper} & ARS-v1 & 2312.74\textbackslash pm 817.09 & 2603.87\textbackslash pm 591.21 & 3055.70\textbackslash pm nan & 2313.28\text

Unnamed: 0_level_0,transform,component_clip,none,norm_clip,signed
task,algo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ant,ARS-v1,3256.61,3241.83,2517.56,2393.79
Ant,ARS-v2,5145.88,5167.54,4769.37,4629.3
HalfCheetah,ARS-v1,4301.49,4289.97,3944.57,3973.4
HalfCheetah,ARS-v2,6209.82,6142.34,5359.48,5620.93
Hopper,ARS-v1,3018.37,3261.84,3055.7,2576.03
Hopper,ARS-v2,3692.31,3813.62,3699.76,3672.91
Humanoid,ARS-v1,207.95,207.95,207.76,207.76
Humanoid,ARS-v2,7928.93,7699.32,1320.84,1158.05
Swimmer,ARS-v1,357.24,357.76,357.03,356.71
Swimmer,ARS-v2,358.89,359.15,356.98,358.88


\begin{table}
\caption{Best perf on each task}
\begin{tabular}{llrrrr}
\hline
 & transform & component\_clip & none & norm\_clip & signed \\
task & algo &  &  &  &  \\
\hline
\multirow[t]{2}{*}{Ant} & ARS-v1 & 3256.61 & 3241.83 & 2517.56 & 2393.79 \\
 & ARS-v2 & 5145.88 & 5167.54 & 4769.37 & 4629.30 \\
\cline{1-6}
\multirow[t]{2}{*}{HalfCheetah} & ARS-v1 & 4301.49 & 4289.97 & 3944.57 & 3973.40 \\
 & ARS-v2 & 6209.82 & 6142.34 & 5359.48 & 5620.93 \\
\cline{1-6}
\multirow[t]{2}{*}{Hopper} & ARS-v1 & 3018.37 & 3261.84 & 3055.70 & 2576.03 \\
 & ARS-v2 & 3692.31 & 3813.62 & 3699.76 & 3672.91 \\
\cline{1-6}
\multirow[t]{2}{*}{Humanoid} & ARS-v1 & 207.95 & 207.95 & 207.76 & 207.76 \\
 & ARS-v2 & 7928.93 & 7699.32 & 1320.84 & 1158.05 \\
\cline{1-6}
\multirow[t]{2}{*}{Swimmer} & ARS-v1 & 357.24 & 357.76 & 357.03 & 356.71 \\
 & ARS-v2 & 358.89 & 359.15 & 356.98 & 358.88 \\
\cline{1-6}
\multirow[t]{2}{*}{Walker2d} & ARS-v1 & 4009.33 & 3810.41 & 3045.92 & 3010.70 \\
 & ARS-v2 & 4427.34 & 3909.28 &

In [9]:
for (task, algo), data1 in best_data.groupby(["task", "algo"]):

    frames = []
    for filename, transform, algo, seed in zip(data1.dir_path, data1['transform'], data1["algo"], data.seed):
        x = pd.read_csv("../"+filename+"/log.txt", sep="\t")[["AverageReward", "timesteps", "Iteration"]]
        x["task"] = task
        x["algo"] = algo
        x["transform"] = transform
        x["seed"] = seed
        frames.append(x)
    frame = pd.concat(frames, ignore_index=True)

    break



frame

Unnamed: 0,AverageReward,timesteps,Iteration,task,algo,transform,seed
0,1000.182819,0,0,Ant,ARS-v1,component_clip,1
1,1005.282180,1568175,10,Ant,ARS-v1,component_clip,1
2,1037.155642,3126254,20,Ant,ARS-v1,component_clip,1
3,1209.642860,4641563,30,Ant,ARS-v1,component_clip,1
4,1454.651500,6014781,40,Ant,ARS-v1,component_clip,1
...,...,...,...,...,...,...,...
721,2356.724669,43603163,320,Ant,ARS-v1,norm_clip,1
722,2392.946132,44993535,330,Ant,ARS-v1,norm_clip,1
723,2372.574421,46374588,340,Ant,ARS-v1,norm_clip,1
724,2400.760870,47761674,350,Ant,ARS-v1,norm_clip,1


In [10]:
def interp_frames(frames, n=1000):
    max_timestep = max(x.timesteps.max() for x in frames)
    common_timesteps = np.linspace(0, max_timestep, num=n)
    result = []
    for frame in frames:
        new_frame = pd.DataFrame({"Iteration": range(n)})
        new_frame["timesteps"] = common_timesteps
        new_frame["AverageReward"] = np.interp(common_timesteps, frame.timesteps, frame.AverageReward)
        for col in ["task", "algo", "transform", "seed"]:
            new_frame[col] = frame[col][0]
        result.append(new_frame)
    return pd.concat(result, ignore_index=True)

In [11]:
for (task, algo), data1 in best_data.groupby(["task", "algo"]):

    frames = []
    for filename, transform, algo, seed in zip(data1.dir_path, data1['transform'], data1["algo"], data.seed):
        x = pd.read_csv("../"+filename+"/log.txt", sep="\t")[["AverageReward", "timesteps", "Iteration"]]
        x["task"] = task
        x["algo"] = algo
        x["transform"] = transform
        x["seed"] = seed
        frames.append(x)

    # frame = pd.concat(frames, ignore_index=True)
    frame = interp_frames(frames)
    frame = frame.groupby(["Iteration", 'transform']).agg({
        'AverageReward': ['mean', "std", "max"],
        "timesteps": 'mean'
    }).fillna(0)

    frame.columns = [f"{x}_{y}" for x, y in frame.columns]
    frame.reset_index(inplace=True)
    frame['algo'] = algo
    frame['task'] = task
    frame.rename({
        'timesteps_mean': "steps",
        'AverageReward_mean': "reward",
        'AverageReward_std': "reward_std",
        'AverageReward_max': "reward_max",
    }, axis=1, inplace=True)
    frame.steps = frame.steps.astype(int)
    frame.sort_values(["transform", "steps"], ignore_index=True, inplace=True)

    alpha = 0.2 if task=="Humanoid" and algo=="ARS-v1" else 0.5
    # frame.reward = frame.reward.ewm(alpha=alpha, adjust=False).mean()
    # fig = px.line(data_frame=frame, x="steps", y="reward", color="transform")
    # fig.update_traces(opacity=.7)
    fig = make_subplots()
    for (transform, mini_frame), color in zip(frame.groupby("transform"), px.colors.qualitative.Plotly):
        fig.add_trace(go.Scatter(
            x=mini_frame.steps,
            y=mini_frame.reward,
            mode="lines",
            name=transform,
            line=dict(width=2, color=color)
        ))

        fig.add_trace(
            go.Scatter(
                x=mini_frame.steps,
                y=mini_frame.reward-mini_frame.reward_std,
                mode="lines",
                line=dict(width=0.01, color=color),
                showlegend=False,
            ),
        )

        fig.add_trace(
            go.Scatter(
                x=mini_frame.steps,
                y=np.minimum(mini_frame.reward+mini_frame.reward_std, mini_frame.reward_max),
                mode="lines",
                line=dict(width=0.01, color=color),
                fill="tonexty",
                fillcolor=to_opacity(color, 0.2),
                showlegend=False,
            ),
        )
    update_layout(fig, task + ": " + algo, "Total Env Interactions", "Reward", row=1, col=1, upkwargs=dict(width=750, height=600))
    if (task=="Humanoid" and algo=="ARS-v1"):
        fig.update_layout(legend=dict(yanchor="bottom", y=0.6, xanchor="right", x=0.99))
    fig.write_image(f"../static/{task}_{algo}.png", scale=1.5)
    # fig.show()

In [12]:
template = r"""{
\includegraphics[width=8cm]{images_update/TASK_ARS-v1.png}
\includegraphics[width=8cm]{images_update/TASK_ARS-v2.png}
}\\"""

for task in best_data.task.unique():
    print(template.replace("TASK", task))

{
\includegraphics[width=8cm]{images_update/HalfCheetah_ARS-v1.png}
\includegraphics[width=8cm]{images_update/HalfCheetah_ARS-v2.png}
}\\
{
\includegraphics[width=8cm]{images_update/Walker2d_ARS-v1.png}
\includegraphics[width=8cm]{images_update/Walker2d_ARS-v2.png}
}\\
{
\includegraphics[width=8cm]{images_update/Humanoid_ARS-v1.png}
\includegraphics[width=8cm]{images_update/Humanoid_ARS-v2.png}
}\\
{
\includegraphics[width=8cm]{images_update/Ant_ARS-v1.png}
\includegraphics[width=8cm]{images_update/Ant_ARS-v2.png}
}\\
{
\includegraphics[width=8cm]{images_update/Hopper_ARS-v1.png}
\includegraphics[width=8cm]{images_update/Hopper_ARS-v2.png}
}\\
{
\includegraphics[width=8cm]{images_update/Swimmer_ARS-v1.png}
\includegraphics[width=8cm]{images_update/Swimmer_ARS-v2.png}
}\\


In [None]:
for algo, data1 in data.groupby("algo"):
    x = data1.drop(["dir_path", 'seed', 'n_iter', 'n_workers', 'reward', "one_sided"], axis=1)
    x = x[['task'] + [col for col in x if col not in ("env_name", "task", "algo")]].sort_values('task', ignore_index=True)
    print(
        x.to_latex(
            index=False,
            caption=f"Optimal hyperparameters from search for {algo}",
            escape=True,
            float_format="{:.2f}".format
        )
        .replace("\\toprule", "\\hline")
        .replace("\\midrule", "\\hline")
        .replace("\\bottomrule", "\\hline")
    )

In [22]:
def plot_env(env_name, data):
    df = []
    for filename, algo in zip(data.dir_path, data.algo):
        x = pd.read_csv("../"+filename+"/log.txt", sep="\t")
        # x["algorithm"] = "ARS-1" if one_sided else "ARS-2"
        x["algorithm"] = algo
        df.append(x)
    df = pd.concat(df, axis=0).groupby(["Iteration", "algorithm"])[["timesteps", "AverageReward"]].mean().reset_index()
    # df.drop("Iteration", axis=1, inplace=True)
    df.rename({
        'timesteps': "steps",
        'AverageReward': "reward"
    }, axis=1, inplace=True)
    df.steps = df.steps.astype(int)
    df["env_name"] = env_name
    fig = px.line(data_frame=df, x="Iteration", y="reward", color="algorithm")
    update_layout(fig, env_name.replace("Safety","").replace("Velocity", ""), "Iteration", "Reward", row=1, col=1, upkwargs=dict(width=500, height=400))
    if "swimmer" in env_name.lower() or "walker" in env_name.lower() or "cheetah" in env_name.lower():
        fig.update_layout(legend=dict(xanchor="right", x=0.99, y=0.5))
    if "human" in env_name.lower():
        fig.update_layout(legend=dict(xanchor="left", x=0.0, y=0.5))
    fig.update_traces(opacity=.7)
    fig.write_image(f"../images/{env_name}.png", scale=3)
    fig.show()
    return df

In [23]:
for env_name, df in data.groupby("task"):
    plot_env(env_name, df)


### Unstability of SFR-1 wrt hyperparams.

In [18]:
sfr1 = []
for path, one_sided in zip(all_data.dir_path, all_data.one_sided):
    x = pd.read_csv(os.path.join("..", path, "log.txt"), sep="\t")
    _, x['task'], x['dir_path'] = path.split("/")
    x["one_sided"] = one_sided
    sfr1.append(x)
sfr1 = pd.concat(sfr1)
sfr1.head()

Unnamed: 0,Time,Iteration,AverageReward,StdRewards,MaxRewardRollout,MinRewardRollout,timesteps,task,dir_path,one_sided
0,3.120859,10,85.013145,8.373057,113.143287,70.084741,9039,SafetyHopperVelocity-v1,1724913801.39389,False
1,3.893283,20,116.756961,3.622604,127.02975,109.790013,19099,SafetyHopperVelocity-v1,1724913801.39389,False
2,4.887338,30,253.983855,6.214386,264.590033,237.759234,31264,SafetyHopperVelocity-v1,1724913801.39389,False
3,6.534218,40,699.049306,58.457038,871.497197,641.05983,51083,SafetyHopperVelocity-v1,1724913801.39389,False
4,8.626153,50,827.564412,5.561508,872.085952,820.293742,78719,SafetyHopperVelocity-v1,1724913801.39389,False


In [19]:
std1 = sfr1.groupby(["task","Iteration", "one_sided"]).AverageReward.std().reset_index()
std1

Unnamed: 0,task,Iteration,one_sided,AverageReward
0,SafetyAntVelocity-v1,10,False,262.374173
1,SafetyAntVelocity-v1,10,True,176.683966
2,SafetyAntVelocity-v1,20,False,377.357544
3,SafetyAntVelocity-v1,20,True,307.475426
4,SafetyAntVelocity-v1,30,False,481.286315
...,...,...,...,...
995,SafetyWalker2dVelocity-v1,980,True,471.524010
996,SafetyWalker2dVelocity-v1,990,False,1498.642665
997,SafetyWalker2dVelocity-v1,990,True,458.538249
998,SafetyWalker2dVelocity-v1,1000,False,1493.380902


In [21]:
for env_name, df in std1.groupby("task"):
    fig = px.line(data_frame=df, x="Iteration", y="AverageReward", color="one_sided")
    update_layout(fig, env_name.replace("Safety","").replace("Velocity", ""), "Iteration", "reward stddev", row=1, col=1, upkwargs=dict(width=500, height=400))
    fig.show()