In [1]:
from plotly.subplots import make_subplots
from plotting import plot_values, update_layout
import glob
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px
import json
import os
import numpy as np

In [2]:
import webcolors
def to_opacity(name, opacity):
    r, g, b = webcolors.hex_to_rgb(name)
    return f"rgba({r}, {g}, {b}, {opacity})"

### Get the names of all files we want to read 

In [3]:
def get_best_reward(dir_path):
    try:
        x = pd.read_csv(os.path.join("..", dir_path, "log.txt"), sep="\t")
        return x["AverageReward"].max()
    except:
        print("?", dir_path)
        return -1

In [4]:
records = []
for experiment_file in glob.glob("../sfr2/*/*/params.json", recursive=True):
    with open(experiment_file, "r", encoding="utf-8") as file:
        record = json.load(file)
        records.append(record)

all_data = pd.DataFrame.from_records(records)
all_data['reward'] = all_data.dir_path.apply(get_best_reward)
# all_data = all_data[all_data.reward > 0]

all_data['task'] = all_data.env_name.str.replace("Safety", '').str.replace("Velocity-v1", '')
all_data["algo"] = "SFR-2"
all_data["transform"] = all_data["transform"].apply(lambda x: x.split(":")[0])
# all_data = all_data[~all_data.apply(lambda x: x['transform'] == 'none' and x['task'] == 'Hopper' and x['seed'] == 2, axis=1)]

# this contains max reward we want to match
all_data['best_transform_reward'] = all_data.groupby(["task", "algo", "transform"]).reward.transform('max')
all_data['best_config_reward'] = all_data.groupby(["task", "algo", "transform", "step_size", "delta_std", "filter"]).reward.transform('max')
all_data['best_seed_reward'] = all_data.groupby(["task", "algo", "transform", "step_size", "delta_std", "filter", "seed"]).reward.transform('max')
all_data.head()

Unnamed: 0,delta_std,deltas_used,dir_path,env_name,filter,n_directions,n_iter,n_workers,policy_type,rollout_length,seed,shift,step_size,transform,reward,task,algo,best_transform_reward,best_config_reward,best_seed_reward
0,0.02,1,sfr2/SafetyHalfCheetahVelocity-v1/1736182549.8...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,1,1000,10,linear,1000,3,0,0.01,norm_clip,4733.932914,HalfCheetah,SFR-2,5831.487413,5831.487413,4733.932914
1,0.02,1,sfr2/SafetyHalfCheetahVelocity-v1/1735284610.4...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,1,1000,10,linear,1000,1,0,0.005,norm_clip,3790.974044,HalfCheetah,SFR-2,5831.487413,4906.580814,3790.974044
2,0.02,1,sfr2/SafetyHalfCheetahVelocity-v1/1736047851.3...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,1,1000,10,linear,1000,1,0,0.04,signed,5271.57922,HalfCheetah,SFR-2,5271.57922,5271.57922,5271.57922
3,0.02,1,sfr2/SafetyHalfCheetahVelocity-v1/1735154535.5...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,1,1000,10,linear,1000,2,0,0.005,norm_clip,4906.580814,HalfCheetah,SFR-2,5831.487413,4906.580814,4906.580814
4,0.02,1,sfr2/SafetyHalfCheetahVelocity-v1/1736145158.2...,SafetyHalfCheetahVelocity-v1,MeanStdFilter,1,1000,10,linear,1000,2,0,0.01,norm_clip,5831.487413,HalfCheetah,SFR-2,5831.487413,5831.487413,5831.487413


In [5]:
data = (
    all_data[all_data["best_transform_reward"] == all_data["best_config_reward"]]
    .reset_index(drop=True)
    .drop(["filter", "policy_type", "rollout_length", "shift", "env_name"], axis=1)
)

data["task transform algo".split()].value_counts()

task         transform       algo 
HalfCheetah  component_clip  SFR-2    5
             none            SFR-2    5
             norm_clip       SFR-2    5
             signed          SFR-2    5
Hopper       component_clip  SFR-2    5
             none            SFR-2    5
             norm_clip       SFR-2    5
             signed          SFR-2    5
Swimmer      component_clip  SFR-2    5
             none            SFR-2    5
             norm_clip       SFR-2    5
             signed          SFR-2    5
Walker2d     component_clip  SFR-2    5
             none            SFR-2    5
             norm_clip       SFR-2    5
             signed          SFR-2    5
Name: count, dtype: int64

In [9]:
def to_paper(x: pd.DataFrame, caption, index=False) -> str:
    return (x.to_latex(
    index=index,
    caption=caption,
    escape=True,
    float_format="{:.2f}".format
)
.replace("\\toprule", "\\hline")
.replace("\\midrule", "\\hline")
.replace("\\bottomrule", "\\hline")
.replace("+-", " $\pm$ "))

In [10]:
# best reward for paper table
out = data.groupby(["task", "transform"]).reward.agg(['mean', 'std']).round(2).reset_index()
out = out[out.task != 'Ant']
out['reward'] = out.apply(lambda x: f"{x['mean']}+-{x['std']}", axis=1)
out.drop(['mean', 'std'], axis=1, inplace=True)
out = out.pivot_table('reward', 'task', 'transform', aggfunc='first').reset_index()
out["algo"] = "SFR-2"
out = out[['task', 'algo', *list(out)[1:-1]]]
print(to_paper(out, caption="SFR2"))

\begin{table}
\caption{SFR2}
\begin{tabular}{llllll}
\hline
task & algo & component\_clip & none & norm\_clip & signed \\
\hline
HalfCheetah & SFR-2 & 5762.27 $\pm$ 499.75 & 2977.42 $\pm$ 929.95 & 5082.51 $\pm$ 605.8 & 5042.62 $\pm$ 341.89 \\
Hopper & SFR-2 & 3256.43 $\pm$ 698.7 & 3215.54 $\pm$ 757.42 & 3123.01 $\pm$ 422.79 & 3194.95 $\pm$ 410.85 \\
Swimmer & SFR-2 & 357.19 $\pm$ 1.85 & 268.55 $\pm$ 121.01 & 357.4 $\pm$ 1.26 & 357.89 $\pm$ 1.36 \\
Walker2d & SFR-2 & 2718.52 $\pm$ 1875.69 & 1640.73 $\pm$ 1072.84 & 1382.85 $\pm$ 1051.86 & 1310.45 $\pm$ 1560.97 \\
\hline
\end{tabular}
\end{table}



In [7]:
data = (
    all_data[all_data["best_transform_reward"] == all_data["best_config_reward"]]
    .reset_index(drop=True)
    .drop(["filter", "policy_type", "rollout_length", "shift", "env_name"], axis=1)
)

data["task transform".split()].value_counts()

task         transform     
HalfCheetah  component_clip    5
             none              5
             norm_clip         5
             signed            5
Hopper       component_clip    5
             none              5
             norm_clip         5
             signed            5
Swimmer      component_clip    5
             none              5
             norm_clip         5
             signed            5
Walker2d     component_clip    5
             none              5
             norm_clip         5
             signed            5
Name: count, dtype: int64

In [8]:
def interp_frames(frames, n=2000):
    max_timestep = max(x.timesteps.max() for x in frames)
    common_timesteps = np.linspace(0, max_timestep, num=n)
    result = []
    for frame in frames:
        new_frame = pd.DataFrame({"Iteration": range(n)})
        new_frame["timesteps"] = common_timesteps
        new_frame["AverageReward"] = np.interp(common_timesteps, frame.timesteps, frame.AverageReward)
        for col in ["task", "algo", "transform", "seed"]:
            new_frame[col] = frame[col][0]
        result.append(new_frame)
    return pd.concat(result, ignore_index=True)

In [9]:
ylim =  {'HalfCheetah': 7000, 'Walker2d': 6000, 'Hopper': 4000, 'Swimmer': 370}

In [10]:
for (task, algo), data1 in data.groupby(["task", "algo"]):
    frames = []
    for filename, transform, algo, seed in zip(data1.dir_path, data1['transform'], data1["algo"], data1.seed):
        x = pd.read_csv("../"+filename+"/log.txt", sep="\t")[["AverageReward", "timesteps", "Iteration"]]
        x["transform"] = transform
        x["task"] = task
        x["algo"] = algo
        x["seed"] = seed
        frames.append(x)
    frame = interp_frames(frames)
    # frame = pd.concat(frames, ignore_index=True).sort_values(["transform", "timesteps"], ignore_index=True)
    frame = frame.groupby(["Iteration", 'transform']).agg({
        'AverageReward': ['mean', "std", "max"],
        "timesteps": 'mean'
    }).fillna(0)

    frame.columns = [f"{x}_{y}" for x, y in frame.columns]
    frame.reset_index(inplace=True)
    frame['algo'] = algo
    frame['task'] = task
    frame.rename({
        'timesteps_mean': "steps",
        'AverageReward_mean': "reward",
        'AverageReward_std': "reward_std",
        'AverageReward_max': "reward_max",
    }, axis=1, inplace=True)

    frame.steps = frame.steps.astype(int)
    frame = frame.sort_values(["transform", "steps"], ignore_index=True)
    alpha = 0.2 if task=="Humanoid" and algo=="ARS-v1" else 0.5
    # frame.reward = frame.reward.ewm(alpha=alpha, adjust=False).mean()
    # fig = px.line(data_frame=frame, x="steps", y="reward", color="transform")
    # fig.update_traces(opacity=.7)
    fig = make_subplots()
    for (transform, mini_frame), color in zip(frame.groupby("transform"), px.colors.qualitative.Plotly):
        if transform == "none":
            transform = "no_transform"
        fig.add_trace(go.Scatter(
            x=mini_frame.steps,
            y=np.maximum(mini_frame.reward, 0),
            mode="lines",
            name=transform,
            line=dict(width=2, color=color)
        ))

        y = mini_frame.reward-mini_frame.reward_std
        y = np.maximum(y, 0)
        fig.add_trace(
            go.Scatter(
                x=mini_frame.steps,
                y=y,
                mode="lines",
                line=dict(width=0.01, color=color),
                showlegend=False,
            ),
        )

        y = np.minimum(mini_frame.reward+mini_frame.reward_std, mini_frame.reward_max)
        y = np.maximum(y, 0)
        fig.add_trace(
            go.Scatter(
                x=mini_frame.steps,
                y=y,
                mode="lines",
                line=dict(width=0.01, color=color),
                fill="tonexty",
                fillcolor=to_opacity(color, 0.2),
                showlegend=False,
            ),
        )
    update_layout(fig, task + ": " + algo, "Total Env Interactions", "Reward", row=1, col=1, upkwargs=dict(width=500, height=400))
    fig.update_layout(yaxis={"autorange": False, 'range':[-1, ylim[task]]})
    if (task=="Humanoid" and algo=="ARS-v1"):
        fig.update_layout(legend=dict(yanchor="bottom", y=0.6, xanchor="right", x=0.99))
    fig.write_image(f"../static/{task}_{algo}.png", scale=1.5)

In [11]:
data = (
    all_data[all_data["best_seed_reward"] == all_data["best_transform_reward"]]
    .reset_index(drop=True)
    .drop(["filter", "policy_type", "rollout_length", "shift", "env_name"], axis=1)
)

data["task transform".split()].value_counts()

task         transform     
HalfCheetah  component_clip    1
             none              1
             norm_clip         1
             signed            1
Hopper       component_clip    1
             none              1
             norm_clip         1
             signed            1
Swimmer      component_clip    1
             none              1
             norm_clip         1
             signed            1
Walker2d     component_clip    1
             none              1
             norm_clip         1
             signed            1
Name: count, dtype: int64

In [12]:
for (task, algo), data1 in data.groupby(["task", "algo"]):
    frames = []
    for filename, transform, algo, seed in zip(data1.dir_path, data1['transform'], data1["algo"], data1.seed):
        x = pd.read_csv("../"+filename+"/log.txt", sep="\t")[["AverageReward", "timesteps", "Iteration"]]
        x["transform"] = transform
        x["task"] = task
        x["algo"] = algo
        x["seed"] = seed
        frames.append(x)
    frame = interp_frames(frames)
    # frame = pd.concat(frames, ignore_index=True).sort_values(["transform", "timesteps"], ignore_index=True)
    # frame = frame.groupby(["Iteration", 'transform']).agg({
    #     'AverageReward': ['mean', "std", "max"],
    #     "timesteps": 'mean'
    # }).fillna(0)

    # frame.columns = [f"{x}_{y}" for x, y in frame.columns]
    # frame.reset_index(inplace=True)
    frame['algo'] = algo
    frame['task'] = task
    frame.rename({
        'timesteps': "steps",
        'AverageReward': "reward",
    }, axis=1, inplace=True)

    frame.steps = frame.steps.astype(int)
    frame = frame.sort_values(["transform", "steps"], ignore_index=True)
    alpha = 0.2 if task=="Humanoid" and algo=="ARS-v1" else 0.5
    # frame.reward = frame.reward.ewm(alpha=alpha, adjust=False).mean()
    frame["transform"] = frame["transform"].str.replace("none", "no_transform")
    fig = px.line(data_frame=frame, x="steps", y="reward", color="transform")
    update_layout(fig, task + ": " + algo, "Total Env Interactions", "Reward", row=1, col=1, upkwargs=dict(width=500, height=400))
    fig.update_layout(yaxis={"autorange": False, 'range':[-1, ylim[task]]})
    if (task=="Humanoid" and algo=="ARS-v1"):
        fig.update_layout(legend=dict(yanchor="bottom", y=0.6, xanchor="right", x=0.99))
    fig.write_image(f"../static/{task}_{algo}_max.png", scale=1.5)
    # fig.show()

### Show the best hparams

In [41]:
def to_paper(x: pd.DataFrame, caption, index=False) -> str:
    latex_code = x.to_latex(
        index=index,
        caption=caption,
        escape=True,
        float_format="{:.3f}".format,
        column_format="|".join(["|l"] + ["c"] * len(x.columns))  # Add borders to columns
    ).replace("\\toprule", "\\hline") \
     .replace("\\midrule", "\\hline") \
     .replace("\\bottomrule", "\\hline") \
     .replace("+-", "$\pm$") \
     .replace("\\begin{table}", "\\begin{table}\n\\centering")

    # Move the caption to the end of the table
    lines = latex_code.split("\n")
    caption_line = next((line for line in lines if "\\caption" in line), None)
    if caption_line:
        lines.remove(caption_line)
        lines.insert(-2, caption_line)  # Insert before the end of the table
    return "\n".join(lines)

In [49]:
params = data[["delta_std", "step_size", "transform", "task"]].sort_values(
    "transform", ignore_index=True
)

params['transform'] = params['transform'].str.replace("none", 'no_transform')

for task, group in params.groupby("task"):
    x = to_paper(group.drop("task", axis=1), caption=f"\\label{{{task.lower()}_hparams_sfr}}SFR best hyperparameters for {task}")
    print(x)

\begin{table}
\centering
\begin{tabular}{|l|c|c|c}
\hline
delta\_std & step\_size & transform \\
\hline
0.020 & 0.005 & component\_clip \\
0.020 & 0.005 & no\_transform \\
0.020 & 0.010 & norm\_clip \\
0.020 & 0.040 & signed \\
\hline
\end{tabular}
\caption{\label{halfcheetah_hparams_sfr}SFR best hyperparameters for HalfCheetah}
\end{table}

\begin{table}
\centering
\begin{tabular}{|l|c|c|c}
\hline
delta\_std & step\_size & transform \\
\hline
0.020 & 0.005 & component\_clip \\
0.020 & 0.005 & no\_transform \\
0.020 & 0.005 & norm\_clip \\
0.020 & 0.005 & signed \\
\hline
\end{tabular}
\caption{\label{hopper_hparams_sfr}SFR best hyperparameters for Hopper}
\end{table}

\begin{table}
\centering
\begin{tabular}{|l|c|c|c}
\hline
delta\_std & step\_size & transform \\
\hline
0.020 & 0.020 & component\_clip \\
0.020 & 0.020 & no\_transform \\
0.020 & 0.020 & norm\_clip \\
0.020 & 0.020 & signed \\
\hline
\end{tabular}
\caption{\label{swimmer_hparams_sfr}SFR best hyperparameters for Swimmer}

In [66]:
for task, group in all_data.groupby("task"):
    x = pd.DataFrame({
        col : [group[col].unique().tolist()]
        for col in ["delta_std", "step_size", "transform"]
    }).T.reset_index()
    x.columns = ['Parameters', "Values"]
    print(to_paper(x, caption=f"{task} hyperparameters for SFR"))

\begin{table}
\centering
\begin{tabular}{|l|c|c}
\hline
Parameters & Values \\
\hline
delta\_std & [0.02] \\
step\_size & [0.01, 0.005, 0.04, 0.03] \\
transform & ['norm_clip', 'signed', 'none', 'component_clip'] \\
\hline
\end{tabular}
\caption{HalfCheetah hyperparameters for SFR}
\end{table}

\begin{table}
\centering
\begin{tabular}{|l|c|c}
\hline
Parameters & Values \\
\hline
delta\_std & [0.02] \\
step\_size & [0.005] \\
transform & ['none', 'norm_clip', 'component_clip', 'signed'] \\
\hline
\end{tabular}
\caption{Hopper hyperparameters for SFR}
\end{table}

\begin{table}
\centering
\begin{tabular}{|l|c|c}
\hline
Parameters & Values \\
\hline
delta\_std & [0.02, 0.01] \\
step\_size & [0.02] \\
transform & ['norm_clip', 'signed', 'component_clip', 'none'] \\
\hline
\end{tabular}
\caption{Swimmer hyperparameters for SFR}
\end{table}

\begin{table}
\centering
\begin{tabular}{|l|c|c}
\hline
Parameters & Values \\
\hline
delta\_std & [0.025] \\
step\_size & [0.003, 0.01] \\
transform & 

In [70]:
x = pd.read_csv('../sfr2/SafetyHalfCheetahVelocity-v1/1736182549.8540034/log.txt', sep="\t")
x

Unnamed: 0,Time,Iteration,AverageReward,StdRewards,MaxRewardRollout,MinRewardRollout,timesteps,gradnorms,maxnorms
0,36.493412,0,-0.325686,0.691846,1.18188,-2.499438,0,0.0,0.0
1,641.84217,800,1697.730068,5.728183,1711.187184,1684.709218,1600000,0.998752,0.426544
2,1247.609184,1600,2422.587988,14.440112,2460.90295,2372.774473,3200000,0.999001,0.417333
3,1845.116063,2400,2525.222841,29.670941,2612.955334,2456.514955,4800000,0.999001,0.417333
4,2442.373228,3200,2749.696634,26.646072,2805.335236,2632.824996,6400000,0.999001,0.431191
5,3037.526251,4000,2772.759091,15.813138,2809.905879,2742.589541,8000000,0.999001,0.431191
6,3633.246091,4800,3103.406059,31.618062,3182.536259,3006.019025,9600000,0.999001,0.406436
7,4227.283278,5600,3057.643167,20.943568,3122.287108,2990.137829,11200000,0.999001,0.437846
8,4821.6308,6400,3285.963751,35.477953,3383.863631,3200.526682,12800000,0.999001,0.411291
9,5415.639932,7200,3458.778167,33.856973,3536.971567,3362.474556,14400000,0.999001,0.445962


In [38]:
def get_best_reward(dir_path):
    try:
        x = pd.read_csv(os.path.join("..", dir_path, "log.txt"), sep="\t")
        # find the timestep closest to 1e6
        return x[x.timesteps <= 1e6+1000]["AverageReward"].max()
    except:
        print("?", dir_path)
        return -1

data['reward2'] = data.dir_path.apply(get_best_reward)
# first 3 seeds
x = data.groupby(["task", "transform"]).reward2.agg(list).apply(lambda x: np.mean(x[:3])).reset_index()
x.groupby(["task"]).reward2.max().astype(int)

task
HalfCheetah    2856
Hopper         2975
Swimmer         118
Walker2d        547
Name: reward2, dtype: int64