> **Results from the best reward experiment.** \
> Comparison uses TD3, SAC, and DSAC with three different reward functions:
> 1. $r=e$
> 1. $r= e + \Delta u$
> 1. $r= e + \Delta u + u$

# ⚙️ Set-up
Loads required packages and set seaborn as plotting style

In [1]:
import pandas as pd
import plotly.express as px
import seaborn as sns
import numpy as np

from helpers.paths import Path
from visualization.utils import make_smooth
sns.set()

Load and filter data

In [2]:
# Load data
df_learn = pd.read_csv(Path.data/"best_reward_learning.csv")
df_ep = pd.read_csv(Path.data/"best_reward_episode.csv")

df_learn_s = pd.read_csv(Path.data/"best_reward_square_learning.csv")
df_ep_s = pd.read_csv(Path.data/"best_reward_square_episode.csv")

# Keep only DSAC data
df_learn_dsac = df_learn[df_learn.algorithm == "DSAC"]
df_ep_dsac = df_ep[df_ep.algorithm == "DSAC"]

Make the data from the learning steps smooth

In [3]:
df_learn_smooth = make_smooth(df_learn, step=100, 
                              on="global_step", 
                              columns=["rollout/ep_rew_mean", "rollout/ep_len_mean"],
                              info=["algorithm", "reward_type"])

df_learn_s_smooth = make_smooth(df_learn_s, step=100, 
                              on="global_step", 
                              columns=["rollout/ep_rew_mean", "rollout/ep_len_mean"],
                              info=["algorithm", "reward_type"])


In [4]:
df_learn_s

Unnamed: 0.1,Unnamed: 0,global_step,rollout/ep_len_mean,rollout/ep_rew_mean,algorithm,reward_type,run
0,0,2,2.000000,-62.325705,DSAC,sq_error_da_a,dashing-night-275
1,1,3,1.000000,-32.963623,DSAC,sq_error_da_a,dashing-night-275
2,2,4,1.000000,-53.164631,DSAC,sq_error_da_a,dashing-night-275
3,3,5,1.000000,-40.050033,DSAC,sq_error_da_a,dashing-night-275
4,4,6,1.000000,-47.143479,DSAC,sq_error_da_a,dashing-night-275
...,...,...,...,...,...,...,...
36671,95,9523,99.197914,-2.614743,SAC,sq_error,dainty-fire-5
36672,96,9624,99.216492,-2.602090,SAC,sq_error,dainty-fire-5
36673,97,9725,99.234695,-2.593019,SAC,sq_error,dainty-fire-5
36674,98,9826,99.252525,-2.585945,SAC,sq_error,dainty-fire-5


# 👨🏽‍🏫 Learning performance

## Episode length
The reward functions are different therefore, the maximum return they achieve is 

In [5]:
g = sns.relplot(data=df_learn_smooth, x="step", y="rollout/ep_len_mean", hue="reward_type", kind="line", col="algorithm")

ValueError: Could not interpret value `step` for parameter `x`

In [None]:
g = sns.relplot(data=df_learn_s_smooth, x="step", y="rollout/ep_len_mean", hue="reward_type", kind="line", col="algorithm")

In [None]:
df_ep_run = df_ep.groupby(["run", "algorithm", "reward_type"])[["reward", "tracking_error"]].sum().reset_index()
df_ep_run[df_ep_run.algorithm=="SAC"]["tracking_error"].nunique()

df_ep_run
sns.barplot(data=df_ep_run, x="algorithm", y="tracking_error", hue="reward_type")

In [None]:
algorithm = "DSAC"
df_ep_rew = df_ep_s.groupby(["run", "algorithm", "reward_type"])["reward"].sum().reset_index()
reward_count = df_ep_rew[df_ep_rew.algorithm==algorithm]["reward"].value_counts()
duplicated_rewards = reward_count[reward_count>1].index.values

df_ep_rew[df_ep_rew.reward.isin(duplicated_rewards)].groupby(["reward_type", "reward", "run"]).count()