### **Combine plots of rewards and lengths in envs**

In [1]:
import pathlib
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

# Apply the custom matplotlib style sheet to every figure created below.
# The path is relative, so it is resolved against the kernel's working directory.
plt.style.use('../sac/utils/custom.mplstyle')

def moving_average(curve, window):
    """Smooth ``curve`` with a box filter whose edges are averaged correctly.

    The series is convolved with a box of ``window`` ones (``mode='same'``,
    so the output has the same length as the input). Each output sample is
    divided by the number of input samples that actually overlapped the box
    at that position, so the ends are true means of the available samples
    rather than being pulled toward zero by implicit padding.

    Args:
        curve: Sequence of numeric samples; converted to a float array.
        window: Requested box length. It is truncated with ``int`` and
            clipped to ``len(curve)``.

    Returns:
        A float ``numpy`` array with the same length as ``curve``. The input
        is returned unsmoothed (as a float array) when it is empty, when
        ``window`` is falsy or ``<= 1``, or when the clipped window is
        ``<= 1``.
    """
    samples = np.asarray(curve, dtype=float)

    # Nothing to smooth: empty series, or no usable window requested.
    if samples.size == 0 or not window or window <= 1:
        return samples

    # A box longer than the series carries no extra information.
    span = min(int(window), len(samples))
    if span <= 1:
        return samples

    def boxcar_sum(signal):
        # Running sum of `signal` under a length-`span` box, same length as input.
        return np.convolve(signal, np.ones(span, dtype=float), mode='same')

    # Convolving an all-ones series yields, per position, how many real
    # samples fell under the box; dividing by it normalises the edges.
    return boxcar_sum(samples) / boxcar_sum(np.ones_like(samples, dtype=float))


def plot_with_moving_average(curve, label=None, window=None):
    """Plot ``curve`` on the current axes, optionally with a smoothed overlay.

    With a usable ``window`` (``> 1``), the raw series is drawn faded and
    hidden from the legend, and its moving average is drawn on top in the
    same colour carrying ``label``.

    Without a usable window (the default), no smoothed line exists to carry
    the label, so the raw series itself is drawn at full opacity with
    ``label``. Previously the label was dropped in this case and the curve
    was left faded with no legend entry.

    Args:
        curve: Sequence of numeric samples, plotted against ``0..len(curve)-1``.
        label: Legend label for the curve, or ``None`` for no legend entry.
        window: Moving-average window length; ``None`` or ``<= 1`` disables
            smoothing.
    """
    x = np.arange(len(curve))

    if not (window and window > 1):
        # No overlay will be drawn, so the raw curve must carry the label.
        plt.plot(x, curve, label=label)
        return

    # A label starting with an underscore keeps the faded raw line out of the legend.
    (raw_line,) = plt.plot(x, curve, alpha=0.5, label='_nolegend_')
    ma_curve = moving_average(curve, window)
    # Reuse the raw line's colour so both traces read as the same series.
    plt.plot(x, ma_curve, label=label, color=raw_line.get_color(), linestyle='-')


def make_and_save_graph2(
        number_of_curves: int,
        data: list,
        xlabel: str,
        ylabel: str,
        filename: str,
        run_dir: Path,
        legend: list[str] | None = None,
        moving_average_window: int | None = None,
        legend_position = 'best',
        legend_fontsize = 12
    ) -> None:
    """Plot several series on one figure and save it to ``run_dir / filename``.

    The first ``number_of_curves`` entries of ``data`` are each plotted
    against their sample index. The function always draws on a fresh figure
    and always closes it, even if plotting or saving fails, so nothing leaks
    in from or out to other pyplot figures.

    Args:
        number_of_curves: How many entries of ``data`` to plot.
        data: Indexable collection of numeric series; ``data[i]`` is read for
            every ``i < number_of_curves``, so it must hold at least that
            many entries.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        filename: Output file name, joined onto ``run_dir``.
        run_dir: Directory the figure is written to (``str`` or ``Path``).
        legend: Optional per-curve labels; curves beyond its length are
            left unlabelled.
        moving_average_window: If ``> 1``, each curve is drawn through
            ``plot_with_moving_average`` with this window; otherwise plain
            lines are drawn.
        legend_position: ``loc`` argument forwarded to ``plt.legend``.
        legend_fontsize: ``fontsize`` argument forwarded to ``plt.legend``.
    """
    smoothing = bool(moving_average_window and moving_average_window > 1)

    # Start from a dedicated figure so artists left on a previously current
    # figure can never end up in the saved file.
    fig = plt.figure()
    try:
        for i in range(number_of_curves):
            series = np.asarray(data[i])
            label = legend[i] if legend and i < len(legend) else None
            if smoothing:
                plot_with_moving_average(series, label, moving_average_window)
            else:
                plt.plot(np.arange(len(series)), series, label=label)

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

        # Only draw a legend when at least one artist has a visible label;
        # otherwise matplotlib warns and adds an empty legend box.
        handles, _ = plt.gca().get_legend_handles_labels()
        if handles:
            plt.legend(loc=legend_position, fontsize=legend_fontsize)

        fig.savefig(Path(run_dir) / filename)
    finally:
        # Release the figure even when plotting or saving raised.
        plt.close(fig)

## **OneDPointMassReachEnv, QuadraticActionRewardEnv**

In [10]:
# Load the rewards and episode lengths logged by our SAC run on OneDPointMassReachEnv.
import sys

# Make the repository root importable. The membership check keeps sys.path
# free of duplicate entries when this cell is re-run.
if '..' not in sys.path:
    sys.path.append('..')

# Explicit names instead of a wildcard import, so it is clear where the
# loaders come from and the notebook namespace is not polluted.
from sac.utils.logger_utils import load_lengths, load_rewards

oneDPoint = "runs/OneDPointMassReachEnv/SAC/sac-point-mass-reach-2025_11_29-18_58_28"
oneDPoint_rewards = load_rewards(oneDPoint)
oneDPoint_lengths = load_lengths(oneDPoint)

# Quadratic = 'runs/QuadraticActionRewardEnv/SAC/sac-quadratic-action-2025_11_30-00_09_34'

# Quadratic_rewards = load_rewards(Quadratic)
# Quadratic_lengths = load_lengths(Quadratic)


rewards = [oneDPoint_rewards]
make_and_save_graph2(1, rewards, "Episode", "Reward", "OneDPoint_rewards.pdf", oneDPoint, ["OneDPointMassReachEnv"], moving_average_window=50)

lengths = [oneDPoint_lengths]
# make_and_save_graph2(1, lengths, "Episode", "Length", "OneDPoint_lengths.pdf", oneDPoint, ["OneDPointMassReachEnv"], moving_average_window=50)

## **Inverted Pendulum v5**

In [3]:
# Run directories for the three InvertedPendulum-v5 experiments being compared.
pend_sac_alpha_fixed = 'runs/InvertedPendulum-v5/SAC/sac-inverted-pendulum-2025_11_30-11_48_27'
pend_sac_alpha_dyn = 'runs/InvertedPendulum-v5/SAC/sac-inverted-pendulum-2025_11_30-11_40_35'
pend_sac_sb3 = 'runs/InvertedPendulum-v5/sac_sb3-sin_param'

# Logged rewards and lengths for each run.
pend_sac_fix_alpha_rewards = load_rewards(pend_sac_alpha_fixed)
pend_sac_fix_alpha_lengths = load_lengths(pend_sac_alpha_fixed)

pend_sac_dyn_alpha_rew = load_rewards(pend_sac_alpha_dyn)
pend_sac_dyn_alpha_len = load_lengths(pend_sac_alpha_dyn)

pend_sac_sb3_rewards = load_rewards(pend_sac_sb3)
pend_sac_sb3_lengths = load_lengths(pend_sac_sb3)

# Curve order must match the legend order below.
rewards = [
    pend_sac_sb3_rewards,
    pend_sac_fix_alpha_rewards,
    pend_sac_dyn_alpha_rew,
]

make_and_save_graph2(
    number_of_curves=len(rewards),
    data=rewards,
    xlabel="Episode",
    ylabel="Reward",
    filename="compare_InvertedPendulumSAC_SB3_rewards.pdf",
    run_dir='runs/InvertedPendulum-v5',
    legend=["SB3-SAC", r"SAC ($\alpha_{\text{fijo}}$)", r"SAC ($\alpha_{\text{dinámico}}$)"],
    moving_average_window=30,
    legend_position='lower right',
    legend_fontsize=8,
)
# lengths = [pend_sac_sb3_lengths, pend_sac_fix_alpha_lengths]
# make_and_save_graph2(2, lengths, "Episode", "Length", "compare_InvertedPendulumSAC_SB3_lengths.pdf", "runs/InvertedPendulum-v5", ["SB3-SAC", "SAC"], moving_average_window=50)

## **BipedalWalker**

In [4]:
# Run directories for the BipedalWalker-v3 experiments being compared.
bip_sac_alpha_fixed = 'runs/BipedalWalker-v3/SAC/sac-bipedal-walker-2025_11_30-13_02_50'
bip_sac_sb3 = 'runs/BipedalWalker-v3/sac_sb3-3'

# Only the first 439 reward entries of our SAC run are kept.
# NOTE(review): presumably a cut-off chosen to match the other run; confirm.
bip_sac_fix_alpha_rewards = load_rewards(bip_sac_alpha_fixed)[:439]
bip_sac_fix_alpha_lengths = load_lengths(bip_sac_alpha_fixed)

# bip_sac_alpha_dyn = 'runs/InvertedPendulum-v5/SAC/sac-inverted-pendulum-2025_11_30-11_40_35'
# bip_sac_dyn_alpha_rew = load_rewards(bip_sac_alpha_dyn)
# bip_sac_dyn_alpha_len = load_lengths(bip_sac_alpha_dyn)

bip_sac_sb3_rewards = load_rewards(bip_sac_sb3)
bip_sac_sb3_lengths = load_lengths(bip_sac_sb3)

# Curve order must match the legend order below.
rewards = [
    bip_sac_sb3_rewards,
    bip_sac_fix_alpha_rewards,
    # bip_sac_dyn_alpha_rew,
]

make_and_save_graph2(
    number_of_curves=len(rewards),
    data=rewards,
    xlabel="Episode",
    ylabel="Reward",
    filename="compare_BipedalWalkerSAC_SB3_rewards.pdf",
    run_dir='runs/BipedalWalker-v3',
    legend=[
        "SB3-SAC",
        r"SAC ($\alpha_{\text{fijo}}$)",
        # r"SAC ($\alpha_{\text{dinámico}}$)",
    ],
    moving_average_window=30,
    legend_position='lower right',
    legend_fontsize=8,
)

In [5]:
# bip_sac = 'runs/BipedalWalker-v3/SAC/'  # TODO: fill in the run directory
# bip_sac_rewards = load_rewards(bip_sac)
# bip_sac_lengths = load_lengths(bip_sac)

# bip_sac_sb3 = 'runs/BipedalWalker-v3/'  # TODO: fill in the run directory

# bip_sac_sb3_rewards = load_rewards(bip_sac_sb3)
# bip_sac_sb3_lengths = load_lengths(bip_sac_sb3)

# rewards = [bip_sac_sb3_rewards, bip_sac_rewards]
# make_and_save_graph2(2, rewards, "Episode", "Reward", "compare_BipedalWalkerSAC_SB3_rewards.pdf", 'runs/BipedalWalker-v3', ["SB3-SAC", "SAC"], moving_average_window=50)
# lengths = [bip_sac_sb3_lengths, bip_sac_lengths]
# make_and_save_graph2(2, lengths, "Episode", "Length", "compare_BipedalWalkerSAC_SB3_lengths.pdf", "runs/BipedalWalker-v3", ["SB3-SAC", "SAC"], moving_average_window=50)


## **DonkeyCar**

In [None]:
# Compare our SAC runs on DonkeyVae-v0 at two levels.
# 2025_11_30-14_11_59 -> level 0
# 2025_11_29-16_30_44 -> level 2
donk_sac_0 = 'runs/DonkeyVae-v0/SAC/sac-donkey-vae-2025_11_30-14_11_59'
donk_sac_0_rewards = load_rewards(donk_sac_0)
# Renamed from `donk_sac__0_lengths` (stray double underscore) so it follows
# the same pattern as `donk_sac_0_rewards` and `donk_sac_2_lengths`.
donk_sac_0_lengths = load_lengths(donk_sac_0)


donk_sac_2 = 'runs/DonkeyVae-v0/SAC/sac-donkey-vae-2025_11_29-16_30_44'
# Only the first 280 reward entries of the level-2 run are kept.
# NOTE(review): the reason for this cut-off is not recorded here; confirm.
donk_sac_2_rewards = load_rewards(donk_sac_2)[:280]
donk_sac_2_lengths = load_lengths(donk_sac_2)

# Curve order must match the legend order below.
rewards = [donk_sac_0_rewards, donk_sac_2_rewards]
make_and_save_graph2(
    2,
    rewards,
    "Episode",
    "Reward",
    "compare_DonkeyCar_level0_level2_rewards.pdf",
    'runs/DonkeyVae-v0',
    ["SAC (level 0)", "SAC (level 2)"],
    moving_average_window=50,
    legend_position='lower right', 
    legend_fontsize=8
)
