### **Combine plots of rewards and episode lengths across environments**

In [24]:
import pathlib
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

# Apply the custom Matplotlib style sheet. The path is relative, so it is
# resolved against the working directory the notebook kernel runs in.
plt.style.use('../sac/utils/custom.mplstyle')

def moving_average(curve, window):
    """Smooth ``curve`` with a centered moving average.

    The curve is reflect-padded at both ends before averaging so that the
    result has exactly as many samples as the input and the edges are not
    pulled towards zero.

    Args:
        curve: One-dimensional sequence (list or array) of numeric samples.
        window: Number of samples to average over. ``None``, ``0`` or any
            value ``<= 1`` disables smoothing. Values larger than the curve
            are clamped to ``len(curve)``.

    Returns:
        ``curve`` itself (unchanged, same object) when smoothing is disabled
        or the curve is empty; otherwise a NumPy array with ``len(curve)``
        smoothed samples.
    """
    if not window or window <= 1:
        return curve
    if len(curve) == 0:
        # np.convolve rejects empty operands; there is nothing to smooth.
        return curve
    window = min(window, len(curve))
    kernel = np.ones(window) / window

    # A 'valid' convolution drops window - 1 samples, so pad exactly that many
    # in total. Splitting them as (window // 2, window - 1 - window // 2)
    # keeps the output length equal to the input for even windows as well;
    # padding window // 2 on both sides would yield one extra sample.
    pad_left = window // 2
    pad_right = window - 1 - pad_left
    padded = np.pad(curve, (pad_left, pad_right), mode='reflect')

    return np.convolve(padded, kernel, mode='valid')


def plot_with_moving_average(curve, label=None, window=None):
    """Plot ``curve`` on the current axes, optionally with a smoothed overlay.

    The raw curve is always drawn semi-transparent. When ``window`` is greater
    than 1, a moving-average curve is drawn on top in the same color and
    carries the legend label; the raw curve is then hidden from the legend.
    When no smoothing is requested, the raw curve keeps ``label`` so that it
    can still appear in a legend.

    Args:
        curve: One-dimensional sequence of values to plot.
        label: Optional base label for the legend entry.
        window: Moving-average window size; ``None`` or ``<= 1`` disables
            the smoothed overlay.
    """
    smoothing = bool(window) and window > 1
    # Only one of the two lines should carry a legend entry: the smoothed one
    # when it exists, otherwise the raw one.
    raw_label = '_nolegend_' if smoothing else label
    (raw_line,) = plt.plot(curve, alpha=0.5, label=raw_label)
    if smoothing:
        # Reuse the raw line's color so both lines read as one series.
        color = raw_line.get_color()
        ma_curve = moving_average(curve, window)
        ma_label = f"{label} (MA {window})" if label else f"MA {window}"
        plt.plot(ma_curve, label=ma_label, color=color, linestyle='-')

def make_and_save_graph2(
        number_of_curves: int,
        data: list,
        xlabel: str,
        ylabel: str,
        filename: str,
        run_dir: Path,
        legend: list[str] | None = None,
        moving_average_window: int | None = None,
    ) -> None:
    """Plot several curves on one figure and save it to ``run_dir/filename``.

    Args:
        number_of_curves: How many leading entries of ``data`` to plot.
        data: Sequence of one-dimensional series, one per curve.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        filename: Name of the output file; passed to ``plt.savefig``.
        run_dir: Directory the figure is written into. Both ``str`` and
            ``pathlib.Path`` values are accepted.
        legend: Optional labels, index-aligned with ``data``. When omitted,
            no legend is drawn.
        moving_average_window: When truthy, each curve is drawn through
            ``plot_with_moving_average`` with this window; otherwise the raw
            curves are plotted directly.
    """
    plt.figure()
    try:
        for i in range(number_of_curves):
            series = data[i]
            label = legend[i] if legend else None
            if moving_average_window:
                plot_with_moving_average(series, label, moving_average_window)
            else:
                plt.plot(series, label=label)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        # plt.legend(None) raises TypeError, so only draw a legend when labels
        # were actually supplied.
        if legend:
            plt.legend(legend)
        # Path(...) / filename works for str and Path run_dir alike, whereas
        # string concatenation fails for Path objects.
        graph_path = Path(run_dir) / filename
        plt.savefig(graph_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()

## **OneDPointMassReachEnv, QuadraticActionRewardEnv**

In [25]:
# Load our SAC and SB3_SAC rewards and lengths
import sys
sys.path.append('..')
from sac.utils.logger_utils import *

# Run directory of the SAC run on OneDPointMassReachEnv (relative path).
oneDPoint = "runs/OneDPointMassReachEnv/SAC/sac-point-mass-reach-2025_11_29-18_58_28"
# load_rewards / load_lengths are presumably provided by the star import from
# sac.utils.logger_utils; their return types are not visible here -- TODO confirm.
oneDPoint_rewards = load_rewards(oneDPoint)
oneDPoint_lengths = load_lengths(oneDPoint)

# The QuadraticActionRewardEnv run is currently disabled (kept commented out).
# Quadratic = 'runs/QuadraticActionRewardEnv/SAC/sac-quadratic-action-2025_11_30-00_09_34'

# Quadratic_rewards = load_rewards(Quadratic)
# Quadratic_lengths = load_lengths(Quadratic)


# Single-curve inputs for make_and_save_graph2; the plotting calls below are
# currently commented out, so this cell only loads data.
rewards = [oneDPoint_rewards]
# make_and_save_graph2(1, rewards, "Episode", "Reward", "OneDPoint_rewards.pdf", oneDPoint, ["OneDPointMassReachEnv"], moving_average_window=50)

lengths = [oneDPoint_lengths]
# make_and_save_graph2(1, lengths, "Episode", "Length", "OneDPoint_lengths.pdf", oneDPoint, ["OneDPointMassReachEnv"], moving_average_window=50)

## **Inverted Pendulum v5**

In [26]:
# Run directory of our SAC implementation on InvertedPendulum-v5.
pend_sac = 'runs/InvertedPendulum-v5/SAC/sac-inverted-pendulum-2025_11_29-14_54_49'
# load_rewards / load_lengths are not defined in this cell; presumably they
# come from sac.utils.logger_utils -- TODO confirm.
pend_sac_rewards = load_rewards(pend_sac)
pend_sac_lengths = load_lengths(pend_sac)

# Run directory of the run being compared against (labelled "SB3-SAC" below).
pend_sac_sb3 = 'runs/InvertedPendulum-v5/sac_sb3-sin_param'

pend_sac_sb3_rewards = load_rewards(pend_sac_sb3)
pend_sac_sb3_lengths = load_lengths(pend_sac_sb3)

# Curve order must match the legend order: SB3-SAC first, then SAC.
# Both comparison figures are written into 'runs/InvertedPendulum-v5'.
rewards = [pend_sac_sb3_rewards, pend_sac_rewards]
make_and_save_graph2(2, rewards, "Episode", "Reward", "compare_InvertedPendulumSAC_SB3_rewards.pdf", 'runs/InvertedPendulum-v5', ["SB3-SAC", "SAC"], moving_average_window=50)
lengths = [pend_sac_sb3_lengths, pend_sac_lengths]
make_and_save_graph2(2, lengths, "Episode", "Length", "compare_InvertedPendulumSAC_SB3_lengths.pdf", "runs/InvertedPendulum-v5", ["SB3-SAC", "SAC"], moving_average_window=50)

## **BipedalWalker**

In [None]:
# NOTE(review): this path stops at the 'SAC/' directory without a run name;
# it appears to be a placeholder that still has to be filled in.
bip_sac = 'runs/BipedalWalker-v3/SAC/'# TODO: complete with the run directory
bip_sac_rewards = load_rewards(bip_sac)
bip_sac_lengths = load_lengths(bip_sac)

bip_sac_sb3 = 'runs/BipedalWalker-v3/' # TODO: complete with the run directory

bip_sac_sb3_rewards = load_rewards(bip_sac_sb3)
bip_sac_sb3_lengths = load_lengths(bip_sac_sb3)

# Curve order must match the legend order: SB3-SAC first, then SAC.
# Both comparison figures are written into 'runs/BipedalWalker-v3'.
rewards = [bip_sac_sb3_rewards, bip_sac_rewards]
make_and_save_graph2(2, rewards, "Episode", "Reward", "compare_BipedalWalkerSAC_SB3_rewards.pdf", 'runs/BipedalWalker-v3', ["SB3-SAC", "SAC"], moving_average_window=50)
lengths = [bip_sac_sb3_lengths, bip_sac_lengths]
make_and_save_graph2(2, lengths, "Episode", "Length", "compare_BipedalWalkerSAC_SB3_lengths.pdf", "runs/BipedalWalker-v3", ["SB3-SAC", "SAC"], moving_average_window=50)


## **DonkeyCar**

In [None]:
# NOTE(review): this path stops at the 'SAC/' directory without a run name;
# it appears to be a placeholder that still has to be filled in.
donk_sac = 'runs/DonkeyCar-v0/SAC/' # TODO: complete with the run directory
donk_sac_rewards = load_rewards(donk_sac)
donk_sac_lengths = load_lengths(donk_sac)

# Single-curve figures; donk_sac is also passed as run_dir, so the PDFs are
# written into that same directory.
rewards = [donk_sac_rewards]
make_and_save_graph2(1, rewards, "Episode", "Reward", "DonkeyCar_rewards.pdf", donk_sac, ["DonkeyCar-v0"], moving_average_window=50)
lengths = [donk_sac_lengths]
make_and_save_graph2(1, lengths, "Episode", "Length", "DonkeyCar_lengths.pdf", donk_sac, ["DonkeyCar-v0"], moving_average_window=50)
