In [13]:
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

from utils import FORMATTER, FONT

# Global figure styling. The matplotlib rc overrides are applied after
# sns.set_theme, in the same order as before.
sns.set_theme(rc={"grid.color": "lightgray"}, style="white")

# Bold, larger titles and axis labels.
plt.rc(
    "axes",
    **{
        "titlesize": 16,
        "titleweight": "bold",
        "labelsize": 14,
        "labelweight": "bold",
    },
)

# Same tick-label size on both axes (x first, then y).
plt.rc(("xtick", "ytick"), labelsize=12)

# Bold font properties object. NOTE(review): not referenced by any cell
# visible here (the legend uses FONT from utils) - confirm before removing.
font = font_manager.FontProperties(weight="bold")

In [None]:
def smooth_curve(points: np.ndarray, factor: int = 100):
    """Smooth a 1-D series with a trailing moving average.

    The average is computed from a cumulative sum, so each output sample is
    the mean of ``factor`` consecutive input samples.

    Args:
        points: Array-like of numeric samples (ndarray, list or pandas
            Series). It is converted to an ndarray first so that the slicing
            below is always positional.
        factor: Window length in samples. Must be at least 1.

    Returns:
        A float ndarray of length ``len(points) - factor + 1``; it is empty
        when ``factor`` is larger than ``len(points)``. Output sample ``i``
        is the mean of ``points[i : i + factor]``.

    Raises:
        ValueError: If ``factor`` is smaller than 1. Without this check a
            negative window silently produced meaningless values and a zero
            window failed with an unrelated broadcasting error.
    """
    if factor < 1:
        raise ValueError(f"factor must be a positive integer, got {factor!r}")

    values = np.asarray(points)
    # Prepend a zero so that cumsum[i + factor] - cumsum[i] is exactly the sum
    # of the window starting at sample i.
    cumsum = np.cumsum(np.insert(values, 0, 0))
    return (cumsum[factor:] - cumsum[:-factor]) / float(factor)


def _plot_smoothed_run(ax, frame, label, color, smooth_factor, smooth_factor_min_max):
    """Draw one run on ``ax``: a smoothed line plus a shaded smoothed MIN/MAX band."""
    columns = list(frame.keys())
    # The line uses the first column that is neither the step axis nor one of
    # the MIN/MAX envelope columns.
    mean_key = [
        col
        for col in columns
        if not (col.startswith("Step") or col.endswith(("MAX", "MIN")))
    ][0]
    min_key = [col for col in columns if col.endswith("MIN")][0]
    max_key = [col for col in columns if col.endswith("MAX")][0]

    steps = frame["Step"]

    # smooth_curve drops the first ``factor - 1`` samples, so the same number
    # of leading steps is dropped. ``iloc`` keeps this positional even when
    # the table's index is not the default 0..n-1 range.
    ax.plot(
        steps.iloc[smooth_factor - 1 :],
        smooth_curve(frame[mean_key], factor=smooth_factor),
        label=label,
        color=color,
    )

    ax.fill_between(
        steps.iloc[smooth_factor_min_max - 1 :],
        smooth_curve(frame[min_key], smooth_factor_min_max),
        smooth_curve(frame[max_key], smooth_factor_min_max),
        color=color,
        alpha=0.2,
    )


def plot_result(
    title,
    results: dict,
    dylam_results: dict,
    formatter: ticker.ScalarFormatter,
    colors: dict,
    y_label: str,
    base_path: str = ".",
    x_label: str = "Number of training steps",
    smooth_factor: int = 100,
    smooth_factor_min_max: int = 100,
):
    """Plot smoothed curves for several runs plus a reference run and save a PDF.

    Every table must provide a ``"Step"`` column, one column whose name ends
    with ``"MIN"`` and one whose name ends with ``"MAX"``. The first remaining
    column is drawn as the line; the MIN/MAX columns form the shaded band.

    Args:
        title: Figure title; also used as the output file name.
        results: Mapping of legend label to the table for that run (the
            notebook passes pandas DataFrames read from CSV).
        dylam_results: Table for the reference run. It is drawn with the
            legend label ``"Reported"`` and the colour ``colors["DyLam"]``.
        formatter: Tick formatter for both axes. Each axis receives its own
            shallow copy, because a formatter instance is bound to the single
            axis it was last installed on.
        colors: Mapping of legend label to colour. Must contain every key of
            ``results`` and the key ``"DyLam"``.
        y_label: Label of the y-axis.
        base_path: Directory the PDF is written to.
        x_label: Label of the x-axis.
        smooth_factor: Moving-average window for the line.
        smooth_factor_min_max: Moving-average window for the MIN/MAX band.

    Side effects:
        Writes ``{base_path}/{title}.pdf`` and closes the figure afterwards.
    """
    import copy  # local import: only needed to clone the formatter per axis

    fig, ax = plt.subplots(figsize=(10, 6))

    for method, frame in results.items():
        _plot_smoothed_run(
            ax, frame, method, colors[method], smooth_factor, smooth_factor_min_max
        )
    _plot_smoothed_run(
        ax,
        dylam_results,
        "Reported",
        colors["DyLam"],
        smooth_factor,
        smooth_factor_min_max,
    )

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label, labelpad=1)
    ax.grid(True)
    ax.legend(prop=FONT, loc="best")
    ax.set_title(f"{title}")

    # Installing one formatter object on both axes rebinds it to the x-axis,
    # so the y-axis labels would be computed from x-axis state. The caller's
    # formatter is left untouched; each axis gets an independent copy.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))

    fig.savefig(f"{base_path}/{title}.pdf", format="pdf", bbox_inches="tight")
    plt.close(fig)

## Tau ablation

In [15]:
# Legend label -> line colour. The "DyLam" entry colours the reference run.
TAU_METHOD_COLORS = {
    "0.5": "#EABE39",
    "0.7": "#EA39BE",
    "0.9": "#3965EA",
    "DyLam": "#39EA65",
}

TAU_COMPONENTS = ["Banana", "Chicken", "Gate"]
# File names carry tau * 10; the legend labels use tau itself (e.g. 5 -> "0.5").
TAU_FILE_SUFFIXES = [5, 7, 9]

rewards = {
    comp: {
        str(tau/10): pd.read_csv(f"tau/rew-{comp}-{tau}.csv")
        for tau in TAU_FILE_SUFFIXES
    }
    for comp in TAU_COMPONENTS
}
dylam_rewards = {
    comp: pd.read_csv(f"ChickenBanana/rew-{comp}.csv") for comp in TAU_COMPONENTS
}

lambdas = {
    comp: {
        str(tau/10): pd.read_csv(f"tau/lambda-{comp}-{tau}.csv")
        for tau in TAU_FILE_SUFFIXES
    }
    for comp in TAU_COMPONENTS
}
dylam_lambdas = {
    comp: pd.read_csv(f"ChickenBanana/lambda-{comp}.csv") for comp in TAU_COMPONENTS
}

# Two figures per component: the reward curves first, then the lambda curves.
for comp in TAU_COMPONENTS:
    tau_panels = [
        (
            f"{comp} Component - Different Tau Values",
            rewards[comp],
            dylam_rewards[comp],
            "Cumulative Episode Reward",
        ),
        (
            f"{comp} Lambda - Different Tau Values",
            lambdas[comp],
            dylam_lambdas[comp],
            "Lambda Value",
        ),
    ]
    for panel_title, panel_runs, panel_reference, panel_y_label in tau_panels:
        plot_result(
            panel_title,
            panel_runs,
            panel_reference,
            FORMATTER,
            TAU_METHOD_COLORS,
            y_label=panel_y_label,
            base_path="./tau",
        )

## RB ablation

In [16]:
# Legend label -> line colour. The "DyLam" entry colours the reference run.
RB_METHOD_COLORS = {
    "50": "#EABE39",
    "100": "#EA39BE",
    "500": "#3965EA",
    "DyLam": "#39EA65",
}

RB_COMPONENTS = ["Banana", "Chicken", "Gate"]
# RB settings under comparison; each value is both the file suffix and,
# as a string, the legend label.
RB_VALUES = [50, 100, 500]

rewards = {
    comp: {
        str(rb): pd.read_csv(f"rb/rew-{comp}-{rb}.csv")
        for rb in RB_VALUES
    }
    for comp in RB_COMPONENTS
}
dylam_rewards = {
    comp: pd.read_csv(f"ChickenBanana/rew-{comp}.csv") for comp in RB_COMPONENTS
}

lambdas = {
    comp: {
        str(rb): pd.read_csv(f"rb/lambda-{comp}-{rb}.csv")
        for rb in RB_VALUES
    }
    for comp in RB_COMPONENTS
}
dylam_lambdas = {
    comp: pd.read_csv(f"ChickenBanana/lambda-{comp}.csv") for comp in RB_COMPONENTS
}

# Two figures per component: the reward curves first, then the lambda curves.
for comp in RB_COMPONENTS:
    rb_panels = [
        (
            f"{comp} Component - Different RB Values",
            rewards[comp],
            dylam_rewards[comp],
            "Cumulative Episode Reward",
        ),
        (
            f"{comp} Lambda - Different RB Values",
            lambdas[comp],
            dylam_lambdas[comp],
            "Lambda Value",
        ),
    ]
    for panel_title, panel_runs, panel_reference, panel_y_label in rb_panels:
        plot_result(
            panel_title,
            panel_runs,
            panel_reference,
            FORMATTER,
            RB_METHOD_COLORS,
            y_label=panel_y_label,
            base_path="./rb",
        )

## Normalizer ablation

In [17]:
# Legend label -> line colour. The "DyLam" entry colours the reference run.
NORMALIZER_METHOD_COLORS = {
    "l1": "#EABE39",
    "minmax": "#3965EA",
    "DyLam": "#39EA65",
}

NORMALIZER_COMPONENTS = ["Banana", "Chicken", "Gate"]
# Each normalizer name is both the file suffix and the legend label.
NORMALIZER_NAMES = ["l1", "minmax"]

rewards = {
    comp: {
        normalizer: pd.read_csv(f"normalizer/rew-{comp}-{normalizer}.csv")
        for normalizer in NORMALIZER_NAMES
    }
    for comp in NORMALIZER_COMPONENTS
}
dylam_rewards = {
    comp: pd.read_csv(f"ChickenBanana/rew-{comp}.csv")
    for comp in NORMALIZER_COMPONENTS
}

lambdas = {
    comp: {
        normalizer: pd.read_csv(f"normalizer/lambda-{comp}-{normalizer}.csv")
        for normalizer in NORMALIZER_NAMES
    }
    for comp in NORMALIZER_COMPONENTS
}
dylam_lambdas = {
    comp: pd.read_csv(f"ChickenBanana/lambda-{comp}.csv")
    for comp in NORMALIZER_COMPONENTS
}

# Two figures per component: the reward curves first, then the lambda curves.
for comp in NORMALIZER_COMPONENTS:
    normalizer_panels = [
        (
            f"{comp} Component - Different Normalizers",
            rewards[comp],
            dylam_rewards[comp],
            "Cumulative Episode Reward",
        ),
        (
            f"{comp} Lambda - Different Normalizers",
            lambdas[comp],
            dylam_lambdas[comp],
            "Lambda Value",
        ),
    ]
    for panel_title, panel_runs, panel_reference, panel_y_label in normalizer_panels:
        plot_result(
            panel_title,
            panel_runs,
            panel_reference,
            FORMATTER,
            NORMALIZER_METHOD_COLORS,
            y_label=panel_y_label,
            base_path="./normalizer",
        )

## Epsilon ablation

In [18]:
# Legend label -> line colour. The "DyLam" entry colours the reference run.
EPSILON_METHOD_COLORS = {
    "0.8": "#EABE39",
    "0.9": "#EA39BE",
    "0.95": "#3965EA",
    "DyLam": "#39EA65",
}

EPSILON_COMPONENTS = ["Banana", "Chicken", "Gate"]
# Legend labels; the CSV files are named with the value as a percentage
# (e.g. "0.95" -> 95).
EPSILON_LABELS = ["0.8", "0.9", "0.95"]

# round() instead of int(): int() truncates, so a product that lands just
# below the intended integer because of binary floating point (for example
# 0.57 * 100 == 56.99999999999999) would select the wrong file. The three
# labels above map to 80, 90 and 95 either way.
rewards = {
    comp: {
        epsilon: pd.read_csv(f"epsilon/rew-{comp}-{round(float(epsilon) * 100)}.csv")
        for epsilon in EPSILON_LABELS
    }
    for comp in EPSILON_COMPONENTS
}
dylam_rewards = {
    comp: pd.read_csv(f"ChickenBanana/rew-{comp}.csv") for comp in EPSILON_COMPONENTS
}

lambdas = {
    comp: {
        epsilon: pd.read_csv(f"epsilon/lambda-{comp}-{round(float(epsilon) * 100)}.csv")
        for epsilon in EPSILON_LABELS
    }
    for comp in EPSILON_COMPONENTS
}
dylam_lambdas = {
    comp: pd.read_csv(f"ChickenBanana/lambda-{comp}.csv")
    for comp in EPSILON_COMPONENTS
}

# Two figures per component: the reward curves first, then the lambda curves.
for comp in EPSILON_COMPONENTS:
    plot_result(
        f"{comp} Component - Different Epsilon decay rates",
        rewards[comp],
        dylam_rewards[comp],
        FORMATTER,
        EPSILON_METHOD_COLORS,
        y_label="Cumulative Episode Reward",
        base_path="./epsilon",
    )
    plot_result(
        f"{comp} Lambda - Different Epsilon decay rates",
        lambdas[comp],
        dylam_lambdas[comp],
        FORMATTER,
        EPSILON_METHOD_COLORS,
        y_label="Lambda Value",
        base_path="./epsilon",
    )