In [1]:
import copy

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

from yaml import safe_load

In [2]:
# Global figure styling: white seaborn theme with light-gray grid lines,
# rendered at 600 dpi.
sns.set_theme(rc={"grid.color": "lightgray"}, style="white")
plt.rcParams.update({"figure.dpi": 600})

# Tick formatter handed to every plot function below: math-text scientific
# notation, with the power limits set to (-1, 1).
formatter = ticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-1, 1))
formatter.set_scientific(True)

In [3]:
# Line/fill color per method name; looked up by `plot_result` via `colors[method]`.
# Methods are grouped by the color they share.
method_colors = {
    **dict.fromkeys(("Q-Learning", "DQN", "SAC", "Baseline"), "orange"),
    "DyLam": "blue",
    **dict.fromkeys(("drQ", "GPILS"), "green"),
}

In [4]:
# Load the experiment configuration. The context manager guarantees the file
# handle is closed (the bare `open(...)` call leaked it until garbage collection).
with open("../experiments.yml", "r") as experiments_file:
    params = safe_load(experiments_file)

# Per-environment reward bounds taken from the "Dylam" section of the config,
# keyed by the gym id with its "mo-" marker removed.
reward_ranges = {
    value["gym_id"].replace("mo-", ""): {
        "r_max": value["r_max"],
        "r_min": value["r_min"],
    }
    for value in params["Dylam"].values()
}

## Analysis per Objective

### Smoothing a given signal using an exponential moving average (controlled by `factor`)

In [5]:
def smooth_curve(points: np.array, factor: float = 0.9):
    """Smooth a sequence with an exponential moving average.

    The first output equals the first input; every later output is
    ``previous_output * factor + point * (1 - factor)``.

    Args:
        points: Iterable of values to smooth.
        factor: Weight given to the previous smoothed value.

    Returns:
        A list with one smoothed value per input point (empty for empty input).
    """
    result = []
    running = None
    for value in points:
        if not result:
            # Seed the average with the first sample as-is.
            running = value
        else:
            running = running * factor + value * (1 - factor)
        result.append(running)
    return result

### Plot function

In [6]:
def plot_result(
    gym_id,
    results: dict,
    formatter: ticker.ScalarFormatter,
    colors: dict,
    y_label: str,
    x_label: str = "Number of training steps",
    smooth_factor_mean: float = 0.9,
    smooth_factor_min_max: float = 0.9,
):
    """Plot the smoothed mean curve of each method with a min/max band and save it.

    The figure is written to ``"{gym_id}.pdf"`` in the working directory and
    then closed.

    Args:
        gym_id: Environment name; used as the plot title and output file stem.
        results: Maps a method name to a table with a ``"Step"`` column, one
            column ending in ``"MIN"``, one ending in ``"MAX"``, and one other
            column holding the mean.
        formatter: Template tick formatter. It is copied per axis and never
            attached to the figure itself.
        colors: Maps each method name in ``results`` to a matplotlib color.
        y_label: Label of the y axis.
        x_label: Label of the x axis.
        smooth_factor_mean: Smoothing factor for the mean curve.
        smooth_factor_min_max: Smoothing factor for the min and max curves.

    Raises:
        IndexError: If a table lacks a mean, ``MIN`` or ``MAX`` column.
        KeyError: If a method has no entry in ``colors``.
    """
    fig, ax = plt.subplots()
    for method, data in results.items():
        columns = list(data.keys())
        # The mean column is whichever one is neither the step axis nor a bound.
        mean_key = [
            key
            for key in columns
            if not (
                key.startswith("Step") or key.endswith("MAX") or key.endswith("MIN")
            )
        ][0]
        min_key = [key for key in columns if key.endswith("MIN")][0]
        max_key = [key for key in columns if key.endswith("MAX")][0]

        x = data["Step"]
        y = smooth_curve(data[mean_key], factor=smooth_factor_mean)
        y_min = smooth_curve(data[min_key], factor=smooth_factor_min_max)
        y_max = smooth_curve(data[max_key], factor=smooth_factor_min_max)

        # Mean as a solid line, min/max envelope as a translucent band.
        ax.plot(x, y, label=method, color=colors[method])
        ax.fill_between(x, y_min, y_max, color=colors[method], alpha=0.2)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label, labelpad=1)
    ax.grid(True)
    ax.legend()
    ax.set_title(f"{gym_id}")
    # A formatter is bound to the axis it is installed on, so installing the
    # same instance on both axes (and on every figure) makes the axes share
    # state. Give each axis its own copy instead.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))
    fig.savefig(f"{gym_id}.pdf", format="pdf")
    # Close this specific figure rather than whatever pyplot considers current.
    plt.close(fig)

In [7]:
def plot_lambdas(
    title, lambdas, formatter, smooth_factor, x_label="Number of training steps"
):
    """Plot the smoothed lambda weight of each reward component and save it.

    The figure is written to ``title + "-weights.pdf"`` and then closed.

    Args:
        title: Plot title and output file stem (must be a string).
        lambdas: Maps a component name to a table with a ``"Step"`` column and
            one value column (any column not starting with ``"Step"`` and not
            ending in ``"MIN"``/``"MAX"``). The last character of that column's
            name selects the line color and must be ``"0"``..``"3"``.
        formatter: Template tick formatter. It is copied per axis and never
            attached to the figure itself.
        smooth_factor: Smoothing factor passed to ``smooth_curve``.
        x_label: Label of the x axis.

    Raises:
        IndexError: If a table has no value column.
        KeyError: If the value column name does not end in ``"0"``..``"3"``.
    """
    COLOR = {
        "0": "red",
        "1": "blue",
        "2": "green",
        "3": "orange",
    }
    fig, ax = plt.subplots()
    for weight, data in lambdas.items():
        mean_key = [
            key
            for key in data.keys()
            if not (
                key.startswith("Step") or key.endswith("MAX") or key.endswith("MIN")
            )
        ][0]

        x = data["Step"]
        y = smooth_curve(data[mean_key], factor=smooth_factor)
        # Color is chosen from the trailing character of the value column name.
        ax.plot(x, y, label=weight.replace("_", " "), color=COLOR[mean_key[-1]])
    ax.set_xlabel(x_label)
    ax.set_ylabel(r"$\lambda$ weights")
    ax.set_ylim(0, 1)
    ax.grid(True)
    ax.legend()
    ax.set_title(f"{title}")
    # A formatter is bound to the axis it is installed on, so installing the
    # same instance on both axes (and on every figure) makes the axes share
    # state. Give each axis its own copy instead.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))
    path = title + "-weights.pdf"
    fig.savefig(path, format="pdf")
    # Close this specific figure rather than whatever pyplot considers current.
    plt.close(fig)

In [8]:
def plot_rewards(
    title,
    rewards,
    reward_max,
    reward_min,
    formatter,
    smooth_factor,
    x_label="Number of training steps",
):
    """Plot each reward component, normalized by its bounds, and save the figure.

    Every component is smoothed, divided by ``max(|r_max|, |r_min|)`` of that
    component, and drawn as a solid line. A dashed line of the same color marks
    the component's normalized maximum. The figure is written to
    ``title + "-components.pdf"`` and then closed.

    Args:
        title: Plot title and output file stem (must be a string).
        rewards: Maps a component name to a table with a ``"Step"`` column and
            one value column (any column not starting with ``"Step"`` and not
            ending in ``"MIN"``/``"MAX"``). At most four components are
            supported, one per entry of the internal style table.
        reward_max: Per-component upper bounds, indexed in the iteration order
            of ``rewards``.
        reward_min: Per-component lower bounds, indexed like ``reward_max``.
        formatter: Template tick formatter. It is copied per axis and never
            attached to the figure itself.
        smooth_factor: Smoothing factor passed to ``smooth_curve``.
        x_label: Label of the x axis.

    Raises:
        IndexError: If a table has no value column, or the bounds are shorter
            than ``rewards``.
        KeyError: If there are more than four components.
    """

    def normalize_rewards(rew, reward_max, reward_min):
        """Scale ``rew`` by the larger absolute bound of its component."""
        rew = np.asarray(rew, dtype=float)
        abs_max = max(abs(reward_max), abs(reward_min))
        if abs_max == 0:
            # Both bounds are zero: there is no scale to divide by, so leave
            # the values as they are instead of producing NaN/inf.
            return rew
        return rew / abs_max

    # (color, dash pattern of the max-reward reference line) per component index.
    PARAM = {
        0: ("red", (0, (3, 1))),
        1: ("blue", (0, (3, 4))),
        2: ("green", (0, (3, 8))),
        3: ("orange", (0, (3, 12))),
    }
    fig, ax = plt.subplots()
    # One scalar per component, so the reference line below is a flat 1-D series.
    normalized_max_rewards = [
        float(normalize_rewards(reward_max[i], reward_max[i], reward_min[i]))
        for i in range(len(reward_max))
    ]
    for i, (reward, data) in enumerate(rewards.items()):
        mean_key = [
            key
            for key in data.keys()
            if not (
                key.startswith("Step") or key.endswith("MAX") or key.endswith("MIN")
            )
        ][0]
        color, max_linestyle = PARAM[i]

        x = data["Step"]
        r_max_line = [normalized_max_rewards[i]] * len(x)
        ax.plot(x, r_max_line, color=color, linestyle=max_linestyle)
        y = smooth_curve(data[mean_key], factor=smooth_factor)
        y = normalize_rewards(y, reward_max[i], reward_min[i])
        ax.plot(x, y, label=reward.replace("_", " "), color=color)
    ax.set_xlabel(x_label)
    ax.set_ylabel("Cumulative Episode Rewards (Normalized)", labelpad=1)
    ax.set_ylim(-1.2, 1.2)
    ax.grid(True)
    ax.legend()
    ax.set_title(title)
    # A formatter is bound to the axis it is installed on, so installing the
    # same instance on both axes (and on every figure) makes the axes share
    # state. Give each axis its own copy instead.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))
    path = title + "-components.pdf"
    fig.savefig(path, format="pdf")
    # Close this specific figure rather than whatever pyplot considers current.
    plt.close(fig)

### Taxi-v3

In [9]:
# Method comparison: Q-Learning vs. DyLam.
results = {
    method: pd.read_csv(f"Taxi/{method}.csv") for method in ("Q-Learning", "DyLam")
}
plot_result(
    gym_id="Taxi-v3",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Passenger dropoff rate",
    x_label="Number of training episodes",
)

# Lambda weight curves, one CSV per reward component.
lambdas = {
    component: pd.read_csv(f"Taxi/{component}.csv")
    for component in ("Fuel", "Passanger_dropoff", "Illegal_action")
}
plot_lambdas(
    title="Taxi-v3",
    lambdas=lambdas,
    formatter=formatter,
    smooth_factor=0.99,
    x_label="Number of training episodes",
)

# Per-component rewards; the CSVs carry a "rew-" prefix that is not part of the key.
rewards = {
    component: pd.read_csv(f"Taxi/rew-{component}.csv")
    for component in ("Fuel", "Passanger_dropoff", "Illegal_action")
}
reward_max, reward_min = (
    reward_ranges["Taxi-v3"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="Taxi-v3",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=0.99,
    x_label="Number of training episodes",
)

### LunarLander-v2

In [24]:
# Method comparison: DQN vs. DyLam.
results = {
    method: pd.read_csv(f"LunarLander/{method}.csv") for method in ("DQN", "DyLam")
}
plot_result(
    gym_id="LunarLander-v2",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Landing rate",
    smooth_factor_mean=0.99,
)

# Lambda weight curves, one CSV per reward component.
lambdas = {
    component: pd.read_csv(f"LunarLander/{component}.csv")
    for component in ("Shaping", "Power_Linear", "Power_Angular", "Landing_Rate")
}
plot_lambdas(
    title="LunarLander-v2", lambdas=lambdas, formatter=formatter, smooth_factor=0.99
)

# Per-component rewards; the CSVs carry a "rew-" prefix that is not part of the key.
rewards = {
    component: pd.read_csv(f"LunarLander/rew-{component}.csv")
    for component in ("Shaping", "Power_Linear", "Power_Angular", "Landing_Rate")
}
reward_max, reward_min = (
    reward_ranges["LunarLander-v2"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="LunarLander-v2",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=0.99,
)

### HalfCheetah-v4

In [25]:
# Method comparison: SAC vs. DyLam.
results = {
    method: pd.read_csv(f"HalfCheetah/{method}.csv") for method in ("SAC", "DyLam")
}
plot_result(
    gym_id="HalfCheetah-v4",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Final Position",
    smooth_factor_mean=0.99,
    smooth_factor_min_max=0.99,
)

# Lambda weight curves, one CSV per reward component.
lambdas = {
    component: pd.read_csv(f"HalfCheetah/{component}.csv")
    for component in ("Run", "Control")
}
plot_lambdas(
    title="HalfCheetah-v4", lambdas=lambdas, formatter=formatter, smooth_factor=0.99
)

# Per-component rewards; the CSVs carry a "rew-" prefix that is not part of the key.
rewards = {
    component: pd.read_csv(f"HalfCheetah/rew-{component}.csv")
    for component in ("Run", "Control")
}
reward_max, reward_min = (
    reward_ranges["HalfCheetah-v4"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="HalfCheetah-v4",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=0.99,
)

### VSS-v0

In [26]:
# Method comparison: SAC vs. DyLam.
results = {method: pd.read_csv(f"VSS/{method}.csv") for method in ("SAC", "DyLam")}
plot_result(
    gym_id="VSS-v0",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Goal rate",
    smooth_factor_mean=0.99,
)

# Lambda weight curves, one CSV per reward component.
lambdas = {
    component: pd.read_csv(f"VSS/{component}.csv")
    for component in ("Move_to_ball", "Ball_to_goal", "Energy")
}
plot_lambdas(title="VSS-v0", lambdas=lambdas, formatter=formatter, smooth_factor=0.99)

# Per-component rewards; the CSVs carry a "rew-" prefix that is not part of the key.
rewards = {
    component: pd.read_csv(f"VSS/rew-{component}.csv")
    for component in ("Move_to_ball", "Ball_to_goal", "Energy")
}
reward_max, reward_min = (
    reward_ranges["VSS-v0"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="VSS-v0",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=0.99,
)