In [27]:
import copy

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

from yaml import safe_load

In [28]:
# Global figure styling: white seaborn theme with light-gray grid lines,
# rendered at 600 dpi.
sns.set_theme(rc={"grid.color": "lightgray"}, style="white")
plt.rcParams.update({"figure.dpi": 600})

# Tick formatter handed to every plotting function below: scientific notation
# rendered as math text, with power limits of (-1, 1).
formatter = ticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-1, 1))
formatter.set_scientific(True)

In [29]:
# Line/band colour per method label: baselines share orange, DyLam is blue,
# and the drQ / GPILS comparisons share green.
method_colors = {
    **dict.fromkeys(("Q-Learning", "DQN", "SAC", "Baseline"), "orange"),
    "DyLam": "blue",
    **dict.fromkeys(("drQ", "GPILS"), "green"),
}

In [30]:
# Load the experiment definitions; the context manager guarantees the file
# handle is closed once parsing finishes (the bare open() call leaked it).
with open("../experiments.yml", "r") as experiments_file:
    params = safe_load(experiments_file)

# Map each environment id (with any "mo-" substring stripped) to the r_max / r_min
# values declared for it in the "Dylam" section of the YAML file.
# NOTE(review): the section key is "Dylam" (lowercase "l"), unlike the "DyLam"
# method label used elsewhere in this notebook — confirm against experiments.yml.
reward_ranges = {
    value["gym_id"].replace("mo-", ""): {
        "r_max": value["r_max"],
        "r_min": value["r_min"],
    }
    for value in params["Dylam"].values()
}

## Analysis per Objective

### Smoothing a given signal using a moving average filter (default window: 100 points)

In [31]:
def smooth_curve(points: np.ndarray, factor: int = 100) -> np.ndarray:
    """Smooth a 1-D signal with a moving average of ``factor`` samples.

    Args:
        points: 1-D sequence of numeric samples (array, list or pandas Series).
        factor: Window length in samples; must be a positive integer.

    Returns:
        Array of ``len(points) - factor + 1`` window means, where element ``k``
        is the mean of ``points[k : k + factor]``. The result is empty when
        ``factor`` exceeds ``len(points)``.

    Raises:
        ValueError: If ``factor`` is smaller than 1.
    """
    if factor < 1:
        # A zero window fails with an opaque broadcasting error and a negative
        # one silently returns meaningless values, so reject both up front.
        raise ValueError(f"factor must be a positive integer, got {factor}")
    # Prepending 0 makes cumsum[k] the sum of the first k samples, so every
    # window sum is the difference of two cumulative sums.
    cumsum = np.cumsum(np.insert(points, 0, 0))
    return (cumsum[factor:] - cumsum[:-factor]) / float(factor)

### Plot functions

In [32]:
def plot_result(
    gym_id,
    results: dict,
    formatter: ticker.ScalarFormatter,
    colors: dict,
    y_label: str,
    x_label: str = "Number of training steps",
    smooth_factor: int = 100,
    smooth_factor_min_max: int = 100,
):
    """Plot a smoothed mean curve with a min/max band per method and save it as PDF.

    For every entry of ``results`` the first column that is neither ``Step*``
    nor ``*MIN`` / ``*MAX`` is drawn as a line, and the first ``*MIN`` and
    ``*MAX`` columns bound a translucent band of the same colour. The figure is
    written to ``"{gym_id}.pdf"`` in the working directory and then closed.

    Args:
        gym_id: Environment name; used as the title and the output file stem.
        results: Mapping of method label to a DataFrame with a ``Step`` column
            plus mean, ``*MIN`` and ``*MAX`` columns.
        formatter: Tick formatter used as a template for both axes.
        colors: Mapping of method label to a matplotlib colour.
        y_label: Label of the y axis.
        x_label: Label of the x axis.
        smooth_factor: Moving-average window applied to the mean curve.
        smooth_factor_min_max: Moving-average window applied to the band.

    Raises:
        IndexError: If a frame lacks a mean, ``*MIN`` or ``*MAX`` column.
        KeyError: If a method has no entry in ``colors`` or no ``Step`` column.
    """
    fig, ax = plt.subplots()
    for method, frame in results.items():
        columns = list(frame.keys())
        mean_key = [
            key
            for key in columns
            if not (key.startswith("Step") or key.endswith(("MAX", "MIN")))
        ][0]
        min_key = [key for key in columns if key.endswith("MIN")][0]
        max_key = [key for key in columns if key.endswith("MAX")][0]

        # Smoothing drops the first (window - 1) samples, so drop the same
        # number of steps. Slice by position (iloc) rather than by label (loc)
        # so x and y stay the same length whatever the frame's index is.
        x = frame["Step"].iloc[smooth_factor - 1 :]
        y = smooth_curve(frame[mean_key], factor=smooth_factor)
        ax.plot(x, y, label=method, color=colors[method])

        x = frame["Step"].iloc[smooth_factor_min_max - 1 :]
        y_min = smooth_curve(frame[min_key], smooth_factor_min_max)
        y_max = smooth_curve(frame[max_key], smooth_factor_min_max)
        ax.fill_between(x, y_min, y_max, color=colors[method], alpha=0.2)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label, labelpad=1)
    ax.grid(True)
    ax.legend()
    ax.set_title(f"{gym_id}")
    # A formatter keeps a reference to a single axis, so each axis gets its own
    # copy instead of sharing (and re-binding) the caller's instance.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))
    fig.savefig(f"{gym_id}.pdf", format="pdf")
    plt.close(fig)

In [33]:
def plot_result_taxi(
    gym_id,
    results: dict,
    formatter: ticker.ScalarFormatter,
    colors: dict,
    y_label: str,
    x_label: str = "Number of training steps",
    smooth_factor: int = 100,
):
    """Plot a combined, smoothed Taxi score per method and save it as PDF.

    Each frame's mean, ``*MIN`` and ``*MAX`` columns are each combined into a
    single score by ``get_reward``; the mean score is drawn as a line and the
    min/max scores bound a translucent band. The figure is written to
    ``"{gym_id}.pdf"`` in the working directory and then closed.

    Args:
        gym_id: Environment name; used as the title and the output file stem.
        results: Mapping of method label to a DataFrame with a ``Step`` column
            and at least three mean, three ``*MIN`` and three ``*MAX`` columns.
        formatter: Tick formatter used as a template for both axes.
        colors: Mapping of method label to a matplotlib colour.
        y_label: Label of the y axis.
        x_label: Label of the x axis.
        smooth_factor: Moving-average window applied to all three curves.

    Raises:
        IndexError: If a frame has fewer than three columns of any kind.
        KeyError: If a method has no entry in ``colors`` or no ``Step`` column.
    """

    def get_reward(result, keys):
        # Score = first column minus the scaled magnitudes of the second and
        # third columns, plus a 0.1 offset, clipped to [-1, 1]. The columns are
        # taken in the frame's own column order.
        # NOTE(review): the 200 divisor and the 0.1 offset are not explained in
        # this file — confirm their meaning against the experiment setup.
        reward = (
            result[keys[0]]
            - abs(result[keys[1]] / 200)
            - abs((result[keys[2]]) / 200)
            + 0.1
        )
        return np.clip(reward, -1, 1)

    fig, ax = plt.subplots()
    for method, frame in results.items():
        columns = list(frame.keys())
        mean_keys = [
            key
            for key in columns
            if not (key.startswith("Step") or key.endswith(("MAX", "MIN")))
        ]
        min_keys = [key for key in columns if key.endswith("MIN")]
        max_keys = [key for key in columns if key.endswith("MAX")]

        result_mean = get_reward(frame, mean_keys)
        result_min = get_reward(frame, min_keys)
        result_max = get_reward(frame, max_keys)

        # Smoothing drops the first (window - 1) samples; slice the steps by
        # position (iloc) so x matches y whatever the frame's index is.
        x = frame["Step"].iloc[smooth_factor - 1 :]
        y = smooth_curve(result_mean, factor=smooth_factor)
        y_min = smooth_curve(result_min, factor=smooth_factor)
        y_max = smooth_curve(result_max, factor=smooth_factor)

        # Mean as a solid line, min/max as a shaded region of the same colour.
        ax.plot(x, y, label=method, color=colors[method])
        ax.fill_between(x, y_min, y_max, color=colors[method], alpha=0.2)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label, labelpad=1)
    ax.grid(True)
    ax.legend()
    ax.set_title(f"{gym_id}")
    # A formatter keeps a reference to a single axis, so each axis gets its own
    # copy instead of sharing (and re-binding) the caller's instance.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))
    fig.savefig(f"{gym_id}.pdf", format="pdf")
    plt.close(fig)

In [34]:
def plot_lambdas(
    title, lambdas, formatter, smooth_factor, x_label="Number of training steps"
):
    """Plot the smoothed weight curve of every component and save it as PDF.

    For every entry of ``lambdas`` the first column that is neither ``Step*``
    nor ``*MIN`` / ``*MAX`` is smoothed and drawn on a y axis fixed to [0, 1].
    The figure is written to ``"{title}-weights.pdf"`` and then closed.

    Args:
        title: Figure title and output file stem.
        lambdas: Mapping of component name to a DataFrame with a ``Step``
            column and a value column. Underscores in the name become spaces
            in the legend.
        formatter: Tick formatter used as a template for both axes.
        smooth_factor: Moving-average window applied to each curve.
        x_label: Label of the x axis.

    Raises:
        IndexError: If a frame has no value column.
    """
    # Colours are reused cyclically, so more than four components no longer
    # raise a KeyError.
    palette = ("red", "blue", "green", "orange")

    fig, ax = plt.subplots()
    for i, (weight, frame) in enumerate(lambdas.items()):
        mean_key = [
            key
            for key in frame.keys()
            if not (key.startswith("Step") or key.endswith(("MAX", "MIN")))
        ][0]

        # Smoothing drops the first (window - 1) samples; slice the steps by
        # position (iloc) so x matches y whatever the frame's index is.
        x = frame["Step"].iloc[smooth_factor - 1 :]
        y = smooth_curve(frame[mean_key], factor=smooth_factor)
        ax.plot(x, y, label=weight.replace("_", " "), color=palette[i % len(palette)])

    ax.set_xlabel(x_label)
    ax.set_ylabel(r"$\lambda$ weights")
    ax.set_ylim(0, 1)
    ax.grid(True)
    ax.legend()
    ax.set_title(f"{title}")
    # A formatter keeps a reference to a single axis, so each axis gets its own
    # copy instead of sharing (and re-binding) the caller's instance.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))
    path = title + "-weights.pdf"
    fig.savefig(path, format="pdf")
    plt.close(fig)

In [35]:
def plot_rewards(
    title,
    rewards,
    reward_max,
    reward_min,
    formatter,
    smooth_factor,
    x_label="Number of training steps",
):
    """Plot normalized, smoothed reward components and save the figure as PDF.

    Each component's value column is smoothed and divided by
    ``max(|reward_max[i]|, |reward_min[i]|)``. A dashed horizontal line in the
    same colour marks the normalized ``reward_max[i]``. The y axis is fixed to
    [-1.2, 1.2]. The figure is written to ``"{title}-components.pdf"`` and then
    closed.

    Args:
        title: Figure title and output file stem.
        rewards: Mapping of component name to a DataFrame with a ``Step``
            column and a value column. Iteration order must match the order of
            ``reward_max`` / ``reward_min``.
        reward_max: Sequence of per-component upper reward bounds.
        reward_min: Sequence of per-component lower reward bounds.
        formatter: Tick formatter used as a template for both axes.
        smooth_factor: Moving-average window applied to each curve.
        x_label: Label of the x axis.

    Raises:
        IndexError: If a frame has no value column, or ``rewards`` has more
            entries than ``reward_max`` / ``reward_min``.
    """

    def normalize_rewards(rew, reward_max, reward_min):
        # Scale by the larger bound magnitude.
        abs_max = max(abs(reward_max), abs(reward_min))
        return np.array(rew) / abs_max

    # (colour, dash pattern) per component; reused cyclically so more than four
    # components no longer raise a KeyError.
    styles = (
        ("red", (0, (3, 1))),
        ("blue", (0, (3, 4))),
        ("green", (0, (3, 8))),
        ("orange", (0, (3, 12))),
    )

    fig, ax = plt.subplots()
    for i, (reward, frame) in enumerate(rewards.items()):
        color, dashes = styles[i % len(styles)]
        mean_key = [
            key
            for key in frame.keys()
            if not (key.startswith("Step") or key.endswith(("MAX", "MIN")))
        ][0]

        # Smoothing drops the first (window - 1) samples; slice the steps by
        # position (iloc) so x matches y whatever the frame's index is.
        x = frame["Step"].iloc[smooth_factor - 1 :]

        # Dashed reference line at this component's normalized upper bound,
        # built as a flat 1-D array rather than a list of 1-element arrays.
        ceiling = float(normalize_rewards(reward_max[i], reward_max[i], reward_min[i]))
        ax.plot(x, np.full(len(x), ceiling), color=color, linestyle=dashes)

        y = smooth_curve(frame[mean_key], factor=smooth_factor)
        y = normalize_rewards(y, reward_max[i], reward_min[i])
        ax.plot(x, y, label=reward.replace("_", " "), color=color)

    ax.set_xlabel(x_label)
    ax.set_ylabel("Cumulative Episode Rewards (Normalized)", labelpad=1)
    ax.set_ylim(-1.2, 1.2)
    ax.grid(True)
    ax.legend()
    ax.set_title(title)
    # A formatter keeps a reference to a single axis, so each axis gets its own
    # copy instead of sharing (and re-binding) the caller's instance.
    ax.yaxis.set_major_formatter(copy.copy(formatter))
    ax.xaxis.set_major_formatter(copy.copy(formatter))
    path = title + "-components.pdf"
    fig.savefig(path, format="pdf")
    plt.close(fig)

### Taxi-v3

In [36]:
# Per-method results: one CSV per method under Taxi/.
results = {
    method: pd.read_csv(f"Taxi/{method}.csv")
    for method in ("Q-Learning", "drQ", "DyLam")
}
plot_result_taxi(
    gym_id="Taxi-v3",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Efficient Drop-off Rate",
    x_label="Number of training episodes",
    smooth_factor=10,
)

# Weight curves, one CSV per component (window of 1, i.e. no smoothing).
lambdas = {
    component: pd.read_csv(f"Taxi/{component}.csv")
    for component in ("Fuel", "Passenger_Drop-off", "Illegal_action")
}
plot_lambdas(
    title="Taxi-v3",
    lambdas=lambdas,
    formatter=formatter,
    smooth_factor=1,
    x_label="Number of training episodes",
)

# Reward components live in "rew-<component>.csv"; keys drop the "rew-" prefix.
rewards = {
    component: pd.read_csv(f"Taxi/rew-{component}.csv")
    for component in ("Fuel", "Passenger_Drop-off", "Illegal_action")
}
reward_max, reward_min = (
    reward_ranges["Taxi-v3"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="Taxi-v3",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=10,
    x_label="Number of training episodes",
)

### LunarLander-v2

In [37]:
# Per-method results: one CSV per method under LunarLander/.
results = {
    method: pd.read_csv(f"LunarLander/{method}.csv")
    for method in ("DQN", "drQ", "DyLam")
}
plot_result(
    gym_id="LunarLander-v2",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Landing rate",
    smooth_factor=100,
    smooth_factor_min_max=20,
)

# Weight curves, one CSV per component (window of 1, i.e. no smoothing).
lambdas = {
    component: pd.read_csv(f"LunarLander/{component}.csv")
    for component in ("Shaping", "Power_Linear", "Power_Angular", "Landing_Rate")
}
plot_lambdas(
    title="LunarLander-v2", lambdas=lambdas, formatter=formatter, smooth_factor=1
)

# Reward components live in "rew-<component>.csv"; keys drop the "rew-" prefix.
rewards = {
    component: pd.read_csv(f"LunarLander/rew-{component}.csv")
    for component in ("Shaping", "Power_Linear", "Power_Angular", "Landing_Rate")
}
reward_max, reward_min = (
    reward_ranges["LunarLander-v2"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="LunarLander-v2",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=100,
)

### HalfCheetah-v4

In [41]:
# Per-method results: one CSV per method under HalfCheetah/.
results = {
    method: pd.read_csv(f"HalfCheetah/{method}.csv")
    for method in ("SAC", "drQ", "DyLam")
}
plot_result(
    gym_id="HalfCheetah-v4",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Final Position",
    smooth_factor=100,
    smooth_factor_min_max=100,
)

# Weight curves, one CSV per component (window of 1, i.e. no smoothing).
lambdas = {
    component: pd.read_csv(f"HalfCheetah/{component}.csv")
    for component in ("Run", "Control")
}
plot_lambdas(
    title="HalfCheetah-v4", lambdas=lambdas, formatter=formatter, smooth_factor=1
)

# Reward components live in "rew-<component>.csv"; keys drop the "rew-" prefix.
rewards = {
    component: pd.read_csv(f"HalfCheetah/rew-{component}.csv")
    for component in ("Run", "Control")
}
reward_max, reward_min = (
    reward_ranges["HalfCheetah-v4"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="HalfCheetah-v4",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=100,
)

### VSS-v0

In [40]:
# Per-method results: one CSV per method under VSS/.
results = {
    method: pd.read_csv(f"VSS/{method}.csv") for method in ("SAC", "drQ", "DyLam")
}
plot_result(
    gym_id="VSS-v0",
    results=results,
    formatter=formatter,
    colors=method_colors,
    y_label="Goal rate",
    smooth_factor=500,
    smooth_factor_min_max=100,
)

# Weight curves, one CSV per component (window of 1, i.e. no smoothing).
lambdas = {
    component: pd.read_csv(f"VSS/{component}.csv")
    for component in ("Move_to_ball", "Ball_to_goal", "Energy")
}
plot_lambdas(title="VSS-v0", lambdas=lambdas, formatter=formatter, smooth_factor=1)

# Reward components live in "rew-<component>.csv"; keys drop the "rew-" prefix.
rewards = {
    component: pd.read_csv(f"VSS/rew-{component}.csv")
    for component in ("Move_to_ball", "Ball_to_goal", "Energy")
}
reward_max, reward_min = (
    reward_ranges["VSS-v0"][bound] for bound in ("r_max", "r_min")
)
plot_rewards(
    title="VSS-v0",
    rewards=rewards,
    reward_max=reward_max,
    reward_min=reward_min,
    formatter=formatter,
    smooth_factor=100,
)