In [1]:
from matplotlib import font_manager
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from utils import (
    plot_lambdas,
    plot_rewards,
    plot_result,
    plot_result_taxi,
    smooth_curve,
    FORMATTER,
    METHOD_COLORS,
    REWARD_RANGES
)

# Notebook-wide plot styling: seaborn "white" theme with the grid color set to
# light gray, bold axes titles/labels, and one tick-label size for both axes.
sns.set_theme(style="white", rc={"grid.color": "lightgray"})

axes_text_style = {
    "titlesize": 16,
    "titleweight": "bold",
    "labelsize": 14,
    "labelweight": "bold",
}
plt.rc("axes", **axes_text_style)

for tick_group in ("xtick", "ytick"):
    plt.rc(tick_group, labelsize=12)

## Analysis per Objective

### Taxi-v3

In [2]:
# Taxi-v3 analysis: load the per-method and per-objective CSVs from the "Taxi/"
# directory and draw the result, lambda, and reward figures.
env_id = "Taxi-v3"
data_dir = "Taxi"
episodes_label = "Number of training episodes"

# One DataFrame per compared method; the method name is both the dict key and
# the CSV file stem, so no string clean-up of the key is needed.
results = {
    method: pd.read_csv(f"{data_dir}/{method}.csv")
    for method in ["Q-Learning", "Q-Decomposition", "DRQ", "Tuned-DRQ", "DyLam"]
}
plot_result(
    env_id,
    results,
    FORMATTER,
    METHOD_COLORS,
    "Efficient Drop-off Rate",
    x_label=episodes_label,
    smooth_factor=10,
)

# Single source of truth for the objective names. The same names key the CSVs
# handed to plot_lambdas ("<name>.csv") and the ones handed to plot_rewards
# ("rew-<name>.csv"), so the two dicts cannot drift apart.
objectives = ["Fuel", "Passenger_Drop_off", "Illegal_action"]

lambdas = {name: pd.read_csv(f"{data_dir}/{name}.csv") for name in objectives}
# NOTE(review): the positional `1` is interpreted by utils.plot_lambdas —
# consider passing it by keyword once the parameter name is confirmed.
plot_lambdas(env_id, lambdas, FORMATTER, 1, x_label=episodes_label)

rewards = {name: pd.read_csv(f"{data_dir}/rew-{name}.csv") for name in objectives}
reward_max = REWARD_RANGES[env_id]["r_max"]
reward_min = REWARD_RANGES[env_id]["r_min"]
# NOTE(review): the positional `10` is presumably a smoothing factor (it matches
# smooth_factor above) — confirm against utils.plot_rewards.
plot_rewards(
    env_id,
    rewards,
    reward_max,
    reward_min,
    FORMATTER,
    10,
    x_label=episodes_label,
)

### LunarLander-v2

In [3]:
# LunarLander-v2 analysis: load the per-method and per-objective CSVs from the
# "LunarLander/" directory and draw the result, lambda, and reward figures.
env_id = "LunarLander-v2"
data_dir = "LunarLander"

# One DataFrame per compared method; the method name is both the dict key and
# the CSV file stem, so no string clean-up of the key is needed.
results = {
    method: pd.read_csv(f"{data_dir}/{method}.csv")
    for method in ["DQN", "DRQ", "DyLam"]
}
plot_result(
    env_id,
    results,
    FORMATTER,
    METHOD_COLORS,
    "Landing rate",
    smooth_factor=100,
    smooth_factor_min_max=20,
)

# Single source of truth for the objective names. The same names key the CSVs
# handed to plot_lambdas ("<name>.csv") and the ones handed to plot_rewards
# ("rew-<name>.csv"), so the two dicts cannot drift apart.
objectives = ["Shaping", "Power_Linear", "Power_Angular", "Landing_Rate"]

lambdas = {name: pd.read_csv(f"{data_dir}/{name}.csv") for name in objectives}
# NOTE(review): the positional `1` is interpreted by utils.plot_lambdas —
# consider passing it by keyword once the parameter name is confirmed.
plot_lambdas(env_id, lambdas, FORMATTER, 1)

rewards = {name: pd.read_csv(f"{data_dir}/rew-{name}.csv") for name in objectives}
reward_max = REWARD_RANGES[env_id]["r_max"]
reward_min = REWARD_RANGES[env_id]["r_min"]
# NOTE(review): the positional `100` is presumably a smoothing factor (it
# matches smooth_factor above) — confirm against utils.plot_rewards.
plot_rewards(env_id, rewards, reward_max, reward_min, FORMATTER, 100)

### HalfCheetah-v4

In [4]:
# HalfCheetah-v4 analysis: load the per-method and per-objective CSVs from the
# "HalfCheetah/" directory and draw the result, lambda, and reward figures.
env_id = "HalfCheetah-v4"
data_dir = "HalfCheetah"

# One DataFrame per compared method; the method name is both the dict key and
# the CSV file stem, so no string clean-up of the key is needed.
results = {
    method: pd.read_csv(f"{data_dir}/{method}.csv")
    for method in ["SAC", "DRQ", "DyLam"]
}
plot_result(
    env_id,
    results,
    FORMATTER,
    METHOD_COLORS,
    "Final Position",
    smooth_factor=100,
    smooth_factor_min_max=100,
)

# Single source of truth for the objective names. The same names key the CSVs
# handed to plot_lambdas ("<name>.csv") and the ones handed to plot_rewards
# ("rew-<name>.csv"), so the two dicts cannot drift apart.
objectives = ["Run", "Control"]

lambdas = {name: pd.read_csv(f"{data_dir}/{name}.csv") for name in objectives}
# NOTE(review): the positional `1` is interpreted by utils.plot_lambdas —
# consider passing it by keyword once the parameter name is confirmed.
plot_lambdas(env_id, lambdas, FORMATTER, 1)

rewards = {name: pd.read_csv(f"{data_dir}/rew-{name}.csv") for name in objectives}
reward_max = REWARD_RANGES[env_id]["r_max"]
reward_min = REWARD_RANGES[env_id]["r_min"]
# NOTE(review): the positional `100` is presumably a smoothing factor (it
# matches smooth_factor above) — confirm against utils.plot_rewards.
plot_rewards(env_id, rewards, reward_max, reward_min, FORMATTER, 100)

### VSS-v0

In [5]:
# VSS-v0 analysis: load the per-method and per-objective CSVs from the "VSS/"
# directory and draw the result, lambda, and reward figures.
env_id = "VSS-v0"
data_dir = "VSS"

# One DataFrame per compared method; the method name is both the dict key and
# the CSV file stem, so no string clean-up of the key is needed.
results = {
    method: pd.read_csv(f"{data_dir}/{method}.csv")
    for method in ["SAC", "DRQ", "DyLam"]
}
plot_result(
    env_id,
    results,
    FORMATTER,
    METHOD_COLORS,
    "Goal rate",
    smooth_factor=500,
    smooth_factor_min_max=100,
)

# Single source of truth for the objective names. The same names key the CSVs
# handed to plot_lambdas ("<name>.csv") and the ones handed to plot_rewards
# ("rew-<name>.csv"), so the two dicts cannot drift apart.
objectives = ["Move_to_ball", "Ball_to_goal", "Energy"]

lambdas = {name: pd.read_csv(f"{data_dir}/{name}.csv") for name in objectives}
# NOTE(review): the positional `1` is interpreted by utils.plot_lambdas —
# consider passing it by keyword once the parameter name is confirmed.
plot_lambdas(env_id, lambdas, FORMATTER, 1)

rewards = {name: pd.read_csv(f"{data_dir}/rew-{name}.csv") for name in objectives}
reward_max = REWARD_RANGES[env_id]["r_max"]
reward_min = REWARD_RANGES[env_id]["r_min"]
# NOTE(review): the positional `100` is presumably a smoothing factor — confirm
# against utils.plot_rewards (note smooth_factor above is 500, not 100).
plot_rewards(env_id, rewards, reward_max, reward_min, FORMATTER, 100)