In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from typing import List, Union

from pymoo.indicators.hv import HV

sns.set_theme(style="white", rc={"grid.color": "lightgray"})

In [2]:
def get_non_pareto_dominated_inds(candidates: Union[np.ndarray, List], remove_duplicates: bool = True) -> np.ndarray:
    """Flag the candidates that are not Pareto-dominated (larger is better in every objective).

    A candidate is flagged when the only candidates that are greater than or
    equal to it in every coordinate are exact copies of itself.

    Args:
        candidates (ndarray): Array-like of shape (n_points, n_objectives).
        remove_duplicates (bool, optional): If True, only the first occurrence of each
            distinct vector can be flagged. Defaults to True.

    Returns:
        ndarray: Boolean mask of length n_points; True marks the rows to keep to form
            the Pareto front or coverage set. Index ``candidates`` with it to get the rows.

    Raises:
        ValueError: If a non-empty input is not two-dimensional.
    """
    candidates = np.asarray(candidates)
    # Nothing to compare: an empty input yields an empty mask.
    if candidates.ndim >= 1 and len(candidates) == 0:
        return np.zeros(0, dtype=bool)
    if candidates.ndim != 2:
        raise ValueError(
            f"candidates must have shape (n_points, n_objectives), got shape {candidates.shape}"
        )

    _, first_occurrence, inverse, counts = np.unique(
        candidates, return_index=True, return_inverse=True, return_counts=True, axis=0
    )
    # NumPy 2.0.0 returned a 2-D inverse when `axis` is given; flattening keeps
    # `counts[inverse]` one entry per candidate on every NumPy version.
    inverse = np.reshape(inverse, -1)

    # Explicit (n, 1, d) vs (1, n, d) broadcasting always yields an (n, n) matrix,
    # including n == 1 (an axis-less squeeze would collapse that case to a scalar).
    rows = candidates[:, None, :]
    others = candidates[None, :, :]
    weakly_below = np.all(rows <= others, axis=-1)
    strictly_below = np.all(rows < others, axis=-1)

    # Row i is non-dominated when only its own duplicates are >= it everywhere.
    c1 = np.sum(weakly_below, axis=-1) == counts[inverse]
    # Kept from the original criterion: some candidate is not strictly above row i.
    c2 = np.any(~strictly_below, axis=-1)

    if remove_duplicates:
        to_keep = np.zeros(len(candidates), dtype=bool)
        to_keep[first_occurrence] = True
    else:
        to_keep = np.ones(len(candidates), dtype=bool)

    return np.logical_and(c1, c2) & to_keep


def filter_pareto_dominated(candidates: Union[np.ndarray, List], remove_duplicates: bool = True) -> np.ndarray:
    """Return only the candidate vectors that are not Pareto-dominated.

    Args:
        candidates (ndarray): A numpy array (or list) of vectors.
        remove_duplicates (bool, optional): Forwarded to
            ``get_non_pareto_dominated_inds``. Defaults to True.

    Returns:
        ndarray: The rows of ``candidates`` selected by
            ``get_non_pareto_dominated_inds``; inputs with fewer than two
            entries are returned as an array without any filtering.
    """
    points = np.array(candidates)
    # With zero or one vector there is nothing to compare against.
    if len(points) >= 2:
        keep_mask = get_non_pareto_dominated_inds(points, remove_duplicates=remove_duplicates)
        points = points[keep_mask]
    return points

def get_hv(points, ref_point):
    """Compute the hypervolume of ``points`` with respect to ``ref_point``.

    Both inputs are negated before being passed to pymoo's ``HV`` indicator.
    NOTE(review): presumably because pymoo indicators assume minimisation while
    the vectors here are maximised - confirm against the pymoo documentation.

    Args:
        points: Array-like of objective vectors, one row per point.
        ref_point: Array-like reference point with one entry per objective.

    Returns:
        The value returned by ``HV(ref_point=-ref_point)(-points)``.
    """
    # Convert before negating: `* -1` on a plain list/tuple would produce an
    # empty sequence instead of a negated vector.
    negated_ref = np.asarray(ref_point) * -1
    negated_points = np.asarray(points) * -1
    return HV(ref_point=negated_ref)(negated_points)

def report(hvs: dict[str, tuple[float, int, str]], log10: bool = False):
    """Print one summary line per algorithm.

    Args:
        hvs: Maps an algorithm name to ``(hypervolume, cardinality, time)``.
        log10: When True, print ``log10`` of the hypervolume and label it as such.
    """
    # The label suffix is the same for every row, so decide it once.
    suffix = " (log10)" if log10 else ""
    for algo, (hv, cardinality, time) in hvs.items():
        shown = np.log10(hv) if log10 else hv
        hv_str = "HV" + suffix + ": {:.3f}".format(shown)
        print(f"{algo} {hv_str} - {cardinality} points - {time} minutes")

## Minecart

In [3]:
# Minecart: CSV locations of the point sets for each comparison algorithm.
(
    envelope_path,
    gpils_path,
    pcn_path,
    ipro_dqn_path,
    ipro_ppo_path,
) = (
    "minecart/envelope-pareto.csv",
    "minecart/gpils-pareto.csv",
    "minecart/pcn-pareto.csv",
    "minecart/ipro-dqn-pareto.csv",
    "minecart/ipro-ppo-pareto.csv",
)


In [4]:
# Read every Minecart CSV into its own DataFrame, in the order the paths were declared.
envelope, gpils, pcn, ipro_dqn, ipro_ppo = [
    pd.read_csv(csv_path)
    for csv_path in (envelope_path, gpils_path, pcn_path, ipro_dqn_path, ipro_ppo_path)
]

In [5]:
# Each method ships one CSV per objective. The value at column index 1 of every
# file becomes one column of an (n_rows, n_objectives) matrix.
# NOTE(review): presumably column 0 is a step/index column - verify against the CSVs.

# --- DQN baseline ---
baseline_paths = [
    "minecart/baseline_objective_1.csv",
    "minecart/baseline_objective_2.csv",
    "minecart/baseline_objective_3.csv",
]
baseline_dfs = list(map(pd.read_csv, baseline_paths))
baseline = np.zeros((len(baseline_dfs[0]), len(baseline_dfs)))
for col, frame in enumerate(baseline_dfs):
    baseline[:, col] = frame.to_numpy()[:, 1]

# --- UDC ---
udc_paths = [
    "minecart/udc_objective_1.csv",
    "minecart/udc_objective_2.csv",
    "minecart/udc_objective_3.csv",
]
udc_dfs = list(map(pd.read_csv, udc_paths))
udc = np.zeros((len(udc_dfs[0]), len(udc_dfs)))
for col, frame in enumerate(udc_dfs):
    udc[:, col] = frame.to_numpy()[:, 1]

# --- DyLam ---
dylam_paths = [
    "minecart/dylam_objective_1.csv",
    "minecart/dylam_objective_2.csv",
    "minecart/dylam_objective_3.csv",
]
dylam_dfs = list(map(pd.read_csv, dylam_paths))
dylam = np.zeros((len(dylam_dfs[0]), len(dylam_dfs)))
for col, frame in enumerate(dylam_dfs):
    dylam[:, col] = frame.to_numpy()[:, 1]



In [6]:
# Keep only the non-dominated rows of each Minecart result matrix.
baseline_pareto, udc_pareto, dylam_pareto = map(
    filter_pareto_dominated, (baseline, udc, dylam)
)

In [7]:
# Reference point passed to get_hv for the three Minecart objectives.
ref_point = np.array([-1, -1, -200])
# One entry per algorithm: (hypervolume, number of points, time string shown in the report).
hvs = dict(
    (algo, (get_hv(values, ref_point), len(values), time))
    for algo, values, time in (
        ("Envelope", envelope.values, "507"),
        ("GPI-LS", gpils.values, "422"),
        ("PCN", pcn.values, "120"),
        # ("IPRO-DQN", ipro_dqn.values, "240"),
        ("IPRO", ipro_ppo.values, "2400"),
        ("DQN", baseline_pareto, "15"),
        ("UDC", udc_pareto, "21"),
        ("DyLam-DQN", dylam_pareto, "20"),
    )
)

In [8]:
# Tabulate the Minecart results (one row per algorithm), persist them, then print the summary.
minecart_df = pd.DataFrame.from_dict(
    hvs,
    orient="index",
    columns=["HV", "Cardinality", "Time (m)"],
)
minecart_df.to_csv("minecart/hypervolume_report.csv")
report(hvs)

Envelope HV: 707.652 - 155 points - 507 minutes
GPI-LS HV: 723.864 - 500 points - 422 minutes
PCN HV: 686.565 - 373 points - 120 minutes
IPRO HV: 721.563 - 86 points - 2400 minutes
DQN HV: 1209.280 - 31 points - 15 minutes
UDC HV: 1199.029 - 52 points - 21 minutes
DyLam-DQN HV: 1213.670 - 32 points - 20 minutes


## Reacher

In [9]:
# Reacher: CSV locations of the point sets for each comparison algorithm.
envelope_path, gpils_path, pcn_path, ipro_path = (
    "Reacher/envelope.csv",
    "Reacher/gpils.csv",
    "Reacher/pcn.csv",
    "Reacher/ipro.csv",
)
# Load them in the same order; these names replace the Minecart frames loaded earlier.
envelope, gpils, pcn, ipro = [
    pd.read_csv(csv_path)
    for csv_path in (envelope_path, gpils_path, pcn_path, ipro_path)
]

In [10]:
# Each method ships one CSV per objective. The value at column index 1 of every
# file becomes one column of an (n_rows, n_objectives) matrix.
# NOTE(review): presumably column 0 is a step/index column - verify against the CSVs.

# --- DQN baseline ---
baseline_paths = [
    "Reacher/baseline_objective_1.csv",
    "Reacher/baseline_objective_2.csv",
    "Reacher/baseline_objective_3.csv",
    "Reacher/baseline_objective_4.csv",
]
baseline_dfs = list(map(pd.read_csv, baseline_paths))
baseline = np.zeros((len(baseline_dfs[0]), len(baseline_dfs)))
for col, frame in enumerate(baseline_dfs):
    baseline[:, col] = frame.to_numpy()[:, 1]

# --- UDC ---
udc_paths = [
    "Reacher/udc_objective_1.csv",
    "Reacher/udc_objective_2.csv",
    "Reacher/udc_objective_3.csv",
    "Reacher/udc_objective_4.csv",
]
udc_dfs = list(map(pd.read_csv, udc_paths))
udc = np.zeros((len(udc_dfs[0]), len(udc_dfs)))
for col, frame in enumerate(udc_dfs):
    udc[:, col] = frame.to_numpy()[:, 1]

# --- DyLam ---
dylam_paths = [
    "Reacher/dylam_objective_1.csv",
    "Reacher/dylam_objective_2.csv",
    "Reacher/dylam_objective_3.csv",
    "Reacher/dylam_objective_4.csv",
]
dylam_dfs = list(map(pd.read_csv, dylam_paths))
dylam = np.zeros((len(dylam_dfs[0]), len(dylam_dfs)))
for col, frame in enumerate(dylam_dfs):
    dylam[:, col] = frame.to_numpy()[:, 1]

In [11]:
# Keep only the non-dominated rows of each Reacher result matrix.
baseline_pareto, dylam_pareto, udc_pareto = map(
    filter_pareto_dominated, (baseline, dylam, udc)
)


In [12]:
# Reference point passed to get_hv for the four Reacher objectives.
ref_point = np.array([-50, -50, -50, -50])
# One entry per algorithm: (hypervolume, number of points, time string shown in the report).
hvs = dict(
    (algo, (get_hv(values, ref_point), len(values), time))
    for algo, values, time in (
        ("Envelope", envelope.values, "51"),
        ("GPI-LS", gpils.values, "1200"),
        ("PCN", pcn.values, "62"),
        ("IPRO", ipro.values, "3300"),
        ("DQN", baseline_pareto, "83"),
        ("UDC", udc_pareto, "70"),
        ("DyLam-DQN", dylam_pareto, "42"),
    )
)

In [13]:
# Tabulate the Reacher results with the hypervolume column converted to log10,
# persist the table, then print the matching log10 summary.
reacher_df = pd.DataFrame.from_dict(
    hvs, orient="index", columns=["HV (log10)", "Cardinality", "Time (m)"]
).assign(**{"HV (log10)": lambda frame: np.log10(frame["HV (log10)"])})
reacher_df.to_csv("Reacher/hypervolume_report.csv")
report(hvs, log10=True)

Envelope HV (log10): 7.479 - 100 points - 51 minutes
GPI-LS HV (log10): 7.559 - 100 points - 1200 minutes
PCN HV (log10): 7.377 - 100 points - 62 minutes
IPRO HV (log10): 7.527 - 526 points - 3300 minutes
DQN HV (log10): 10.709 - 69 points - 83 minutes
UDC HV (log10): 11.165 - 54 points - 70 minutes
DyLam-DQN HV (log10): 11.236 - 73 points - 42 minutes


## Lunar Lander

In [14]:
# Lunar Lander: CSV locations of the point sets for each comparison algorithm.
envelope_path, gpils_path, pcn_path, ipro_path = (
    "LunarLander/envelope-pareto.csv",
    "LunarLander/gpils-pareto.csv",
    "LunarLander/pcn-pareto.csv",
    "LunarLander/ipro-pareto.csv",
)
# Load them in the same order; these names replace the frames loaded for earlier sections.
envelope, gpils, pcn, ipro = [
    pd.read_csv(csv_path)
    for csv_path in (envelope_path, gpils_path, pcn_path, ipro_path)
]

In [17]:
# Each method ships one CSV per reward component. The value at column index 1 of
# every file becomes one column of an (n_rows, n_objectives) matrix.
# NOTE(review): presumably column 0 is a step/index column - verify against the CSVs.

# --- DQN ---
dqn_paths = [
    "LunarLander/DQN-rew-Shaping.csv",
    "LunarLander/DQN-rew-Power_Linear.csv",
    "LunarLander/DQN-rew-Power_Angular.csv",
    "LunarLander/DQN-rew-Landing_Rate.csv",
]
dqn_dfs = list(map(pd.read_csv, dqn_paths))
dqn = np.zeros((len(dqn_dfs[0]), len(dqn_dfs)))
for col, frame in enumerate(dqn_dfs):
    dqn[:, col] = frame.to_numpy()[:, 1]

# --- UDC ---
udc_paths = [
    "LunarLander/UDC-rew-Shaping.csv",
    "LunarLander/UDC-rew-Power_Linear.csv",
    "LunarLander/UDC-rew-Power_Angular.csv",
    "LunarLander/UDC-rew-Landing_Rate.csv",
]
udc_dfs = list(map(pd.read_csv, udc_paths))
udc = np.zeros((len(udc_dfs[0]), len(udc_dfs)))
for col, frame in enumerate(udc_dfs):
    udc[:, col] = frame.to_numpy()[:, 1]

# --- DyLam (files carry no method prefix) ---
dylam_paths = [
    "LunarLander/rew-Shaping.csv",
    "LunarLander/rew-Power_Linear.csv",
    "LunarLander/rew-Power_Angular.csv",
    "LunarLander/rew-Landing_Rate.csv",
]
dylam_dfs = list(map(pd.read_csv, dylam_paths))
dylam = np.zeros((len(dylam_dfs[0]), len(dylam_dfs)))
for col, frame in enumerate(dylam_dfs):
    dylam[:, col] = frame.to_numpy()[:, 1]


In [18]:
# Keep only the non-dominated rows of each Lunar Lander result matrix.
dqn_pareto, udc_pareto, dylam_pareto = map(
    filter_pareto_dominated, (dqn, udc, dylam)
)

In [19]:
# Reference point passed to get_hv for the four Lunar Lander objectives.
ref_point = np.array([-1001, -1001, -1001, -2])
# One entry per algorithm: (hypervolume, number of points, time string shown in the report).
hvs = dict(
    (algo, (get_hv(values, ref_point), len(values), time))
    for algo, values, time in (
        ("Envelope", envelope.values, "46"),
        ("GPI-LS", gpils.values, "557"),
        ("PCN", pcn.values, "29"),
        ("IPRO", ipro.values, "69"),
        ("DQN", dqn_pareto, "7"),
        ("UDC", udc_pareto, "18"),
        ("DyLam-DQN", dylam_pareto, "6"),
    )
)

In [20]:
# Tabulate the Lunar Lander results with the hypervolume column converted to log10,
# persist the table, then print the matching log10 summary.
lunar_lander_df = pd.DataFrame.from_dict(
    hvs, orient="index", columns=["HV (log10)", "Cardinality", "Time (m)"]
).assign(**{"HV (log10)": lambda frame: np.log10(frame["HV (log10)"])})
lunar_lander_df.to_csv("LunarLander/hypervolume_report.csv")
report(hvs, log10=True)

Envelope HV (log10): 9.292 - 37 points - 46 minutes
GPI-LS HV (log10): 9.425 - 43 points - 557 minutes
PCN HV (log10): 9.284 - 13 points - 29 minutes
IPRO HV (log10): 9.175 - 9 points - 69 minutes
DQN HV (log10): 9.708 - 83 points - 7 minutes
UDC HV (log10): 9.713 - 98 points - 18 minutes
DyLam-DQN HV (log10): 9.706 - 86 points - 6 minutes
