In [1]:
import pandas as pd
from statistics import median
from typing import Union, List
from pandas import DataFrame
import matplotlib.pyplot as plt
import jax.numpy as jnp
from qdax.utils.plotting import plot_2d_map_elites_repertoire_for_pgfplots
from scipy.stats import ranksums



In [6]:
def line_plot(df: DataFrame, x: str, y: Union[List[str], str], groups: Union[List[str], str] = None,
              file_prefix: str = "", cols_joiner: str = "_", filename_joiner: str = "_"):
    """Export per-group summary statistics of ``y`` over ``x`` as tab-separated text files.

    For every combination of the ``groups`` columns (and every value of ``x``), the first
    quartile, third quartile and median of each ``y`` column are computed. One file is
    written per group combination, named
    ``<file_prefix><filename_joiner><value1><filename_joiner><value2>....txt``. The joiner
    is omitted when the name built so far is empty or ends with ``/``.

    Args:
        df: Long-format data containing ``x``, every ``y`` column and every ``groups`` column.
        x: Column used as the horizontal axis; statistics are computed per value of it.
        y: Column name, or list of column names, to summarise.
        groups: Column name, or list of column names, identifying one output file each.
            ``None`` or an empty list summarises the whole frame into ``<file_prefix>.txt``.
        file_prefix: Path prefix of every output file.
        cols_joiner: Separator between a ``y`` column name and the statistic name
            (``q1``, ``q3``, ``median``) in the output header.
        filename_joiner: Separator between ``file_prefix`` and the group values.
    """
    if isinstance(y, str):
        y = [y]
    if groups is None:
        # The declared default used to crash on ``groups + [x]``.
        groups = []
    elif isinstance(groups, str):
        groups = [groups]
    else:
        groups = list(groups)

    # The function names matter: pandas uses them as the statistic labels in the header.
    def q1(values):
        return values.quantile(0.25)

    def q3(values):
        return values.quantile(0.75)

    # "median" is pandas' NaN-skipping median, consistent with Series.quantile above.
    aggregations = {column: [q1, q3, "median"] for column in y}

    summary = df.groupby(groups + [x]).agg(aggregations)
    summary.columns = [cols_joiner.join(col) for col in summary.columns.to_flat_index()]
    summary = summary.reset_index()

    if not groups:
        summary.to_csv(f"{file_prefix}.txt", sep="\t", index=False)
        return

    # One row per distinct group combination, in order of first appearance in ``df``.
    key_df = df.drop_duplicates(subset=groups)

    for i in range(len(key_df)):
        subset = summary
        current_filename = file_prefix
        for key in groups:
            value = key_df[key].iloc[i]
            subset = subset[subset[key] == value]
            needs_joiner = len(current_filename) > 0 and not current_filename.endswith('/')
            current_filename += f"{filename_joiner if needs_joiner else ''}{value}"
        subset.to_csv(f"{current_filename}.txt", sep="\t", index=False)


def box_plot(df: DataFrame, x: str, y: str, groups: Union[List[str], str] = None, file_prefix: str = "",
             filename_joiner: str = "_"):
    """Export box-plot statistics of ``y`` per value of ``x``, one file per group combination.

    Without ``groups`` a single file ``<file_prefix>.txt`` is written from the whole frame.
    Otherwise one file is written per distinct combination of the ``groups`` columns, named
    ``<file_prefix><filename_joiner><value1><filename_joiner><value2>....txt``. As in
    ``line_plot``, the joiner is omitted when the name built so far is empty or ends
    with ``/``.

    Args:
        df: Long-format data containing ``x``, ``y`` and every ``groups`` column.
        x: Column whose distinct values become the individual boxes.
        y: Column holding the values summarised by each box.
        groups: Column name, or list of column names, identifying one output file each.
        file_prefix: Path prefix of every output file.
        filename_joiner: Separator between ``file_prefix`` and the group values.
    """
    if isinstance(groups, str):
        groups = [groups]
    if not groups:
        _box_plot(df, x, y, file_prefix)
        return

    # One row per distinct group combination, in order of first appearance in ``df``.
    key_df = df.drop_duplicates(subset=groups)

    for i in range(len(key_df)):
        subset = df
        current_filename = file_prefix
        for key in groups:
            value = key_df[key].iloc[i]
            subset = subset[subset[key] == value]
            # Same joiner rule as line_plot: no separator right after a directory slash.
            needs_joiner = len(current_filename) > 0 and not current_filename.endswith('/')
            current_filename += f"{filename_joiner if needs_joiner else ''}{value}"
        _box_plot(subset, x, y, current_filename)


def _box_plot(df: DataFrame, x: str, y: str, file_name: str):
    """Compute box-plot statistics with matplotlib and write them to ``<file_name>.txt``.

    One box is computed per distinct value of ``df[x]`` from the non-missing values of
    ``df[y]``. The output file has six tab-separated rows, one column per box:
    the ``x`` values, lower whisker ends, first quartiles, medians, third quartiles and
    upper whisker ends. All statistics except the medians are rounded to one decimal.

    Args:
        df: Data containing the ``x`` and ``y`` columns.
        x: Column whose distinct values become the individual boxes.
        y: Column holding the values summarised by each box.
        file_name: Output path without the ``.txt`` extension.
    """
    categories = df[x].unique()
    samples = [df.loc[df[x] == category, y].dropna().tolist() for category in categories]

    # The figure is only a vehicle for matplotlib's statistics; it is never shown.
    fig = plt.figure(visible=False)
    try:
        bp = plt.boxplot(samples, showmeans=False)

        # NOTE(review): relies on 'caps' alternating lower/upper cap for each box — confirm
        # against the matplotlib version in use.
        minimums = [round(item.get_ydata()[0], 1) for item in bp['caps']][::2]
        q1 = [round(min(item.get_ydata()), 1) for item in bp['boxes']]
        medians = [item.get_ydata()[0] for item in bp['medians']]
        q3 = [round(max(item.get_ydata()), 1) for item in bp['boxes']]
        maximums = [round(item.get_ydata()[0], 1) for item in bp['caps']][1::2]
    finally:
        # pyplot keeps every figure alive until it is closed; this function is called in loops.
        plt.close(fig)

    rows = [categories.tolist(), minimums, q1, medians, q3, maximums]

    with open(f"{file_name}.txt", "w") as bp_file:
        for row in rows:
            bp_file.write("\t".join(map(str, row)) + "\n")

In [7]:
# Maps the sampler tag used in the result file names to the algorithm label used downstream.
algo_mapping = {
    "all": "3b", "s1": "brain", "s2": "body", "s3": "behavior", "ga": "ga"
}

# Read every (seed, controller, sampler) run log and tag it with its provenance.
dfs = []
for seed in range(10):
    for controller in ["nn", "graph"]:
        for sampler in algo_mapping.keys():
            is_ga = sampler == "ga"
            if controller == "graph":
                csv_path = (f"../results/ga/evo-body-10x10_{seed}.csv" if is_ga
                            else f"../results/me/evo-body-10x10-floor-{sampler}_{seed}.csv")
            elif controller == "nn":
                csv_path = (f"../results/ga/evo-body-nn-10x10-walker_{seed}.csv" if is_ga
                            else f"../results/me_nn/evo-body-10x10-walker-{sampler}_{seed}.csv")
            else:
                raise Exception("wrong combination")
            tmp_df = pd.read_csv(csv_path).assign(
                algorithm=algo_mapping[sampler],
                seed=seed,
                controller=controller,
            )
            dfs.append(tmp_df)
df = pd.concat(dfs)
df.head()

Unnamed: 0,iteration,max_fitness,qd_score1,qd_score2,qd_score3,coverage1,coverage2,coverage3,time,current_time,invalid_individuals,algorithm,seed,controller
0,1,0.562116,2.821863,1.579132,-0.020165,1.367188,6.347656,1.953125,11.43346,2024-08-12 16:42:57.189083,0.0,3b,0,nn
1,2,0.562116,4.191794,5.345062,-0.012267,1.757812,8.398438,2.34375,5.578582,2024-08-12 16:43:02.767961,0.0,3b,0,nn
2,3,0.562116,6.079382,8.130862,-0.647105,2.246094,9.863281,2.636719,5.605529,2024-08-12 16:43:08.373949,0.0,3b,0,nn
3,4,0.562116,7.028851,10.669348,-0.44491,2.34375,11.328125,3.027344,5.565599,2024-08-12 16:43:13.939902,0.0,3b,0,nn
4,5,0.562116,8.077906,14.320849,-0.712019,2.441406,13.085938,3.222656,5.470364,2024-08-12 16:43:19.410662,0.0,3b,0,nn


In [4]:
# Keep only the rows logged at the last iteration found in the data.
# Series.max() is vectorised and skips NaN, unlike the builtin max() over a Series.
final_df = df[df["iteration"] == df["iteration"].max()]
final_df.head()

Unnamed: 0,iteration,max_fitness,qd_score1,qd_score2,qd_score3,coverage1,coverage2,coverage3,time,current_time,invalid_individuals,algorithm,seed,controller
3999,4000,5.89104,239.98662,1490.6387,825.5987,10.644531,55.95703,46.09375,5.75799,2024-08-12 22:56:35.212428,0.0,3b,0,nn
3999,4000,6.041441,384.66403,587.3635,249.98843,12.207031,36.914062,18.164062,6.108852,2024-08-13 20:24:44.973355,0.0,brain,0,nn
3999,4000,10.238633,156.07501,3202.6965,1072.2517,7.421875,56.640625,27.832031,6.204555,2024-08-14 03:14:52.260795,0.0,body,0,nn
3999,4000,0.879594,2.543657,161.49133,-14.039587,1.074219,40.527344,12.011719,5.628537,2024-08-14 14:50:32.184378,0.0,behavior,0,nn
3999,4000,10.696037,,,,,,,4.224857,2024-08-09 21:38:00.419680,,ga,0,nn


In [8]:
# Wilcoxon rank-sum tests on the final max_fitness values.
pairs = []
nn_rows = final_df["controller"] == "nn"

# Per algorithm: "nn" controller vs every other controller.
for sam in final_df.algorithm.unique():
    algo_rows = final_df["algorithm"] == sam
    fits1 = final_df.loc[algo_rows & nn_rows, "max_fitness"].to_list()
    fits2 = final_df.loc[algo_rows & ~nn_rows, "max_fitness"].to_list()
    _, p_value = ranksums(fits1, fits2)
    print(f"{sam} -> {p_value}")

# Cross comparison: "nn" controller with "ga" vs non-"nn" controller with "3b".
fits1 = final_df.loc[nn_rows & (final_df["algorithm"] == "ga"), "max_fitness"].to_list()
fits2 = final_df.loc[~nn_rows & (final_df["algorithm"] == "3b"), "max_fitness"].to_list()
_, p_value = ranksums(fits1, fits2)
print(f"NN+GA vs graph+3B-QD -> {p_value}")

3b -> 0.7623688184698398
brain -> 0.40567889528505297
body -> 0.09630369202868826
behavior -> 0.0006697294490218271
ga -> 0.04125001659393949
NN+GA vs graph+3B-QD -> 0.3643461266335529


In [9]:
# Fitness over iterations: one q1/q3/median summary file per (controller, algorithm) pair.
line_plot(df, "iteration", "max_fitness",
          groups=["controller", "algorithm"],
          file_prefix="../pgfplots/evolution_fitness")

In [None]:
# Final-iteration fitness: one box per algorithm, one output file per controller.
box_plot(final_df, "algorithm", "max_fitness",
         groups="controller",
         file_prefix="../pgfplots/final_fitness")

In [6]:
# Coverage over iterations for every algorithm except "ga", one file per
# (controller, algorithm) pair, summarising all three coverage columns.
line_plot(df.loc[df["algorithm"] != "ga"], "iteration",
          ["coverage1", "coverage2", "coverage3"],
          groups=["controller", "algorithm"],
          file_prefix="../pgfplots/evolution_coverage")

In [9]:
# nn repertoire
rep_seed = 1
reps = {0: "brain", 1: "body", 2: "behavior"}
base_path = f"../results/me_nn/evo-body-10x10-walker-all_{rep_seed}"

# First pass: find one fitness range shared by the three repertoires so that all plots
# receive the same vmin/vmax. Entries equal to -inf are excluded from the minimum.
min_fit, max_fit = jnp.inf, -jnp.inf
fitnesses_by_rep = {}
for r in reps.keys():
    fitnesses = jnp.load(f"{base_path}/r{r + 1}_fitnesses.npy")
    fitnesses_by_rep[r] = fitnesses
    finite_fitnesses = fitnesses[fitnesses > -jnp.inf]
    if finite_fitnesses.size == 0:
        # Nothing above -inf in this repertoire: it cannot contribute to the range.
        continue
    # Array reductions instead of the builtins, which iterate the array element by element.
    curr_min_fit, curr_max_fit = jnp.min(finite_fitnesses), jnp.max(fitnesses)
    if curr_min_fit < min_fit:
        min_fit = curr_min_fit
    if curr_max_fit > max_fit:
        max_fit = curr_max_fit

# Second pass: export one plot per repertoire using the shared range.
for r in reps.keys():
    centroids = jnp.load(f"{base_path}/r{r + 1}_centroids.npy")
    fitnesses = fitnesses_by_rep[r]
    v_min, v_max = plot_2d_map_elites_repertoire_for_pgfplots(
        centroids=centroids,
        repertoire_fitnesses=fitnesses,
        minval=jnp.asarray([0, 0]),
        maxval=jnp.asarray([1, 1]),
        vmin=min_fit,
        vmax=max_fit,
        target_file=f"../pgfplots/repertoire_nn_{reps[r]}.pdf"
    )
print(min_fit, max_fit)

<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

-3.1087337 10.587589


In [10]:
# graph repertoire
# NOTE: relies on `reps` (repertoire index -> name) defined in the nn repertoire cell.
rep_seed = 0
base_path = f"../results/me/evo-body-10x10-floor-all_{rep_seed}"

# First pass: find one fitness range shared by the three repertoires so that all plots
# receive the same vmin/vmax. Entries equal to -inf are excluded from the minimum.
min_fit, max_fit = jnp.inf, -jnp.inf
fitnesses_by_rep = {}
for r in reps.keys():
    fitnesses = jnp.load(f"{base_path}/r{r + 1}_fitnesses.npy")
    fitnesses_by_rep[r] = fitnesses
    finite_fitnesses = fitnesses[fitnesses > -jnp.inf]
    if finite_fitnesses.size == 0:
        # Nothing above -inf in this repertoire: it cannot contribute to the range.
        continue
    # Array reductions instead of the builtins, which iterate the array element by element.
    curr_min_fit, curr_max_fit = jnp.min(finite_fitnesses), jnp.max(fitnesses)
    if curr_min_fit < min_fit:
        min_fit = curr_min_fit
    if curr_max_fit > max_fit:
        max_fit = curr_max_fit

# Second pass: export one plot per repertoire using the shared range.
for r in reps.keys():
    centroids = jnp.load(f"{base_path}/r{r + 1}_centroids.npy")
    fitnesses = fitnesses_by_rep[r]
    v_min, v_max = plot_2d_map_elites_repertoire_for_pgfplots(
        centroids=centroids,
        repertoire_fitnesses=fitnesses,
        minval=jnp.asarray([0, 0]),
        maxval=jnp.asarray([1, 1]),
        vmin=min_fit,
        vmax=max_fit,
        target_file=f"../pgfplots/repertoire_graph_{reps[r]}.pdf"
    )
print(min_fit, max_fit)

<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

-3.0884957 10.415704


In [24]:
samplers = ["all", "s1", "s2", "s3"]
seed = 0
repertoires = range(1, 4)

# One CSV per (sampler, origin repertoire); tag each frame with where it came from.
coverage_dfs = [
    pd.read_csv(f"../results/coverage/cgp_evo-body-10x10-floor-{sampler}_{seed}_g{repertoire}.csv").assign(
        sampler=sampler, seed=seed, origin_repertoire=repertoire)
    for sampler in samplers
    for repertoire in repertoires
]

# Long format: one row per (origin repertoire, destination repertoire) coverage value.
coverage_df = (
    pd.concat(coverage_dfs)
    .melt(id_vars=["max_fitness", "sampler", "seed", "origin_repertoire"],
          value_vars=["coverage1", "coverage2", "coverage3"])
    .rename(columns={"variable": "destination_repertoire", "value": "coverage"})
)
coverage_df["destination_repertoire"] = coverage_df["destination_repertoire"].map(
    lambda name: int(name.replace("coverage", "")))

# make it relative and save to file
for sampler in samplers:
    tmp_coverage_df = coverage_df[(coverage_df["sampler"] == sampler) & (coverage_df["seed"] == seed)]
    # Reference value per destination: the coverage where origin and destination coincide.
    self_coverage = {
        rep: tmp_coverage_df[
            (tmp_coverage_df["origin_repertoire"] == rep) & (tmp_coverage_df["destination_repertoire"] == rep)][
            "coverage"].iloc[0]
        for rep in (1, 2, 3)
    }
    clear_coverage_df = tmp_coverage_df.copy()
    clear_coverage_df["relative_coverage"] = (
        clear_coverage_df["coverage"] / clear_coverage_df["destination_repertoire"].map(self_coverage)
    )
    exported = clear_coverage_df[["origin_repertoire", "destination_repertoire", "coverage", "relative_coverage"]]
    exported.sort_values(by=["origin_repertoire", "destination_repertoire"]).to_csv(
        f"../pgfplots/trans_coverage_graph_{algo_mapping[sampler]}.txt", sep="\t", index=False)

In [25]:
samplers = ["all", "s1", "s2", "s3"]
seed = 0
repertoires = range(1, 4)

# One CSV per (sampler, origin repertoire); tag each frame with where it came from.
coverage_dfs = [
    pd.read_csv(f"../results/coverage/nn_evo-body-10x10-walker-{sampler}_{seed}_g{repertoire}.csv").assign(
        sampler=sampler, seed=seed, origin_repertoire=repertoire)
    for sampler in samplers
    for repertoire in repertoires
]

# Long format: one row per (origin repertoire, destination repertoire) coverage value.
coverage_df = (
    pd.concat(coverage_dfs)
    .melt(id_vars=["max_fitness", "sampler", "seed", "origin_repertoire"],
          value_vars=["coverage1", "coverage2", "coverage3"])
    .rename(columns={"variable": "destination_repertoire", "value": "coverage"})
)
coverage_df["destination_repertoire"] = coverage_df["destination_repertoire"].map(
    lambda name: int(name.replace("coverage", "")))

# make it relative and save to file
for sampler in samplers:
    tmp_coverage_df = coverage_df[(coverage_df["sampler"] == sampler) & (coverage_df["seed"] == seed)]
    # Reference value per destination: the coverage where origin and destination coincide.
    self_coverage = {
        rep: tmp_coverage_df[
            (tmp_coverage_df["origin_repertoire"] == rep) & (tmp_coverage_df["destination_repertoire"] == rep)][
            "coverage"].iloc[0]
        for rep in (1, 2, 3)
    }
    clear_coverage_df = tmp_coverage_df.copy()
    clear_coverage_df["relative_coverage"] = (
        clear_coverage_df["coverage"] / clear_coverage_df["destination_repertoire"].map(self_coverage)
    )
    exported = clear_coverage_df[["origin_repertoire", "destination_repertoire", "coverage", "relative_coverage"]]
    exported.sort_values(by=["origin_repertoire", "destination_repertoire"]).to_csv(
        f"../pgfplots/trans_coverage_nn_{algo_mapping[sampler]}.txt", sep="\t", index=False)

In [22]:
# Builds `transfer_df`: final max_fitness per task/seed/controller, tagged with the
# algorithm and with where the evaluated individuals came from (`from` column).
# Relies on `algo_mapping` and `reps` defined in earlier cells.
tasks = ["BridgeWalker-v0", "CustomCarrier-v0", "PlatformJumper-v0", "CaveCrawler-v0"]
samplings = ["all", "s1", "s2", "s3"]


def _tag_run(frame, seed, task_name, algorithm, origin, controller):
    """Add the provenance columns to ``frame`` in place and return it."""
    frame["seed"] = seed
    frame["task"] = task_name
    frame["algorithm"] = algorithm
    frame["from"] = origin
    frame["controller"] = controller
    return frame


# 1) Runs evolved directly on each task (from == "direct"): "3b" and "ga".
evo_dfs = []
sampling = "all"
for task in tasks:
    task_name = task.replace('-v0', '').lower()
    for controller in ["nn", "graph"]:
        for seed in range(10):
            if controller == "nn":
                me_path = f"../results/me_nn/evo-body-10x10-{task_name}-{sampling}_{seed}.csv"
                ga_path = f"../results/ga/evo-body-nn-10x10-{task_name}_{seed}.csv"
            else:
                me_path = f"../results/me/evo-body-10x10-{task_name}-{sampling}_{seed}.csv"
                ga_path = f"../results/ga/evo-body-10x10-{task_name}_{seed}.csv"

            try:
                tmp_df = pd.read_csv(me_path)
            except FileNotFoundError as e:
                # Best effort: a missing run is reported and skipped, not fatal.
                print(e)
            else:
                evo_dfs.append(_tag_run(tmp_df, seed, task_name, "3b", "direct", controller))

            tmp_df = pd.read_csv(ga_path)
            evo_dfs.append(_tag_run(tmp_df, seed, task_name, "ga", "direct", controller))

evo_df = pd.concat(evo_dfs, ignore_index=True)
# Keep only the rows logged at the last iteration found across all runs.
final_evo_df = evo_df[evo_df["iteration"] == evo_df["iteration"].max()][
    ["max_fitness", "coverage1", "coverage2", "coverage3", "task", "seed", "algorithm", "from", "controller"]]

# 2) Transfer results: the "ga" file (from == "population") and, for every sampling,
#    one file per repertoire g1..g3 (from == the repertoire name).
dfs = [final_evo_df]
for task in tasks:
    task_name = task.replace('-v0', '').lower()
    for seed in range(10):
        for controller in ["nn", "graph"]:
            if controller == "nn":
                tmp_df = pd.read_csv(f"../results/transfer_nn/ga_evo-body-nn-10x10-walker_{seed}_{task}.csv")
            else:
                tmp_df = pd.read_csv(f"../results/transfer/evo-body-10x10_{seed}_{task}.csv")
            # max_fitness is stored as a bracketed string in these files; strip the brackets.
            tmp_df["max_fitness"] = tmp_df["max_fitness"].apply(lambda x: float(x.replace("[", "").replace("]", "")))
            dfs.append(_tag_run(tmp_df, seed, task_name, "ga", "population", controller))

            for sampling in samplings:
                for rep_id in range(3):
                    rep = f"g{rep_id + 1}"
                    if controller == "nn":
                        tmp_df = pd.read_csv(
                            f"../results/transfer_nn/me_evo-body-10x10-walker-{sampling}_{seed}_{rep}_{task}.csv")
                    else:
                        tmp_df = pd.read_csv(
                            f"../results/transfer/evo-body-10x10-floor-{sampling}_{seed}_{rep}_{task}.csv")
                    dfs.append(_tag_run(tmp_df, seed, task_name, algo_mapping[sampling], reps[rep_id], controller))

# Rows with NaN coverage (e.g. "ga" runs) are kept: dropping them would also drop their max_fitness.
transfer_df = pd.concat(dfs)
transfer_df.head()

[Errno 2] No such file or directory: '../results/me/evo-body-10x10-cavecrawler-all_9.csv'


Unnamed: 0,max_fitness,coverage1,coverage2,coverage3,task,seed,algorithm,from,controller
3999,3.712342,11.425781,55.17578,27.539062,bridgewalker,0,3b,direct,nn
7999,6.30594,,,,bridgewalker,0,ga,direct,nn
11999,6.329406,11.328125,58.59375,35.839844,bridgewalker,1,3b,direct,nn
15999,6.18504,,,,bridgewalker,1,ga,direct,nn
19999,6.582234,11.523438,57.03125,22.851562,bridgewalker,2,3b,direct,nn


In [23]:
# Transfer fitness: one box per `from` value, one output file per
# (algorithm, task, controller) combination.
box_plot(transfer_df, "from", "max_fitness",
         groups=["algorithm", "task", "controller"],
         file_prefix="../pgfplots/transfer")

<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})


<IPython.core.display.Javascript object>

  self.comm = Comm('matplotlib', data={'id': self.uuid})
