In [None]:
import numpy as np
import pandas as pd
import yaml
import matplotlib.pyplot as plt

In [None]:
def read_benchmark(path):
    """Load one lower-bound benchmark file into a summary DataFrame.

    Parameters
    ----------
    path : str or path-like
        Multi-document YAML file. Every document must provide the keys
        ``instance``, ``lower_bound_name``, ``values`` and ``complete_times``.

    Returns
    -------
    pandas.DataFrame
        One row per YAML document with the columns ``instance``,
        ``lower_bound``, ``value``, ``time_mean`` and ``time_std``:

        * ``instance`` is the last ``/``-separated component of the document's
          instance path with its final ``-``-separated piece removed,
        * ``value`` is the mean of ``values``,
        * ``time_mean`` / ``time_std`` are ``np.mean`` / ``np.std`` of
          ``complete_times``.

        A file without any documents yields an empty frame that still has all
        five columns.

    Raises
    ------
    KeyError
        If a document lacks one of the required keys.
    """
    columns = ["instance", "lower_bound", "value", "time_mean", "time_std"]
    rows = []

    with open(path, 'r') as file:
        for doc in yaml.safe_load_all(file):
            file_name = doc["instance"].split("/")[-1]
            # Drop everything after the last "-" of the file name.
            instance = "-".join(file_name.split("-")[:-1])
            times = doc["complete_times"]
            rows.append((
                instance,
                doc["lower_bound_name"],
                np.mean(doc["values"]),
                np.mean(times),
                np.std(times),
            ))

    # Build the frame in one go: cheaper than concatenating one-row frames and,
    # unlike pd.concat([]), it does not raise when the file has no documents.
    return pd.DataFrame(rows, columns=columns)

In [None]:
# Benchmark result files to load.
paths = [
    "../output/lb_greedy.yaml",
    "../output/lb_local_search.yaml",
    "../output/lb_linearprogram.yaml",
]
# Each per-file frame is indexed 0..n-1; ignore_index renumbers the combined
# frame so that row labels are unique instead of repeating once per file.
df = pd.concat([read_benchmark(path) for path in paths], ignore_index=True)

In [None]:
# Numeric code per lower-bound name; later cells use it for colouring and for
# selecting the rows of one lower bound.
lower_bound_codes = {"Greedy": 0, "LocalSearch": 1, "LinearProgram": 2}
df["c"] = df["lower_bound"].map(lower_bound_codes)

# Rescale both timing columns by 1e9 (presumably nanoseconds -> seconds, since
# the plots label this axis "time (s)" -- TODO confirm the raw unit).
timing_columns = ["time_mean", "time_std"]
df[timing_columns] = df[timing_columns] / 1e9

In [None]:
# Mean time against lower-bound value on a log-scaled y axis; the point colour
# is taken from the numeric lower-bound code in column "c".
fig, ax = plt.subplots()
ax.set(yscale="log", xlabel="lower bound value", ylabel="time (s)")
ax.scatter(df["value"], df["time_mean"], c=df["c"])
plt.show()

In [None]:
# Keep a single row per (instance, lower bound) pair.
df = df.drop_duplicates(subset=["instance", "lower_bound"])

# Split by lower-bound code: 0 = Greedy, 1 = LocalSearch, 2 = LinearProgram.
# `a` keeps "instance" as a column, while `b` and `c` are indexed by it so
# that they can be joined onto `a`.
a = df.loc[df["c"] == 0]
b = df.loc[df["c"] == 1].set_index("instance")
c = df.loc[df["c"] == 2].set_index("instance")
# Disabled wide-format join, kept for reference. Assigning it to `df` would
# replace the long-format frame that the later groupby("lower_bound") cells use.
#df = a.join(b, on="instance", lsuffix="_greedy", rsuffix="_localsearch").join(c, on="instance", rsuffix="_linearprogram")

In [None]:
# `df` is in long format (one row per instance and lower bound), so it has no
# "*_greedy" columns. Build the wide comparison frame here instead: one row per
# Greedy result (`a`) with the LinearProgram result (`c`) of the same instance.
greedy_vs_lp = a.join(c, on="instance", lsuffix="_greedy", rsuffix="_linearprogram")

# Lower-bound values: points above the diagonal are instances where
# LinearProgram reports a larger value than Greedy.
fig, ax = plt.subplots()
ax.scatter(greedy_vs_lp["value_greedy"], greedy_vs_lp["value_linearprogram"])
ax.plot(greedy_vs_lp["value_greedy"], greedy_vs_lp["value_greedy"])  # y = x reference line
ax.set_xlabel("Greedy lower bound value")
ax.set_ylabel("LinearProgram lower bound value")
plt.show()

# Mean times, again with the y = x reference line.
fig, ax = plt.subplots()
ax.scatter(greedy_vs_lp["time_mean_greedy"], greedy_vs_lp["time_mean_linearprogram"])
ax.plot(greedy_vs_lp["time_mean_greedy"], greedy_vs_lp["time_mean_greedy"])
ax.set_xlabel("Greedy mean time (s)")
ax.set_ylabel("LinearProgram mean time (s)")
plt.show()

In [None]:
# `df` is in long format and has no "time_mean_greedy" / "time_mean_localsearch"
# columns, so pair the Greedy rows (`a`) with the LocalSearch rows (`b`) of the
# same instance in a dedicated wide frame.
greedy_vs_ls = a.join(b, on="instance", lsuffix="_greedy", rsuffix="_localsearch")

# Sorted mean times of both lower bounds (each curve is sorted independently).
fig, ax = plt.subplots()
ax.plot(np.sort(greedy_vs_ls["time_mean_greedy"]), label="Greedy")
ax.plot(np.sort(greedy_vs_ls["time_mean_localsearch"]), label="LocalSearch")
ax.set_xlabel("rank (sorted by mean time)")
ax.set_ylabel("mean time (s)")
ax.legend()
plt.show()

# Per-instance comparison of the two mean times.
fig, ax = plt.subplots()
ax.scatter(greedy_vs_ls["time_mean_greedy"], greedy_vs_ls["time_mean_localsearch"])
ax.set_xlabel("Greedy mean time (s)")
ax.set_ylabel("LocalSearch mean time (s)")
plt.show()

In [None]:
# Read every YAML document of the solutions file into a list.
with open("../output/bio_solutions.yaml", "r") as solutions_file:
    solution_docs = [doc for doc in yaml.safe_load_all(solutions_file)]

In [None]:
# One row per solution document; the document keys become the columns.
sol_df = pd.DataFrame(solution_docs)
# Strip everything up to and including the last "/" so only the file name is
# left. regex=True must be explicit: since pandas 2.0 str.replace treats the
# pattern as a literal string by default, which would leave the path untouched.
sol_df["instance"] = sol_df["instance"].str.replace(".*/", "", regex=True)
# First entry of the "solutions" list, or None when the list is empty.
sol_df["solution"] = sol_df["solutions"].apply(lambda x: x[0] if len(x) > 0 else None)
# First entry of the "time" list (raises IndexError if a list is empty).
sol_df["time"] = sol_df["time"].apply(lambda x: x[0])
def f(sol):
    """Return the number of entries in ``sol["edits"]``, or -1 if ``sol`` is None."""
    return -1 if sol is None else len(sol["edits"])
# Number of edits of each row's selected solution (-1 where there is none).
sol_df["solution_size"] = sol_df["solution"].map(f)

In [None]:
# Attach the solution columns to every benchmark row of the same instance.
solutions_by_instance = sol_df.set_index("instance")
df = df.join(solutions_by_instance, on="instance")

In [None]:
# Quality = lower-bound value relative to the solution cost.
cost = df["solution_cost"]
bound = df["value"]
quality = bound / cost
# A cost of -1 gets quality 0 ...
quality[cost == -1] = 0
# ... and a bound equal to the cost gets quality 1 (this also covers 0 / 0).
quality[cost == bound] = 1
df["quality"] = quality

In [None]:
# Solution time against solution cost and against solution size, one figure
# each, with labelled axes so the figures can be read on their own.
for column in ("solution_cost", "solution_size"):
    fig, ax = plt.subplots()
    ax.scatter(df[column], df["time"])
    ax.set_xlabel(column)
    ax.set_ylabel("time")
    plt.show()

In [None]:
# Quality against solution size, one colour/legend entry per lower bound.
fig, ax = plt.subplots()
for lb, group_df in df.groupby("lower_bound"):
    ax.scatter(group_df["solution_size"], group_df["quality"], label=lb)
ax.set_xlabel("solution_size")
ax.set_ylabel("quality")
ax.legend()
plt.show()

In [None]:
# One panel per lower bound. The number of panels follows the number of
# groups, so no group is silently dropped by zip() when it differs from three.
groups = list(df.groupby("lower_bound"))
n_panels = max(len(groups), 1)
# squeeze=False always returns a 2-D array of axes, even for a single panel.
fig, axes = plt.subplots(ncols=n_panels, figsize=(4 * n_panels, 4), sharey=True, squeeze=False)
for ax, (lb, group_df) in zip(axes[0], groups):
    # Only rows with a positive quality are shown.
    m = group_df["quality"] > 0
    ax.scatter(group_df.loc[m, "quality"], group_df.loc[m, "time_mean"], label=lb)
    ax.set_ylabel("time (s)")
    ax.set_xlabel("quality")
    ax.legend()
plt.show()

In [None]:
# (rows whose solution cost is exactly 0, rows whose solution cost is >= 0)
solution_cost = df["solution_cost"]
(solution_cost == 0).sum(), (solution_cost >= 0).sum()

In [None]:
# Number of distinct instance names in the combined frame.
len(df["instance"].unique())