In [2]:
import wandb
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from thesispy.experiments.dataset import FinishedRun, Dataset

# Notebook-wide configuration shared by the cells below.
# Apply two named styles plus a project-local style sheet to all figures.
# NOTE(review): "science" and "high-vis" are not built-in matplotlib styles;
# presumably they come from the SciencePlots package — confirm it is
# installed and registered in this environment.
plt.style.use(["science", "high-vis", "../resources/plt_custom.txt"])
# Shared Weights & Biases API client, used by get_runs_as_dataset.
api = wandb.Api(timeout=30)
# W&B entity; prefixed to the project name when querying runs.
entity = "joasiee"
# NOTE(review): not referenced by any cell visible here; presumably a figure
# width — confirm the unit and whether it is still needed.
DEFAULT_WIDTH = 485


def parse_run(run):
    """Convert a W&B run object into a ``FinishedRun``.

    The run's ``name`` and ``config`` are passed through unchanged, and the
    records produced by ``run.scan_history()`` are loaded into a pandas
    DataFrame that becomes the third constructor argument.
    """
    name, config = run.name, run.config
    history = pd.DataFrame.from_dict(run.scan_history())
    return FinishedRun(name, config, history)

def get_runs_as_dataset(project, filters=None):
    """Fetch the W&B runs of ``project`` and wrap them in a ``Dataset``.

    Args:
        project: Project name; queried as ``entity + "/" + project`` using the
            module-level ``entity`` and ``api`` client.
        filters: Optional filter dict forwarded to ``api.runs``. ``None``
            (the default) is sent as an empty dict, matching the previous
            ``filters={}`` default.

    Returns:
        ``Dataset(project, runs)``, where ``runs`` contains one ``parse_run``
        result for every run yielded by ``api.runs``.
    """
    # Use None instead of a shared mutable ``{}`` default so that no state can
    # ever leak between calls through the default object.
    if filters is None:
        filters = {}
    runs = [
        parse_run(run)
        for run in api.runs(entity + "/" + project, filters=filters)
    ]
    return Dataset(project, runs)

In [None]:
# Load the stored "sampling_experiment2" dataset and derive one filtered view
# per optimizer variant: ASGD, GOMEA without partial evaluations, and GOMEA
# with partial evaluations.
sampling_ds = Dataset.load("sampling_experiment2")
asgd_sampling = sampling_ds.filter("Optimizer == 'AdaptiveStochasticGradientDescent'")
gomeafull_sampling = sampling_ds.filter("Optimizer == 'GOMEA' AND NOT PartialEvaluations")
gomeapartial_sampling = sampling_ds.filter("Optimizer == 'GOMEA' AND PartialEvaluations")

In [None]:
# Build `gomeapartial_df`: one column per (rounded) sampling percentage, one
# row per collected value, plus an "Optimizer" label column.
groups = []
final_evals = []

for group, runs in gomeapartial_sampling.groupby(["SamplingPercentage"]):
    # Only the first element of the group key is kept (single grouping column).
    groups.append(group[0])
    evals = []
    for run in runs:
        # "R0/metric" entry of the run's first `resolutions_val` element.
        evals.append(run.resolutions_val[0]["R0/metric"])
    # NOTE(review): when a group has exactly 4 runs, the mean of its values is
    # appended as an extra entry. Presumably this pads the group to the same
    # length as the others so np.array() below is rectangular, but it also adds
    # a synthetic data point to the box plot — confirm this is intended.
    if len(runs) == 4:
        evals.append(np.mean(evals))
    final_evals.append(evals)

# Round the group keys to two decimals; they become the column labels.
groups = [np.around(group,2) for group in groups]
# Order the columns by ascending sampling percentage, keeping values aligned.
groups, final_evals = zip(*sorted(zip(groups, final_evals)))
# Transpose so each group's values form a column instead of a row.
gomeapartial_df = pd.DataFrame(np.array(final_evals).transpose(), columns=groups)
gomeapartial_df["Optimizer"] = "GOMEA-partial"

In [None]:
# Stack the per-optimizer tables and reshape them to long format: one row per
# (Optimizer, SamplingPercentage, MSE) observation, as used by the box plot.
# NOTE(review): `asgd_df` and `gomeafull_df` are not defined in any cell
# visible here; on a fresh kernel this cell raises NameError unless the cells
# that build them are restored.
frames = [asgd_df, gomeafull_df, gomeapartial_df]
sampling_df = pd.concat(frames)
boxplot_df = pd.melt(sampling_df, id_vars=["Optimizer"], var_name="SamplingPercentage", value_name="MSE")

In [None]:
# Combined box plot: MSE per sampling percentage, one hue per optimizer,
# written to "sampling_boxplot_all.pdf" in the current working directory.
sns.boxplot(x="SamplingPercentage", y="MSE", hue="Optimizer", data=boxplot_df, linewidth=0.7, fliersize=0.25)
plt.savefig("sampling_boxplot_all.pdf")

In [None]:
def sampling_boxplots(title, filters):
    """Draw one box per sampling percentage for the runs matching ``filters``.

    Runs are fetched from the "sampling_experiment2" project through
    ``get_runs_as_dataset`` and grouped by ``SamplingPercentage``. For every
    run, the ``"R0/metric"`` entry of its first ``resolutions_val`` element is
    collected. Boxes are drawn in ascending order of sampling percentage, with
    tick labels rounded to two decimals.

    Args:
        title: Title placed above the plot.
        filters: Filter dict forwarded to ``get_runs_as_dataset``.

    Raises:
        ValueError: If no runs match ``filters``, so there is nothing to plot.
    """
    dataset = get_runs_as_dataset("sampling_experiment2", filters)

    # One (sampling percentage, [metric values]) pair per group.
    evals_by_group = [
        (group[0], [run.resolutions_val[0]["R0/metric"] for run in runs])
        for group, runs in dataset.groupby(["SamplingPercentage"])
    ]
    if not evals_by_group:
        # Without this guard the unpacking below fails with an opaque
        # "not enough values to unpack" ValueError.
        raise ValueError(
            f"No runs in 'sampling_experiment2' match filters {filters!r}"
        )

    # Sort on the group key only, so the value lists are never compared.
    evals_by_group.sort(key=lambda pair: pair[0])
    groups, final_evals = zip(*evals_by_group)

    ax = sns.boxplot(data=final_evals, palette="ch:s=.25,rot=-.25")
    ax.set_xticklabels([str(np.around(x, 2)) for x in groups])
    ax.set_xlabel("Sampling Percentage")
    ax.set_ylabel("Mean Squared Error")
    ax.set_title(title)

In [None]:
# Box plots for the runs whose config has
# Optimizer == "AdaptiveStochasticGradientDescent".
sampling_boxplots(
    "Adaptive Stochastic Gradient Descent",
    {"config.Optimizer": "AdaptiveStochasticGradientDescent"},
)


In [None]:
# Box plots for GOMEA runs selected with `$exists: False` on
# config.PartialEvaluations.
# NOTE(review): presumably this matches runs whose config lacks the key
# entirely; runs that store PartialEvaluations=False explicitly would then be
# excluded — confirm against how the runs were logged.
sampling_boxplots(
    "GOMEA-full",
    {"config.Optimizer": "GOMEA", "config.PartialEvaluations": {"$exists": False}},
)


In [None]:
# Box plots for GOMEA runs whose config has PartialEvaluations set to True.
sampling_boxplots(
    "GOMEA-partial",
    {"config.Optimizer": "GOMEA", "config.PartialEvaluations": True},
)