In [1]:
import wandb
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tinydb import TinyDB, Query
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names, so use the new name when it is
# available and fall back to the legacy one on older installations.
_paper_style = "seaborn-v0_8-paper"
plt.style.use(_paper_style if _paper_style in plt.style.available else "seaborn-paper")

# Shared W&B client and the entity that get_runs() prefixes to project names.
api = wandb.Api(timeout=30)
entity = "joasiee"
optims = ["GOMEA", "AdaptiveStochasticGradientDescent"]

# Directory that holds the per-project TinyDB files; exist_ok makes re-running
# this cell a no-op without a separate existence check.
os.makedirs("tinydb", exist_ok=True)

def get_runs(project, filters=None):
    """Fetch the runs of a W&B project owned by the module-level ``entity``.

    Args:
        project: Project name; it is joined to ``entity`` as
            ``"<entity>/<project>"``.
        filters: Optional filter mapping forwarded to ``api.runs``. When
            omitted, a fresh empty dict is passed so that no dict object is
            shared between calls.

    Returns:
        Whatever ``api.runs`` returns for that path and filter.
    """
    if filters is None:
        filters = {}
    return api.runs(entity + "/" + project, filters=filters)

def get_finished_run_data(runs):
    """Collect name, public config and history for every finished run.

    Runs whose ``state`` is not ``"finished"`` are ignored. Config keys that
    start with an underscore are dropped. Each run's ``scan_history()`` result
    is turned into a DataFrame.

    Args:
        runs: Iterable of run objects exposing ``state``, ``name``, ``config``
            and ``scan_history()``.

    Returns:
        Three parallel lists ``(names, configs, histories)``.
    """
    collected = []
    for run in runs:
        if run.state != "finished":
            continue
        run_name = run.name
        public_config = {
            key: value
            for key, value in run.config.items()
            if not key.startswith('_')
        }
        frame = pd.DataFrame.from_dict(run.scan_history())
        collected.append((run_name, public_config, frame))

    names = [entry[0] for entry in collected]
    configs = [entry[1] for entry in collected]
    histories = [entry[2] for entry in collected]
    return names, configs, histories

def run_to_dict(name, config, history):
    """Convert one run into a plain dict with its history split per resolution.

    For each resolution ``r`` the result holds the bookkeeping columns
    (``_step``, ``_runtime``, ``_timestamp``) plus every column whose name
    contains ``"R{r}/"``. When the run has more than one resolution, only the
    rows where ``"R{r}/metric"`` is present (not NaN/None) are kept; with a
    single resolution all rows are kept.

    Args:
        name: Run name stored under ``"name"``.
        config: Run configuration; must contain ``"NumberOfResolutions"``.
        history: DataFrame with the run's logged history.

    Returns:
        ``{"name": ..., "config": ..., "resolutions": [dict, ...]}`` where each
        entry of ``"resolutions"`` is ``DataFrame.to_dict()`` output.
    """
    res = {"name": name, "config": config, "resolutions": []}
    shared_columns = ["_step", "_runtime", "_timestamp"]

    nr_resolutions = int(config["NumberOfResolutions"])
    for r in range(nr_resolutions):
        prefix = f"R{r}/"
        columns = shared_columns + [c for c in history.columns if prefix in c]
        if nr_resolutions > 1:
            # notna() also copes with object columns holding None, and a
            # boolean mask selects rows correctly for any index labels.
            logged = history[f"{prefix}metric"].notna()
            history_r = history.loc[logged, columns]
        else:
            # Single resolution: every row belongs to it.
            history_r = history[columns]
        res["resolutions"].append(history_r.to_dict())

    return res

def save_experiment_data_locally(project):
    """Replace the local TinyDB copy of a W&B project with its finished runs.

    All finished runs are downloaded and converted to documents before the
    database is touched, so a failure while fetching or converting leaves the
    existing file unchanged instead of half-filled.

    Args:
        project: W&B project name; also the stem of ``tinydb/<project>.db``.
    """
    runs = get_runs(project)
    names, configs, histories = get_finished_run_data(runs)
    documents = [
        run_to_dict(name, config, history)
        for name, config, history in zip(names, configs, histories)
    ]

    # The context manager closes the storage file even if a write fails;
    # insert_multiple writes all documents in a single storage update.
    with TinyDB(f"tinydb/{project}.db") as db:
        db.drop_tables()
        db.insert_multiple(documents)

def load_db(project):
    """Open the TinyDB file ``tinydb/<project>.db`` and return the handle."""
    db_path = f"tinydb/{project}.db"
    return TinyDB(db_path)

In [None]:
import xarray as xr

# Scratch exploration: take one run of the "convergence_experiment" project,
# load its logged history into a DataFrame and wrap that in an xarray
# DataArray carrying the run's config as attributes.
runs = get_runs("convergence_experiment")
run = runs[-1]  # last element of the returned collection; NOTE(review): ordering is whatever the W&B API returns — confirm
df = pd.DataFrame.from_dict(run.scan_history())
xarr = xr.DataArray(df)
xarr.attrs = run.config  # keep the run configuration next to the data
xarr

In [None]:
# Plot the "R0/metric" column of the `xarr` history array over all rows.
xarr.loc[:, "R0/metric"].plot()

In [None]:
# Module-level handle to the locally stored runs; plot_instances() searches this `db`.
db = load_db("convergence_experiment")

def get_metric_mean_std(runs):
    """Aggregate the ``R0/metric`` curves of several runs step by step.

    Note that, despite the function name, the central value returned is the
    per-step *median* across runs, not the mean.

    Args:
        runs: Stored run documents; each must provide
            ``run["resolutions"][0]["R0/metric"]`` as a mapping whose values
            are the metric per step.

    Returns:
        ``(median, std)``: two arrays computed along axis 0 (across runs).
    """
    curves = []
    for run in runs:
        first_resolution = run["resolutions"][0]
        raw_values = list(first_resolution["R0/metric"].values())
        curves.append(np.array(raw_values).astype(np.float64))

    stacked = np.array(curves)
    center = np.median(stacked, 0)
    spread = np.std(stacked, 0)
    return center, spread


def get_mean_runtimes(runs):
    """Return the per-step cumulative runtime, aggregated across runs.

    For each run the ``R0/time[ms]`` values of the first resolution are
    accumulated with NaNs counted as zero. Despite the function name, the
    aggregate across runs is the per-step *median*.

    Args:
        runs: Stored run documents; each must provide
            ``run["resolutions"][0]["R0/time[ms]"]`` as a mapping whose values
            are the per-step times.

    Returns:
        Array with the median cumulative runtime at every step.
    """
    cumulative = []
    for run in runs:
        step_times = list(run["resolutions"][0]["R0/time[ms]"].values())
        as_float = np.array(step_times).astype(np.float64)
        cumulative.append(np.nancumsum(as_float))

    stacked = np.array(cumulative)
    return np.median(stacked, 0)


def plot_runs(runs, label, instance):
    """Draw one labelled metric-versus-runtime curve on the current figure.

    The curve uses the values from ``get_mean_runtimes`` (x) and
    ``get_metric_mean_std`` (y), with a shaded band of two standard deviations
    around it and the final y value written next to the last point.

    Args:
        runs: Stored run documents to aggregate.
        label: Legend label for the curve.
        instance: Instance identifier shown in the figure title.
    """
    runtimes = get_mean_runtimes(runs)
    center, spread = get_metric_mean_std(runs)
    band = 2 * spread
    final_value = center[-1]

    plt.title(f"Instance: {instance}", fontsize=14)
    plt.plot(runtimes, center, linewidth=2, label=label)
    plt.fill_between(runtimes, center - band, center + band, alpha=0.3)
    # A logarithmic y-axis (plt.yscale("log")) is intentionally left disabled.
    plt.xlabel("Runtime [ms]", fontsize=12)
    plt.ylabel("MSE", fontsize=12)
    plt.legend(prop={"size": 16})
    plt.text(runtimes[-1], final_value, f" - {final_value}", fontsize=12)


def plot_instances(instances=(16, 17, 14)):
    """Plot the stored runs of each instance, grouped by optimizer variant.

    For every instance two figures are produced from the module-level ``db``:
    one comparing ASGD with GOMEA runs that have no ``PartialEvaluations``
    config entry, and one for GOMEA runs that do have it. A group without any
    matching run is skipped with a warning, and a figure is only created when
    at least one of its groups has runs.

    Args:
        instances: Iterable of values matched against ``config.Instance``.
    """
    import warnings

    # Query attribute access builds new query objects, so one root can be
    # reused for all conditions.
    run_query = Query()
    is_asgd = run_query.config.Optimizer == "AdaptiveStochasticGradientDescent"
    is_gomea = run_query.config.Optimizer == "GOMEA"
    has_partial = run_query.config.PartialEvaluations.exists()

    for instance in instances:
        on_instance = run_query.config.Instance == instance
        figures = [
            [
                ("ASGD", db.search(is_asgd & on_instance)),
                ("GOMEA-Full", db.search(is_gomea & ~has_partial & on_instance)),
            ],
            [
                ("GOMEA-Partial", db.search(is_gomea & has_partial & on_instance)),
            ],
        ]

        for groups in figures:
            available = []
            for label, matched_runs in groups:
                if matched_runs:
                    available.append((label, matched_runs))
                else:
                    # plot_runs cannot aggregate an empty set of runs.
                    warnings.warn(
                        f"No {label} runs stored for instance {instance}; skipping."
                    )
            if not available:
                continue

            plt.figure(figsize=(12, 8))
            for label, matched_runs in available:
                plot_runs(matched_runs, label, instance)


In [None]:
# Render the comparison figures for the default instances (16, 17 and 14).
plot_instances()

In [5]:
# Download names, configs and histories of the finished runs in the "zandbak" project.
names, configs, histories = get_finished_run_data(get_runs("zandbak"))

In [6]:
# Display the history DataFrame of the first finished run.
histories[0]

Unnamed: 0,_step,R0.missedpixels,R0.||gradient||,_runtime,R0.time[ms],R0.3a:time,R0.3b:stepsize,R0.metric,_timestamp
0,0,0.979167,0.665054,12,9.8,0.00,5.480747,5.937500,1649948191
1,1,0.000000,0.338915,12,0.4,0.00,5.480747,5.708196,1649948191
2,2,0.000000,1.189214,12,0.5,1.00,5.231622,5.109480,1649948191
3,3,1.625000,0.391677,12,1.9,0.99,5.234001,2.859657,1649948191
4,4,0.000000,0.317176,12,0.7,1.99,5.006337,2.490119,1649948191
...,...,...,...,...,...,...,...,...,...
1997,1997,44.250000,0.004086,13,0.4,0.00,5.480747,0.310725,1649948192
1998,1998,44.250000,0.004085,13,0.3,0.00,5.480747,0.310634,1649948192
1999,1999,44.375000,0.004093,13,0.4,0.00,5.480747,0.311240,1649948192
2000,2000,44.375000,0.004093,13,3.3,0.00,5.480747,22.360576,1649948192


In [7]:
# Display the name of the first finished run.
names[0]

'1649948178_examples_1_adaptivestochasticgradientdescent_0a39baea'