In [None]:
%pylab inline
import numpy as np
import scipy.stats as spstats
import pandas as pd

In [None]:
# Archived output directories of the "nk_delete" runs; their values are
# tagged sample = 3 when loaded further down.
sample_dirs = [
    "archive/paper-2.0/nk_delete 1/2017-09-05 20:43:37 fc24cdf",
    "archive/paper-2.0/nk_delete 2/2017-09-05 20:43:40 fc24cdf",
    "archive/paper-2.0/nk_delete 3/2017-09-05 20:43:43 fc24cdf",
    "archive/paper-2.0/nk_delete 4/2017-09-05 20:43:37 fc24cdf",
    "archive/paper-2.0/nk_delete 5/2017-09-05 20:43:37 fc24cdf",
    "archive/paper-2.0/nk_delete 6/2017-09-05 20:43:43 fc24cdf",
]

# Archived output directories of the "nk_delete_nosample" runs; their values
# are tagged sample = 0 when loaded further down.
nosample_dirs = [
    "archive/paper-2.0/nk_delete_nosample 1/2017-09-05 20:50:39 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 2/2017-09-05 20:50:39 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 3/2017-09-05 20:50:40 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 4/2017-09-05 20:50:39 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 5/2017-09-05 20:50:40 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 6/2017-09-05 20:50:39 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 7/2017-09-05 20:50:41 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 8/2017-09-05 20:50:42 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 9/2017-09-05 20:50:44 fc24cdf",
    "archive/paper-2.0/nk_delete_nosample 10/2017-09-05 20:50:45 fc24cdf",
]

# Every run directory in one new list: the nk_delete ones first, then the
# nk_delete_nosample ones.
all_dirs = []
all_dirs.extend(sample_dirs)
all_dirs.extend(nosample_dirs)

In [None]:
# Load values.csv from every run directory and stack the frames, one stack
# per group of directories.
#
# pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# read_csv with index_col=0 and parse_dates=True reproduces its defaults
# (first column as index, index parsed as dates when possible).
df_sample_values = pd.concat(
    [pd.read_csv("%s/values.csv" % d, index_col=0, parse_dates=True) for d in sample_dirs]
)
df_nosample_values = pd.concat(
    [pd.read_csv("%s/values.csv" % d, index_col=0, parse_dates=True) for d in nosample_dirs]
)

# Tag each row with the group it was loaded from.
# NOTE(review): 3 and 0 presumably mirror the "sample" column of runs.csv
# for the two groups -- confirm against the run configuration.
df_sample_values["sample"] = 3
df_nosample_values["sample"] = 0

In [None]:
# Per-run metadata (runs.csv) from every directory, stacked into one frame.
# The first CSV column becomes the index; later cells look rows up by it.
#
# pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# read_csv with index_col=0 and parse_dates=True reproduces its defaults.
df_runs = pd.concat(
    [pd.read_csv("%s/runs.csv" % d, index_col=0, parse_dates=True) for d in all_dirs]
)

# All value logs in one frame; the "sample" column set at load time keeps
# the two groups distinguishable.
df_values = pd.concat([df_sample_values, df_nosample_values])

In [None]:
# Restrict the analysis to the two extreme "keep" settings: keep == 1 and the
# smallest keep value present in the runs.
min_keep = min(set(df_runs["keep"]))
df_runs = df_runs[(df_runs["keep"] == 1) | (df_runs["keep"] == min_keep)]
# A later cell adds a "run_id" column to df_values. Taking an explicit copy of
# the filtered frame makes that assignment unambiguous and avoids pandas'
# SettingWithCopyWarning.
df_values = df_values[(df_values["keep"] == 1) | (df_values["keep"] == min_keep)].copy()

In [None]:
# Give every value row a run identifier of the form
# "<timestamp>-<jobid>-<trial>", each field rendered with str().
run_id_fields = [df_values[field].map(str) for field in ("timestamp", "jobid", "trial")]
df_values["run_id"] = run_id_fields[0] + "-" + run_id_fields[1] + "-" + run_id_fields[2]

In [None]:
def values_to_efficiency(values, tol=0.0001):
    """Return the reciprocal of the index at which ``values`` first peaks.

    The peak is ``max(values)``; an element counts as having reached it when
    it lies within ``tol`` of that maximum.

    Args:
        values: Non-empty sequence of numbers, indexed from 0.
        tol: Largest allowed gap between an element and the maximum for the
            element to count as having reached it. Defaults to 0.0001.

    Returns:
        ``1.0 / index`` of the first element within ``tol`` of the maximum,
        or ``float("inf")`` when that element is the very first one
        (index 0), which previously raised ZeroDivisionError.

    Raises:
        ValueError: If ``values`` is empty, or if no element is within
            ``tol`` of the maximum (e.g. a negative ``tol`` or NaN values).
    """
    if len(values) == 0:
        raise ValueError("values must not be empty")
    # Compute the maximum once instead of once per element.
    peak = max(values)
    first_hit = next((i for i, v in enumerate(values) if peak - v <= tol), None)
    if first_hit is None:
        raise ValueError("no element of values is within tol of the maximum")
    if first_hit == 0:
        # Already at the maximum at index 0: the reciprocal is unbounded.
        return float("inf")
    return 1.0 / float(first_hit)

def values_to_converge(values, tol=0.0001):
    """Return the index at which ``values`` first reaches its maximum.

    An element counts as having reached the maximum when it lies within
    ``tol`` of ``max(values)``.

    Args:
        values: Non-empty sequence of numbers, indexed from 0.
        tol: Largest allowed gap between an element and the maximum for the
            element to count as having reached it. Defaults to 0.0001.

    Returns:
        The 0-based index of the first element within ``tol`` of the maximum.

    Raises:
        ValueError: If ``values`` is empty, or if no element is within
            ``tol`` of the maximum (e.g. a negative ``tol`` or NaN values).
    """
    if len(values) == 0:
        raise ValueError("values must not be empty")
    # Compute the maximum once instead of once per element.
    peak = max(values)
    first_hit = next((i for i, v in enumerate(values) if peak - v <= tol), None)
    if first_hit is None:
        raise ValueError("no element of values is within tol of the maximum")
    return first_hit

In [None]:
# Build df_eff: one summary row per (run, strategy) pair, combining the
# convergence statistics of the value series with the run's metadata.
d_trial = []
d_strat = []
d_eff = []
d_perf = []
d_rewire = []
d_degree = []
d_converge = []
d_keep = []
d_sample = []
# A single groupby pass replaces re-filtering the whole of df_values for
# every pair. Groups come out sorted by run_id and then strategy, the same
# order as nested loops over the sorted unique values. Pairs with no rows
# are skipped instead of failing on an empty value list.
for (trial, strategy), df in df_values.groupby(["run_id", "strategy"]):
    values = list(df["value"])
    eff = values_to_efficiency(values)
    d_converge.append(values_to_converge(values))
    d_trial.append(trial)
    d_strat.append(strategy)
    d_eff.append(eff)
    # Run metadata is looked up in df_runs by the run id (its index label).
    d_rewire.append(df_runs.loc[trial, "rewire"])
    d_degree.append(df_runs.loc[trial, "degree"])
    d_keep.append(df_runs.loc[trial, "keep"])
    # The performance of each strategy is stored in its own "<strategy>_perf" column.
    d_perf.append(df_runs.loc[trial, "%s_perf" % strategy])
    d_sample.append(df_runs.loc[trial, "sample"])
df_eff = pd.DataFrame({
    "trial": d_trial,
    "rewire": d_rewire,
    "degree": d_degree,
    "strategy": d_strat,
    "eff": d_eff,
    "perf": d_perf,
    "converge": d_converge,
    "keep": d_keep,
    "sample": d_sample
})

In [None]:
# Scatter of convergence index against performance, one panel per strategy,
# saved as fig-converge-value.png. The 2x3 grid holds at most six strategies.
# All drawing goes through plt.* rather than the bare plot/xlabel/... names
# that only exist because of the %pylab star import.
plt.figure(figsize=(3*15.0/6.0,2*15.0/6.0))
for i, strategy in enumerate(sorted(set(df_eff["strategy"]))):
    plt.subplot(2,3, i+1)
    plt.title(strategy)
    df = df_eff[df_eff["strategy"] == strategy]
    plt.plot(df["converge"], df["perf"], '.')
    plt.xlabel('Converge Time')
    plt.ylabel('Value')
    # Fixed axis ranges keep the panels directly comparable.
    plt.xlim([0, 300])
    plt.ylim([0.7, 0.8])
plt.tight_layout()
plt.savefig("fig-converge-value.png", format="png", dpi=300)

In [None]:
# Mean performance with standard-error bars for the four combinations of
# sample setting (0 or 3) and keep setting (smallest keep or 1.0), drawn in
# one stacked panel per strategy.
strategies = ["loc_best", "loc_conform", "best", "conform", "loc_cons"]
fmt = [".", "s", "d", "*"]
# Legend labels indexed by sample_i*2 + keep_i; identical for every strategy,
# so they are defined once outside the loops.
x = ["no-sample sparse", "no-sample full", "sample sparse", "sample full"]
# The smallest keep value in df_eff is the "sparse" condition (loop-invariant).
sparse_keep = min(sorted(set(df_eff['keep'])))
plt.figure(figsize=(3*15.0/6.0,len(strategies)*15.0/6.0))
for strategy_i, strategy in enumerate(strategies):
    df_strategy = df_eff[df_eff["strategy"] == strategy]
    plt.subplot(len(strategies),1,1+strategy_i)
    plt.title(strategy)
    for sample_i, sample in enumerate([0, 3]):
        df_sample = df_strategy[df_strategy["sample"] == sample]
        for keep_i, keep in enumerate([sparse_keep, 1.0]):
            df_keep = df_sample[df_sample["keep"] == keep]
            perf_m = df_keep["perf"].mean()
            perf_s = df_keep["perf"].std(ddof=1)
            # Standard error of the mean. np.sqrt is used because numpy is
            # imported explicitly, whereas `math` never is.
            perf_se = perf_s / np.sqrt(len(df_keep))
            i = sample_i*2 + keep_i
            plt.errorbar([i], [perf_m], yerr=perf_se, label=x[i], capsize=4, fmt=fmt[i], markersize=6)
    plt.legend()
    # Tick labels are blanked; the legend identifies the four conditions.
    plt.xticks([0,1,2,3], ["", "", "", ""])
    plt.ylabel("Performance")
    plt.xlim([-1,5])
    plt.grid()
plt.tight_layout()

In [None]:
# Smallest "sample" value among the loaded runs, shown as the cell output.
sample_levels = sorted(set(df_runs["sample"]))
min(sample_levels)

In [None]:
# Mean performance of the "best" strategy at sample == 3, plotted against
# each keep value present, with standard-error bars.
df = df_eff[df_eff["strategy"] == "best"]
df = df[df["sample"] == 3]
y = []
x = []
se = []
for keep in sorted(set(df["keep"])):
    d = df[df["keep"] == keep]
    x.append(keep)
    y.append(d["perf"].mean())
    # Standard error of the mean. np.sqrt replaces the bare `sqrt`, which
    # only exists because of the %pylab star import.
    se.append(d["perf"].std(ddof=1) / np.sqrt(len(d)))
plt.errorbar(x, y, yerr=se)