In [None]:
import numpy as np
import scipy.fftpack
import os
import json
import matplotlib.pyplot as pl
from sdirl.experiments import L2Error, OrderError, ProportionError
%matplotlib inline

In [None]:
class ExperimentLog():
    """Read-only view over one experiment log stored as a JSON file.

    The log must contain a ``"phases"`` list whose entries each carry a
    ``"name"`` and a ``"results"`` dict.  On load, the index of the phase
    holding ``"duration"`` and the index of the phase holding ``"errors"``
    are recorded in ``comp_phase`` and ``err_phase``.
    """

    def __init__(self, file):
        """Load ``file`` and locate the computation and error phases.

        Args:
            file: path of the JSON log to read.

        Raises:
            ValueError: if a phase is recognised neither by its name nor by
                the keys of its ``"results"`` dict.
        """
        with open(file) as f:
            self.log = json.load(f)
        # Defaults that stay in effect when the loop below never assigns them.
        self.comp_phase = 0
        self.err_phase = 1
        for i, phase in enumerate(self.log["phases"]):
            # Known phase names take precedence; otherwise fall back to
            # recognising the phase by what its results contain.
            if phase["name"] == "Compute Bolfi Posterior":
                self.comp_phase = i
            elif phase["name"] == "Compute Bolfi Errors":
                self.err_phase = i
            elif "duration" in phase["results"].keys():
                self.comp_phase = i
            elif "errors" in phase["results"].keys():
                self.err_phase = i
            else:
                raise ValueError("Unknown phase {}".format(phase["name"]))

    def _error_table(self):
        """Return the ``"errors"`` dict of the error phase."""
        return self.log["phases"][self.err_phase]["results"]["errors"]

    @property
    def duration(self):
        """The ``"duration"`` value stored in the computation phase."""
        return self.log["phases"][self.comp_phase]["results"]["duration"]

    def errors(self, name=None):
        """Return logged error values.

        Args:
            name: substring of the wanted error's key, or None for everything.

        Returns:
            The whole errors dict when ``name`` is None, otherwise the values
            of the first key (in dict iteration order) that contains ``name``.

        Raises:
            AssertionError: if no key contains ``name``.
        """
        table = self._error_table()
        if name is None:
            return table
        for ename, evals in table.items():
            if name in ename:
                return evals
        # Raised explicitly (not via ``assert False``) so the failure is not
        # stripped when Python runs with -O.
        raise AssertionError((name, table.keys()))

    @property
    def steps(self):
        """Length of the first logged error series, or None if there is none."""
        for v in self._error_table().values():
            return len(v)
        return None

class TestExperimentLog(ExperimentLog):
    """In-memory stand-in for ExperimentLog that reads no file.

    The duration, the errors dict and the step count are supplied directly
    and returned unchanged by the overridden accessors.
    """

    def __init__(self, duration, errors, steps):
        # Deliberately does not call ExperimentLog.__init__, which opens a file.
        self.dur = duration
        self.errs = errors
        self.stps = steps

    @property
    def duration(self):
        """The duration passed to the constructor."""
        return self.dur

    def errors(self, name=None):
        """Return the series stored under the exact key ``name``.

        With ``name`` left as None the whole errors dict is returned, the same
        as ExperimentLog.errors does, so code that enumerates error names
        (e.g. ExperimentGroup.print_end_error_means) also works on stubs.

        Raises:
            KeyError: if ``name`` is given but is not a key of the dict.
        """
        if name is None:
            return self.errs
        return self.errs[name]

    @property
    def steps(self):
        """The step count passed to the constructor."""
        return self.stps
        
class ExperimentGroup():
    """A named set of experiment logs that all have the same number of steps.

    Each experiment must expose ``duration``, ``steps`` and ``errors(name)``.
    The group provides mean / standard deviation of durations and of error
    values across its experiments.
    """

    def __init__(self, name, experiments):
        """Store the experiments and check that their step counts agree.

        Args:
            name: label used by ``print_summary``.
            experiments: non-empty sequence of experiment logs.

        Raises:
            AssertionError: if any experiment has a different ``steps`` value
                than the first one.
        """
        self.name = name
        self.exp = experiments
        self.steps = self.exp[0].steps
        for e in self.exp:
            assert e.steps == self.steps, (e.steps, self.steps)

    def duration_mean(self):
        """Mean of the experiments' durations."""
        return np.mean([e.duration for e in self.exp])

    def duration_std(self):
        """Standard deviation (``np.std``) of the experiments' durations."""
        return np.std([e.duration for e in self.exp])

    def print_duration(self):
        """Print the mean duration with two decimals."""
        print("* Mean duration {:.2f} seconds".format(self.duration_mean()))

    def _get_errors_from_step_matching(self, name, idx, filt=lambda x: True):
        """Collect the error ``name`` at step ``idx`` from the accepted experiments.

        ``filt`` receives an experiment's full error series for ``name``; the
        experiment contributes when the result is truthy.  Truthiness is used
        instead of ``is True`` so predicates returning ``numpy.bool_`` work.
        """
        return [e.errors(name)[idx]
                for e in self.exp
                if filt(e.errors(name))]

    def error_mean_at_step_matching(self, name, idx, filt=lambda x: True):
        """Mean over accepted experiments of error ``name`` at step ``idx``."""
        return float(np.mean(self._get_errors_from_step_matching(name, idx, filt)))

    def error_std_at_step_matching(self, name, idx, filt=lambda x: True):
        """Standard deviation over accepted experiments of error ``name`` at step ``idx``."""
        return float(np.std(self._get_errors_from_step_matching(name, idx, filt)))

    def error_mean_matching(self, name, filt=lambda x: True):
        """Per-step means of error ``name``, as a list of ``self.steps`` floats.

        ``filt`` is forwarded to the per-step computation (it used to be
        accepted but ignored).
        """
        return [self.error_mean_at_step_matching(name, idx, filt)
                for idx in range(self.steps)]

    def error_std_matching(self, name,  filt=lambda x: True):
        """Per-step standard deviations of error ``name``; ``filt`` is forwarded."""
        return [self.error_std_at_step_matching(name, idx, filt)
                for idx in range(self.steps)]

    def print_end_error_means(self):
        """Print, for every error name of the first experiment, the mean at the last step."""
        enames = sorted(self.exp[0].errors().keys())
        for ename in enames:
            end_error_mean = self.error_mean_at_step_matching(ename, -1)
            print("* {} mean {:.2f}".format(ename, end_error_mean))

    def print_summary(self):
        """Print the group name, experiment count, mean duration and final error means."""
        print("{} ({} experiments)".format(self.name, len(self.exp)))
        self.print_duration()
        self.print_end_error_means()
        
def plot_barchart(datas, groups=list(), title="", ylabel="", ylim=None, yscale=None, figsize=(5,3)):
    """Draw a grouped bar chart with one bar series per entry of ``datas``.

    Args:
        datas: list of dicts with keys ``"name"``, ``"color"``, ``"means"`` and
            ``"std"``.  ``"means"`` / ``"std"`` hold one item per group; when
            the items are sequences, their last element is plotted.
        groups: tick labels, one per group (must not be empty).
        title, ylabel: axes title and y-axis label.
        ylim: optional y limits.
        yscale: optional y scale name (e.g. ``"log"``).
        figsize: figure size passed to ``pl.subplots``.

    Returns:
        The created figure (after ``pl.show()`` has been called).
    """
    ind = np.arange(len(groups))
    bar_width = 1.0 / (len(datas)+1)

    fig, ax = pl.subplots(figsize=figsize)
    bars = list()
    for i, data in enumerate(datas):
        try:
            # Per-group series: plot the final value of each.
            vals = [v[-1] for v in data["means"]]
            errs = [v[-1] for v in data["std"]]
        except (TypeError, IndexError):
            # Scalars per group (Python floats raise TypeError on indexing,
            # numpy scalars raise IndexError) or a ``None`` std: use as given.
            vals = data["means"]
            errs = data["std"]
        bar = ax.bar(ind + i*bar_width, vals, bar_width, color=data["color"], yerr=errs)
        bars.append(bar)

    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(ind + len(datas)*bar_width/2.0)
    ax.set_xticklabels(groups)
    ax.set_xlim(ind[0]-0.1, ind[-1]+1.1-bar_width)
    if ylim is not None:
        ax.set_ylim(ylim)
    if yscale is not None:
        ax.set_yscale(yscale)

    ax.legend([b[0] for b in bars], [d["name"] for d in datas], loc=2, ncol=2)
    pl.show()
    return fig

def movingaverage(values, window_size=20):
    """Smooth ``values`` with a flat window of ``window_size`` samples.

    The series is padded on both sides with ``window_size`` copies of its
    first / last element before convolving, and the padding is cut off again,
    so the result has the same length as ``values``.

    Args:
        values: numpy array (``.tolist()`` is called on it); must be non-empty.
        window_size: integer width of the averaging window.

    Returns:
        numpy array of ``len(values)`` smoothed samples.
    """
    samples = values.tolist()
    head = [samples[0]] * window_size
    tail = [samples[-1]] * window_size
    kernel = np.ones(int(window_size)) / float(window_size)
    smoothed = np.convolve(head + samples + tail, kernel, 'same')
    return smoothed[window_size:len(values) + window_size]

def _plot_data_to_graph(vals, errs, ax, color, label, smooth, alpha=0.25):
    try:
        x = range(len(vals))
        if smooth is True:
            mav = movingaverage(vals)
            mae = movingaverage(errs)
            ax.plot(x, mav, color, label=label)
            ax.fill_between(x, mav-mae, mav+mae, facecolor=color, alpha=alpha)
        else:
            ax.plot(x, vals, color, label=label)
            ax.fill_between(x, vals+errs, vals-errs, facecolor=color, alpha=alpha)
    except:
        ax.axhline(vals, color=color, label=label)

def plot_graph(datas, groups=list(), title="", ylabel="", ylim=None, yscale=None,
               smooth=False, figsize=(5,5)):
    """Plot every entry of ``datas`` over time, one stacked subplot per group.

    Args:
        datas: list of dicts with keys ``"name"``, ``"color"``, ``"means"`` and
            ``"std"``; ``"means"[i]`` / ``"std"[i]`` belong to ``groups[i]``
            and are either a series or a scalar.
        groups: subplot titles, one per group.
        title: figure title; ``" (smoothed)"`` is appended when ``smooth`` is True.
        ylabel: y-axis label of every subplot.
        ylim, yscale: optional, applied through ``pl.ylim`` / ``pl.yscale``.
        smooth: forwarded to ``_plot_data_to_graph``.
        figsize: figure size passed to ``pl.subplots``.

    Returns:
        The created figure (after ``pl.show()`` has been called).
    """
    # squeeze=False makes subplots always return a 2-D array, so a single
    # group no longer yields a bare Axes that fails on ``axarr[i]``.
    fig, axarr = pl.subplots(len(groups), sharex=True, figsize=figsize, squeeze=False)
    axes = axarr[:, 0]
    for i in range(len(groups)):
        for data in datas:
            vals = np.array(data["means"][i])
            errs = np.array(data["std"][i])
            _plot_data_to_graph(vals, errs, axes[i], data["color"], data["name"], smooth)
        axes[i].set_title(groups[i])
        axes[i].set_ylabel(ylabel)
        if i+1 == len(groups):
            # The legend is attached to the last subplot only.
            axes[i].legend(bbox_to_anchor=(0., -0.3, 1., -1.102), loc=3,
                       ncol=len(datas), mode="expand", borderaxespad=0.)
    if smooth is True:
        pl.suptitle(title + " (smoothed)")
    else:
        pl.suptitle(title)
    # NOTE(review): pl.ylim / pl.yscale act on the current axes only, which is
    # presumably the last subplot - confirm whether all subplots were intended.
    if ylim is not None:
        pl.ylim(ylim)
    if yscale is not None:
        pl.yscale(yscale)

    pl.show()
    return fig

def extract(objlist, methodname, params=[]):
    """Call the method named ``methodname`` on every object and collect the results.

    Args:
        objlist: iterable of objects.
        methodname: name of the method to look up on each object.
        params: positional arguments passed to every call (never modified here).

    Returns:
        List of return values, in the order of ``objlist``.
    """
    results = list()
    for obj in objlist:
        method = getattr(obj, methodname)
        results.append(method(*params))
    return results

def ret_object(typ, *items):
    """Build one plot-data dict per ``(name, color, data)`` item.

    Args:
        typ: ``"duration"`` to collect ``duration_mean`` / ``duration_std``
            from every object in ``data``; any other value is passed as the
            error name to ``error_mean_matching`` / ``error_std_matching``.
        *items: ``(name, color, data)`` triples, ``data`` being a list of objects
            offering the methods above.

    Returns:
        List of dicts with ``"name"``, ``"color"``, ``"means"`` and ``"std"``
        filled in; the ``*_top`` / ``*_bottom`` entries are always None.
    """
    if typ == "duration":
        mean_method, std_method, params = "duration_mean", "duration_std", []
    else:
        mean_method, std_method, params = "error_mean_matching", "error_std_matching", [typ]

    ret = []
    for name, color, data in items:
        ret.append({"name": name,
                    "color": color,
                    "means": extract(data, mean_method, params),
                    "std": extract(data, std_method, params),
                    "means_top": None,
                    "std_top": None,
                    "means_bottom": None,
                    "std_bottom": None})
    return ret

def test_experiments():
    """Self-check of ExperimentGroup statistics and ret_object on in-memory logs.

    Prints "All ok" when every assertion holds.
    """
    # Group 1: per-step and whole-series statistics.
    log_a = TestExperimentLog(1, {"A": [1,1,1,2,2,2], "B": [2,2,2,4,4,4]}, 6)
    log_b = TestExperimentLog(2, {"A": [1,1,1,2,2,2], "B": [3,3,3,3,3,3]}, 6)
    group_one = ExperimentGroup("test1", [log_a, log_b])

    assert group_one.duration_mean() == 1.5
    assert group_one.duration_std() == 0.5

    assert group_one._get_errors_from_step_matching("A", 0) == [1,1]
    assert group_one._get_errors_from_step_matching("B", 3) == [4,3]

    assert group_one.error_mean_at_step_matching("A", 0) == 1
    assert group_one.error_mean_at_step_matching("B", 3) == 3.5
    assert group_one.error_std_at_step_matching("A", 0) == 0
    assert group_one.error_std_at_step_matching("B", 3) == 0.5

    assert group_one.error_mean_matching("A") == [1,1,1,2,2,2]
    assert group_one.error_mean_matching("B") == [2.5, 2.5, 2.5, 3.5, 3.5, 3.5]
    assert group_one.error_std_matching("A") == [0,0,0,0,0,0]
    assert group_one.error_std_matching("B") == [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]

    # Group 2 is only used through ret_object, together with group 1.
    log_c = TestExperimentLog(2, {"A": [2,2,2,2,2,2], "B": [0,0,0,0,0,0]}, 6)
    log_d = TestExperimentLog(4, {"A": [1,1,1,1,1,1], "B": [0,0,0,1,1,1]}, 6)
    group_two = ExperimentGroup("test2", [log_c, log_d])
    both_groups = [group_one, group_two]

    expectations = [
        ("duration",
         [1.5, 3],
         [0.5, 1]),
        ("A",
         [[1,1,1,2,2,2], [1.5, 1.5, 1.5, 1.5, 1.5, 1.5]],
         [[0,0,0,0,0,0], [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]),
    ]
    for typ, expected_means, expected_std in expectations:
        ret = ret_object(typ, ("Test", "g", both_groups))
        assert len(ret) == 1
        assert ret[0]["name"] == "Test"
        assert ret[0]["color"] == "g"
        assert ret[0]["means"] == expected_means
        assert ret[0]["std"] == expected_std
    print("All ok")
    
# Run the self-check as soon as this cell executes; it raises AssertionError on failure.
test_experiments()

<h1>Duration comparison experiment</h1>

In [None]:
# Duration experiment: for each grid size, load runs gt8_1 .. gt8_10 once with
# exact and once with approximate inference, then print a summary per group.
gt_3x3_exact = ExperimentGroup(
    "3x3 grid exact inference",
    [ExperimentLog("../results/time/gt8_{}/experiment_grid_3_approx_False.json".format(run))
     for run in range(1, 11)])
gt_3x3_approx = ExperimentGroup(
    "3x3 grid approx inference",
    [ExperimentLog("../results/time/gt8_{}/experiment_grid_3_approx_True.json".format(run))
     for run in range(1, 11)])
gt_5x5_exact = ExperimentGroup(
    "5x5 grid exact inference",
    [ExperimentLog("../results/time/gt8_{}/experiment_grid_5_approx_False.json".format(run))
     for run in range(1, 11)])
gt_5x5_approx = ExperimentGroup(
    "5x5 grid approx inference",
    [ExperimentLog("../results/time/gt8_{}/experiment_grid_5_approx_True.json".format(run))
     for run in range(1, 11)])
gt_7x7_exact = ExperimentGroup(
    "7x7 grid exact inference",
    [ExperimentLog("../results/time/gt8_{}/experiment_grid_7_approx_False.json".format(run))
     for run in range(1, 11)])
gt_7x7_approx = ExperimentGroup(
    "7x7 grid approx inference",
    [ExperimentLog("../results/time/gt8_{}/experiment_grid_7_approx_True.json".format(run))
     for run in range(1, 11)])

for _group in (gt_3x3_exact, gt_3x3_approx,
               gt_5x5_exact, gt_5x5_approx,
               gt_7x7_exact, gt_7x7_approx):
    _group.print_summary()

def get_duration_datas():
    """Return duration plot data for the exact and approximate time groups.

    The ``"std"`` entry of both series is replaced by None, so the values
    computed by ``ret_object`` are discarded.
    """
    datas = ret_object(
        "duration",
        ("ELIAS", "orange", [gt_3x3_exact, gt_5x5_exact, gt_7x7_exact]),
        ("ALIAS", "blue", [gt_3x3_approx, gt_5x5_approx, gt_7x7_approx]))
    for series in datas:
        series["std"] = None
    return datas

In [None]:
# Bar chart of the first-step durations per grid size, on a log-scaled y axis.
datas = get_duration_datas()
groups = ["3x3 grid", "5x5 grid", "7x7 grid"]
title = "Mean duration of first step of inference "
ylabel = "Duration (s) - log scale"
ylim = (1e0, 1.5e4)
fig = plot_barchart(datas, groups,
                    title=title, ylabel=ylabel, ylim=ylim, yscale="log")
fig.savefig('../results/one_step_duration.png', bbox_inches='tight')

<h1>Exact vs. approximate inference quality experiment</h1>

In [None]:
# Exact vs. approximate inference on the 5x5 grid: the 2-feature groups load
# runs 1, 3, 4 and 7; the 3-feature groups load runs 1 .. 7.
gt_5x5_2f_exact = ExperimentGroup(
    "5x5 grid 2 features exact inference",
    [ExperimentLog("../results/both/gb4_5x5_2f_{}/experiment_approx_False.json".format(run))
     for run in (1, 3, 4, 7)])
gt_5x5_2f_approx = ExperimentGroup(
    "5x5 grid 2 features approx inference",
    [ExperimentLog("../results/both/gb4_5x5_2f_{}/experiment_approx_True.json".format(run))
     for run in (1, 3, 4, 7)])
gt_5x5_3f_exact = ExperimentGroup(
    "5x5 grid 3 features exact inference",
    [ExperimentLog("../results/both/gb4_5x5_3f_{}/experiment_approx_False.json".format(run))
     for run in range(1, 8)])
gt_5x5_3f_approx = ExperimentGroup(
    "5x5 grid 3 features approx inference",
    [ExperimentLog("../results/both/gb4_5x5_3f_{}/experiment_approx_True.json".format(run))
     for run in range(1, 8)])

for _group in (gt_5x5_2f_exact, gt_5x5_2f_approx,
               gt_5x5_3f_exact, gt_5x5_3f_approx):
    _group.print_summary()

def get_eva_exp_datas(typ=""):
    """Return ``ret_object`` plot data of kind ``typ`` for the 5x5 exact and approx groups."""
    return ret_object(
        typ,
        ("ELIAS", "g", [gt_5x5_2f_exact, gt_5x5_3f_exact]),
        ("ALIAS", "r", [gt_5x5_2f_approx, gt_5x5_3f_approx]))

In [None]:
# Random-guess baseline: evaluate each error measure on n_samples parameter
# vectors drawn uniformly from [-1, 0) and summarise them by mean and std.
n_samples = 1000000

# Fixed seed so the baseline (and the figures using it) is identical on every
# Restart & Run All.  A private RandomState keeps numpy's global RNG untouched.
baseline_rng = np.random.RandomState(0)


def _random_guess_errors(error, n_features):
    """Return ``error._error`` of ``n_samples`` random guesses with ``n_features`` entries each."""
    return [error._error([baseline_rng.uniform(-1, 0) for i in range(n_features)])
            for j in range(n_samples)]


gt2 = [-0.33, -0.67]
l2e = L2Error(ground_truth=gt2)
oe = OrderError(ground_truth=gt2)
pe = ProportionError(ground_truth=gt2)
rnd_l2e_2f = _random_guess_errors(l2e, 2)
rnd_oe_2f = _random_guess_errors(oe, 2)
rnd_pe_2f = _random_guess_errors(pe, 2)

gt3 = [-0.25, -0.5, -0.75]
l2e = L2Error(ground_truth=gt3)
oe = OrderError(ground_truth=gt3)
pe = ProportionError(ground_truth=gt3)
rnd_l2e_3f = _random_guess_errors(l2e, 3)
rnd_oe_3f = _random_guess_errors(oe, 3)
rnd_pe_3f = _random_guess_errors(pe, 3)

# One entry per feature count: [2 features, 3 features].
random_L2_means = [np.mean(rnd_l2e_2f),
                   np.mean(rnd_l2e_3f),
                  ]
random_L2_stds = [np.std(rnd_l2e_2f),
                  np.std(rnd_l2e_3f),
                 ]
random_lp_means = [np.mean(rnd_pe_2f),
                   np.mean(rnd_pe_3f),
                  ]
random_lp_stds = [np.std(rnd_pe_2f),
                  np.std(rnd_pe_3f),
                 ]
random_ord_means = [np.mean(rnd_oe_2f),
                    np.mean(rnd_oe_3f),
                   ]
random_ord_stds = [np.std(rnd_oe_2f),
                   np.std(rnd_oe_3f),
                  ]

In [None]:
# Bar chart of the inference durations on the 5x5 grid, log-scaled y axis.
datas = get_eva_exp_datas("duration")
groups = ["2 Features", "3 Features"]
title = "Mean duration of inference on 5x5 grid"
ylabel = "Duration (s)"
ylim = (1, 1e6)
fig = plot_barchart(datas, groups,
                    title=title, ylabel=ylabel, ylim=ylim, yscale="log")
fig.savefig('../results/full_duration.png', bbox_inches='tight')

In [None]:
# L2 error on the 5x5 grid, with the random baseline ("RND") as an extra
# series: first the bar chart, then the smoothed curves over time.
datas = get_eva_exp_datas("L2") + [
    dict(name="RND", color="k", means=random_L2_means, std=random_L2_stds)]
groups = ["2 Features", "3 Features"]
title = "Mean L2 error on 5x5 grid"
ylabel = "L2 error to ground truth"
ylim = (0, 1.2)
fig = plot_barchart(datas, groups, title=title, ylabel=ylabel, ylim=ylim)
fig.savefig('../results/l2_errors_comp.png', bbox_inches='tight')

fig = plot_graph(datas, groups, title=title + " over time", ylabel="L2", smooth=True)
fig.savefig('../results/l2_errors_comp_temp.png', bbox_inches='tight')

In [None]:
# Log proportion error on the 5x5 grid, with the random baseline ("RND") as an
# extra series: first the bar chart, then the smoothed curves over time.
datas = get_eva_exp_datas("Proportion") + [
    dict(name="RND", color="k", means=random_lp_means, std=random_lp_stds)]
groups = ["2 Features", "3 Features"]
title = "Mean log proportion error on 5x5 grid"
ylabel = "Log proportion error to ground truth"
ylim = (-2, 4)
fig = plot_barchart(datas, groups, title=title, ylabel=ylabel, ylim=ylim)
fig.savefig('../results/lp_errors_comp.png', bbox_inches='tight')

fig = plot_graph(datas, groups, title=title + " over time", ylabel="Log prop.", smooth=True)
fig.savefig('../results/lp_errors_comp_temp.png', bbox_inches='tight')

In [None]:
# Ordering error on the 5x5 grid, with the random baseline ("RND") as an extra
# series: first the bar chart, then the smoothed curves over time.
datas = get_eva_exp_datas("Order") + [
    dict(name="RND", color="k", means=random_ord_means, std=random_ord_stds)]
groups = ["2 Features", "3 Features"]
title = "Mean ordering error on 5x5 grid"
ylabel = "Ordering error to ground truth"
ylim = (0, 4.5)
fig = plot_barchart(datas, groups, title=title, ylabel=ylabel, ylim=ylim)
fig.savefig('../results/ord_errors_comp.png', bbox_inches='tight')

fig = plot_graph(datas, groups, title=title + " over time", ylabel="Order", smooth=True)
fig.savefig('../results/ord_errors_comp_temp.png', bbox_inches='tight')

<h1>Discrepancy based inference scaling up experiment</h1>

In [None]:
# Discrepancy-based inference runs per grid size and feature count.  The run
# numbers are not contiguous for every group; the comments list which are loaded.
gd_9x9_2f = ExperimentGroup(
    "9x9 grid 2 features",
    [ExperimentLog("../results/disc5/gd5_9x9_2f_{}/experiment.json".format(run))
     for run in range(1, 21)])                          # runs 1 .. 20
gd_9x9_3f = ExperimentGroup(
    "9x9 grid 3 features",
    [ExperimentLog("../results/disc5/gd5_9x9_3f_{}/experiment.json".format(run))
     for run in (1, 2, 3)])
gd_13x13_2f = ExperimentGroup(
    "13x13 grid 2 features",
    [ExperimentLog("../results/disc5/gd5_13x13_2f_{}/experiment.json".format(run))
     for run in range(1, 21) if run != 15])             # runs 1 .. 20 without 15
gd_13x13_3f = ExperimentGroup(
    "13x13 grid 3 features",
    [ExperimentLog("../results/disc5/gd5_13x13_3f_{}/experiment.json".format(run))
     for run in (2, 3)])
gd_19x19_2f = ExperimentGroup(
    "19x19 grid 2 features",
    [ExperimentLog("../results/disc5/gd5_19x19_2f_{}/experiment.json".format(run))
     for run in range(1, 20) if run != 6])              # runs 1 .. 19 without 6
# This group is read from the "disc/gd4_*" directories, unlike the others.
gd_19x19_3f = ExperimentGroup(
    "19x19 grid 3 features",
    [ExperimentLog("../results/disc/gd4_19x19_3f_{}/experiment.json".format(run))
     for run in range(2, 11) if run != 5])              # runs 2 .. 10 without 5

for _group in (gd_9x9_2f, gd_9x9_3f,
               gd_13x13_2f, gd_13x13_3f,
               gd_19x19_2f, gd_19x19_3f):
    _group.print_summary()

def get_app_exp_datas(typ=""):
    """Return ``ret_object`` plot data of kind ``typ``, one series per grid size.

    Each series holds the 2-feature group followed by the 3-feature group.
    """
    series_per_grid = (
        ("9x9", "g", [gd_9x9_2f, gd_9x9_3f]),
        ("13x13", "r", [gd_13x13_2f, gd_13x13_3f]),
        ("19x19", "b", [gd_19x19_2f, gd_19x19_3f]),
    )
    return ret_object(typ, *series_per_grid)

In [None]:
# Bar chart of the inference durations of the discrepancy-based runs.
datas = get_app_exp_datas("duration")
groups = ["2 Features", "3 Features"]
title = "Mean inference duration"
ylabel = "Duration(s)"
ylim = (0, 50000)
fig = plot_barchart(datas, groups, title=title, ylabel=ylabel, ylim=ylim)
fig.savefig('../results/disc_duration.png', bbox_inches='tight')

In [None]:
# L2 error of the discrepancy-based runs, with the random baseline ("RND") as
# an extra series: first the bar chart, then the smoothed curves over time.
datas = get_app_exp_datas("L2") + [
    dict(name="RND", color="k", means=random_L2_means, std=random_L2_stds)]
groups = ["2 Features", "3 Features"]
title = "Mean L2 error"
ylabel = "L2 to ground truth"
ylim = (0, 1.2)
fig = plot_barchart(datas, groups, title=title, ylabel=ylabel, ylim=ylim)
fig.savefig('../results/disc_l2_errors_comp.png', bbox_inches='tight')

fig = plot_graph(datas, groups, title=title + " over time", ylabel="L2", smooth=True)
fig.savefig('../results/disc_l2_errors_comp_temp.png', bbox_inches='tight')

In [None]:
# Log proportion error of the discrepancy-based runs, with the random baseline
# ("RND") as an extra series: first the bar chart, then the smoothed curves.
datas = get_app_exp_datas("Proportion") + [
    dict(name="RND", color="k", means=random_lp_means, std=random_lp_stds)]
groups = ["2 Features", "3 Features"]
title = "Mean log proportion error"
ylabel = "Log proportion error to ground truth"
ylim = (-2, 3)
fig = plot_barchart(datas, groups, title=title, ylabel=ylabel, ylim=ylim)
fig.savefig('../results/disc_lp_errors_comp.png', bbox_inches='tight')

fig = plot_graph(datas, groups, title=title + " over time", ylabel="Log prop.", smooth=True)
fig.savefig('../results/disc_lp_errors_comp_temp.png', bbox_inches='tight')

In [None]:
# Ordering error of the discrepancy-based runs, with the random baseline
# ("RND") as an extra series: first the bar chart, then the smoothed curves.
datas = get_app_exp_datas("Order") + [
    dict(name="RND", color="k", means=random_ord_means, std=random_ord_stds)]
groups = ["2 Features", "3 Features"]
title = "Mean ordering error"
ylabel = "Ordering error to ground truth"
ylim = (0, 5)
fig = plot_barchart(datas, groups, title=title, ylabel=ylabel, ylim=ylim)
fig.savefig('../results/disc_ord_errors_comp.png', bbox_inches='tight')

fig = plot_graph(datas, groups, title=title + " over time", ylabel="Order", smooth=True)
fig.savefig('../results/disc_ord_errors_comp_temp.png', bbox_inches='tight')