# Result Sets for ESEC/FSE 2021

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ethanluoyc/sympais/blob/master/notebooks/results.ipynb)

This notebook is used to plot the key figures in the paper.

## Setup

In [None]:
# Detect whether the notebook runs on Google Colab; the setup cells below use
# IN_COLAB to decide whether dependencies have to be installed.
try:
    import google.colab  # noqa: F401  (imported only to probe availability)
except ImportError:
    # Only a missing module means "not on Colab". Anything else (including a
    # KeyboardInterrupt) is allowed to surface instead of being swallowed.
    IN_COLAB = False
else:
    IN_COLAB = True

In [None]:
# Optional GitHub access token. When non-empty it is interpolated into the
# pip URL below; leave it empty to install without credentials.
# NOTE(review): avoid committing a real token in this cell.
GIT_TOKEN = ""
if IN_COLAB:
    # Refresh the packaging tools before installing sympais from GitHub.
    !pip install -U pip setuptools wheel
    if GIT_TOKEN:
        # Authenticated install: the token is embedded in the repository URL.
        !pip install git+https://{GIT_TOKEN}@github.com/ethanluoyc/sympais.git#egg=sympais
    else:
        # Anonymous install from the same repository.
        !pip install git+https://github.com/ethanluoyc/sympais.git#egg=sympais

if IN_COLAB:
    # Download a file from Google Drive as `realpaver`, mark it executable and
    # copy it to /usr/local/bin (presumably so it is found on PATH -- confirm).
    !curl -L "https://drive.google.com/uc?export=download&id=1_Im0Ot5TjkzaWfid657AV_gyMpnPuVRa" -o realpaver
    !chmod u+x realpaver
    !cp realpaver /usr/local/bin

### Imports

In [None]:
import collections
import numpy as np
import itertools
import matplotlib
import matplotlib.pyplot as plt
import math
import pandas as pd
import os
import seaborn as sns

%load_ext autoreload
%autoreload 2

### Set up constants

In [None]:
# Download the results archive from Google Drive and save it as results.tar.gz
# in the current working directory (-L follows redirects).
!curl -L "https://drive.google.com/uc?export=download&id=1GDl8ZbbKNb43Y-jK7GLdByfWRjfHJt3L" -o results.tar.gz

In [None]:
# Unpack results.tar.gz into the current directory, listing each extracted file.
# NOTE(review): presumably this creates the ./results tree read by the cells
# below -- confirm against the archive layout.
!tar xvzf results.tar.gz

In [None]:
# Select seaborn's "deep" colour cycle for all figures.
sns.set_palette("deep")

# Colour-cycle entry assigned to each method name; passed as `palette=` to the
# seaborn plots so a method keeps the same colour across figures.
PALETTE = {
    "DMC": "C0",
    "qCoral": "C1",
    "SYMPAIS": "C2",
    "SYMPAIS-H": "C3",
}

In [None]:
# Root directory handed to read_experiment() as `log_path` by the cells below.
RESULT_PATH = "./results"

### Helper functions

In [None]:
def read_experiment(log_path,
                    parameter_set=None,
                    job_descriptor='',
                    verbose=False):
    """Read the ``results.jsonl`` logs of a parameter sweep into one frame.

    One log is read for every element of the cross product of the value lists
    in ``parameter_set``, from ``{log_path}/{name}/results.jsonl``.

    Args:
      log_path: Root directory that contains one sub-directory per run.
      parameter_set: Ordered mapping from parameter name to an iterable of
        values. ``None`` means "no parameters", i.e. a single run.
      job_descriptor: ``str.format`` template that is filled positionally with
        the parameter values (in ``parameter_set`` order) to obtain the run's
        sub-directory. When empty or ``None`` the name is built as
        ``key1_value1-key2_value2-...`` instead.
      verbose: If true, print every log path before it is read.

    Returns:
      ``pd.concat`` of the per-run frames. Each frame gets one extra column per
      parameter holding that run's value. The per-file row index is kept, so
      index labels repeat across runs.

    Raises:
      ValueError: Raised by ``pd.concat`` when some parameter has no values and
        therefore no log is read. Errors from ``pd.read_json`` (for example a
        missing log file) propagate unchanged.
    """
    keys = [] if parameter_set is None else list(parameter_set.keys())
    # Extract parameter value lists, one per parameter.
    ordered_values = [parameter_set[key] for key in keys]

    # Take the cross product of the value lists. This generates tuples of the
    # form (p1, p2, p3, ...) where p1, p2, p3 are values of the first, second,
    # etc. parameters as ordered in `keys`.
    dfs = []
    for parameter_tuple in itertools.product(*ordered_values):
        # Truthiness rather than `is not None`: with the default '' the
        # template would format to '' for every run and all runs would resolve
        # to the same path, leaving the fallback below unreachable.
        if job_descriptor:
            name = job_descriptor.format(*parameter_tuple)
        else:
            # Construct name from the parameter names and values.
            name = '-'.join('{}_{}'.format(key, value)
                            for key, value in zip(keys, parameter_tuple))

        experiment_path = '{}/{}/results.jsonl'.format(log_path, name)
        if verbose:
            print(experiment_path)
        df = pd.read_json(experiment_path, lines=True)
        # Tag every row with the parameter values of the run it came from.
        for key, value in zip(keys, parameter_tuple):
            df[key] = value
        dfs.append(df)
    return pd.concat(dfs)
    
def relative_absolute_error(data, reference):
    """Compute the relative absolute error with respect to a reference point.

    Evaluates ``|data - reference| / |reference|`` with NumPy, so scalars,
    arrays and pandas objects are handled element-wise.

    Args:
      data: Estimate(s) to evaluate.
      reference: Reference value(s) the estimates are compared against.

    Returns:
      The non-negative relative error. A zero reference is not special-cased
      and follows NumPy division semantics.
    """
    # Normalise by the magnitude of the reference so that a negative reference
    # cannot flip the sign of an error that is meant to be absolute.
    return np.abs(data - reference) / np.abs(reference)

In [None]:
def rename_method(df, key='method'):
    """Return a copy of ``df`` with internal method ids mapped to display names.

    Values of column ``key`` that start with ``dmc``, ``pimais``, ``hpimais``
    or ``stratified`` have that prefix replaced by ``DMC``, ``SYMPAIS``,
    ``SYMPAIS-H`` or ``qCoral`` respectively. The input frame is not modified.
    """
    # (anchored regex, replacement) pairs, applied in this order.
    prefix_to_label = (
        (r'^dmc', 'DMC'),
        (r'^pimais', 'SYMPAIS'),
        (r'^hpimais', 'SYMPAIS-H'),
        (r'^stratified', 'qCoral'),
    )
    renamed = df.copy()
    column = renamed[key]
    for pattern, label in prefix_to_label:
        column = column.str.replace(pattern, label, regex=True)
    renamed[key] = column
    return renamed

## Sphere
This section shows the results for the sphere benchmark.

### Compute reference solution

In [None]:
# Reference solution for the sphere benchmark: average the "dmc" runs with 1e8
# samples over the 20 seeds, separately for every dimension d in 3..10.
sphere_reference_result = (
    read_experiment(
        RESULT_PATH,
        collections.OrderedDict([
            ("method", ["dmc"]),
            ("num_samples", [int(1e8)]),
            ("d", np.arange(3, 11)),
            ("seed", np.arange(20)),
        ]),
        "sphere/{}/num_samples-{}_nd-{}_seed-{}",
    )
    .groupby("d")
    # numeric_only=True: the frame also carries the string "method" column,
    # which pandas >= 2.0 refuses to average instead of silently dropping it.
    .mean(numeric_only=True)
)
sphere_reference_result

### Load benchmark results

In [None]:
# Benchmark runs for the sphere problem: all four methods with 1e6 samples,
# dimensions d = 3..10 and 20 seeds each.
sphere_result = read_experiment(
    log_path=RESULT_PATH,
    parameter_set=collections.OrderedDict(
        method=["dmc", "pimais", "hpimais", "stratified"],
        num_samples=[int(1e6)],
        d=np.arange(3, 11),
        seed=np.arange(20),
    ),
    job_descriptor="sphere/{}/num_samples-{}_nd-{}_seed-{}",
)
sphere_result

### Plotting

In [None]:
# Compute the relative absolute error (RAE) of every sphere run against the
# per-dimension reference mean, then draw it as a grouped bar plot.
data = (
    sphere_result
    .pipe(rename_method)
    # Categorical with an explicit category list (presumably to fix the order
    # in which the methods are drawn -- confirm).
    .assign(method=lambda df: pd.Categorical(df.method, categories=['DMC', 'qCoral', 'SYMPAIS', 'SYMPAIS-H']))
    # Index by (d, method): the reference frame is indexed by d after its
    # groupby, so the subtraction below can be matched on d.
    .set_index(['d', 'method'])
    .assign(rae=lambda df: relative_absolute_error(df['mean'], sphere_reference_result['mean']))
    .reset_index()
)

fig, ax = plt.subplots()
# One group of bars per dimension d, one bar per method.
sns.barplot(x="d", y="rae", 
            hue="method", data=data, ax=ax)
ax.set_yscale("log")
ax.set(ylabel="RAE", xlabel="$d$")
# The trailing semicolon suppresses the notebook's echo of the return value.
ax.legend(title=None, fontsize=8);
# Optional layout tweaks and figure export, disabled by default:
# plt.tight_layout()
# plt.subplots_adjust(left=.25, bottom=.2, top=.9)
# fig.savefig("images/sphere_num_dimensions_barplot.pdf")

## Torus
This section shows the results for the torus benchmark.

### Compute reference solution

In [None]:
# Reference solution for the torus benchmark: average the "dmc" runs with 1e8
# samples over the 20 seeds, separately for each input profile.
torus_reference_result = (
    read_experiment(
        RESULT_PATH,
        collections.OrderedDict([
            ("method", ["dmc"]),
            ("num_samples", [int(1e8)]),
            ("profile", ['independent', 'correlated']),
            ("seed", np.arange(20)),
        ]),
        "torus/{}/num_samples-{}_profile-{}_seed-{}",
    )
    .groupby('profile')
    # numeric_only=True: the frame also carries the string "method" column,
    # which pandas >= 2.0 refuses to average instead of silently dropping it.
    .mean(numeric_only=True)
)
torus_reference_result

### Load benchmark results (independent)

In [None]:
# Benchmark runs for the torus problem with the "independent" profile:
# all four methods with 1e6 samples and 20 seeds each.
torus_independent_result = read_experiment(
    log_path=RESULT_PATH,
    parameter_set=collections.OrderedDict(
        method=["pimais", "hpimais", "dmc", "stratified"],
        num_samples=[int(1e6)],
        profile=["independent"],
        seed=np.arange(20),
    ),
    job_descriptor="torus/{}/num_samples-{}_profile-{}_seed-{}",
    verbose=False,
)

### Plotting 

In [None]:
# Massage data into the desired format: one row per run with its relative
# absolute error (RAE) against the reference mean of the matching profile.
data = (
    torus_independent_result
    .pipe(rename_method)
    .assign(method=lambda df: pd.Categorical(df.method, categories=['DMC', 'qCoral', 'SYMPAIS', 'SYMPAIS-H']))
    # The reference frame is indexed by profile after its groupby, so index by
    # profile here for the subtraction below to be matched on it.
    .set_index(['profile', 'method'])
    .assign(
        rae=lambda df: relative_absolute_error(df['mean'], torus_reference_result['mean'])
    )
    #.groupby(['profile', 'method']).mean()
).reset_index()

fig, ax = plt.subplots()
#     fig.set_size_inches(3.33 * .5, 3.33 * .5 * .6)

# One bar per method, coloured according to the PALETTE mapping.
sns.barplot(x="method", y="rae", data=data, 
            ax=ax, palette=PALETTE)
# plt.yscale acts on the current axes, i.e. the `ax` created just above.
plt.yscale("log")
ax.set(ylabel="RAE", title="Torus (independent)")
#     plt.tight_layout()
plt.subplots_adjust(top=.8, bottom=.2, left=.25)
# Drop the minor ticks of the log-scaled y axis.
ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())
# ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator([0.1, .01, .001, 0.0001]))
#     ax.set_ylim((1e-4, 1e-1))
ax.tick_params(axis="x", which="major")
#     ax.tick_params(axis="x", labelrotation=30)
# Clear the x-axis label; the trailing semicolon suppresses the cell echo.
ax.set_xlabel("");

### Load benchmark results (correlated)

In [None]:
# Benchmark runs for the torus problem with the "correlated" profile. Only
# three methods are loaded here; "stratified" is not part of this sweep.
torus_correlated_result = read_experiment(
    RESULT_PATH,
    collections.OrderedDict([
        ("method", ["pimais", "hpimais", "dmc"]),
        ("num_samples", [int(1e6)]),
        ("profile", ['correlated']),
        ("seed", np.arange(20)),
    ]),
    "torus/{}/num_samples-{}_profile-{}_seed-{}", verbose=False
)
# Attach the relative absolute error (RAE) of every run against the reference
# mean; the reference frame is indexed by profile after its groupby.
data = (
    torus_correlated_result
    .pipe(rename_method)
    # .assign(method=lambda df: pd.Categorical(df.method, categories=['DMC', 'qCoral', 'SYMPAIS', 'SYMPAIS-H']))
    .set_index(['profile', 'method', 'seed'])
    .assign(rae=lambda df: relative_absolute_error(df['mean'], torus_reference_result['mean']))
    # .groupby(['profile', 'method']).mean()
    .reset_index()
)

### Plotting (correlated)

In [None]:
# Bar plot of the RAE per method for the correlated torus profile; reads the
# `data` frame built in the previous cell.
fig, ax = plt.subplots()
#     fig.set_size_inches(3.33 * .5, 3.33 * .5 * .6)

# `order` fixes the left-to-right order of the bars explicitly, since `method`
# is a plain string column in this cell.
sns.barplot(x="method", y="rae", data=data, ax=ax, palette=PALETTE,
           order=['DMC', 'SYMPAIS', 'SYMPAIS-H']
           )
# plt.yscale acts on the current axes, i.e. the `ax` created just above.
plt.yscale("log")
ax.set(ylabel="RAE", title="Torus (correlated)")
#     plt.tight_layout()
plt.subplots_adjust(top=.8, bottom=.2, left=.25)
# Drop the minor ticks and pin the major ticks to fixed decades.
ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())
ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator([0.1, .01, .001, 0.0001]))
#     ax.set_ylim((1e-4, 1e-1))
ax.tick_params(axis="x", which="major")
#     ax.tick_params(axis="x", labelrotation=30)
# Clear the x-axis label; the trailing semicolon suppresses the cell echo.
ax.set_xlabel("");

## Coral
This section shows the results on the set of benchmark problems used by the qCoral paper.

The reference results are computed with Mathematica.

### Load reference results

We use Mathematica to compute the reference results.

In [None]:
# Inline CSV with the reference value ("mean") for every (filename,
# path_index) pair; it is parsed with pandas in the next cell.
# NOTE(review): the last two rows (example-cart-12.m, paths 3 and 4) carry the
# same value 0.000223309 -- confirm this is not a copy-paste slip.
gt = """
filename,path_index,mean
example-carton-5-0.m,0,7.98577e-07
example-carton-5-0.m,1,1.70593e-11
example-carton-5-0.m,2,1.2167e-05
example-carton-5-0.m,3,9.19246e-07
example-carton-5-0.m,4,0.000161042
example-ckd-epi-0.m,0,0.00053025
example-ckd-epi-0.m,1,0.000197691
example-ckd-epi-0.m,2,0.000307342
example-ckd-epi-0.m,3,0.000547318
example-ckd-epi-0.m,4,8.81089e-05
example-ckd-epi-simple-0.m,0,0.00222638
example-ckd-epi-simple-0.m,1,0.00164958
example-ckd-epi-simple-0.m,2,0.000855157
example-ckd-epi-simple-0.m,3,0.0155994
example-ckd-epi-simple-0.m,4,0.148682
framingham-0.m,0,3.62258e-07
framingham-0.m,1,0.000444308
framingham-0.m,2,0.000523834
framingham-0.m,3,7.78248e-06
framingham-0.m,4,0.00106958
example-cart-12.m,0,4.30306e-05
example-cart-12.m,1,4.03696e-05
example-cart-12.m,2,0.00767075
example-cart-12.m,3,0.000223309
example-cart-12.m,4,0.000223309
"""

In [None]:
import io

# Parse the inline CSV held in `gt`, rename its key columns to the names used
# by the benchmark frames, and index the reference values by (subject, path).
coral_reference_results = (
    pd.read_csv(io.StringIO(gt))
    .rename(columns={"filename": "subject", "path_index": "path"})
    .set_index(["subject", "path"])
)
coral_reference_results

### Load benchmark results

In [None]:
# Benchmark runs for the coral subjects: three methods with 1e6 samples,
# five subjects, paths 0..4 and seeds 0..4.
coral_result = read_experiment(
    log_path=RESULT_PATH,
    parameter_set=collections.OrderedDict(
        method=["pimais", "dmc", "stratified"],
        num_samples=[int(1e6)],
        subject=[
            "example-cart-12.m",
            "example-carton-5-0.m",
            "example-ckd-epi-0.m",
            "example-ckd-epi-simple-0.m",
            "framingham-0.m",
        ],
        path=np.arange(5),
        seed=np.arange(5),
    ),
    job_descriptor="coral/{}/num_samples-{}_subject-{}_path-{}_seed-{}",
)

In [None]:
# Echo the loaded frame as the cell output for a quick visual check.
coral_result

### Plotting

In [None]:
# Attach the relative absolute error (RAE) of every coral run against the
# reference value of its (subject, path) pair, then plot one panel per subject.
data = (
    coral_result
    .pipe(rename_method)
    .assign(method=lambda df: pd.Categorical(df.method, categories=['DMC', 'qCoral', 'SYMPAIS']))
    # The reference frame is indexed by (subject, path); both levels are part
    # of this index so the subtraction below can be matched on them.
    .set_index(['subject', 'path', 'method', 'seed'])
    .assign(rae=lambda d: 
            relative_absolute_error(d['mean'], coral_reference_results['mean']))
    .reset_index()
)
# The renamed column doubles as the x-axis label of the plot.
data = data.rename(columns={"path": "Path Constraint ID"})
# Keep the second capture group: the file name without the optional "example-"
# prefix and without its extension, e.g. "example-cart-12.m" -> "cart-12".
data["subject"] = data["subject"].str.extract(r"(example-)?([a-z-\d]+)\.").iloc[:, 1]
# One bar-plot column per subject; the panels share the y axis but not x.
g = sns.catplot(
    x="Path Constraint ID",
    y="rae",
    col="subject",
    hue="method",
#     ci="sd",
    data=data,
    kind="bar",
    sharey=True,
    sharex=False,
    palette=PALETTE,
    legend=True,
#     col_wrap=3,
    height=2
)
g.set(yscale="log")
# plt.gcf().set_size_inches((6, 6/4))
# Title every panel with just the subject name.
g.set_titles(col_template="{col_name}")
# Label the y axis on the first panel only; the semicolon suppresses the echo.
g.axes.flat[0].set(ylabel="RAE");
# g.axes[3].set(ylabel="RAE");

## ACAS Xu

In [None]:
import io

# Reference solution for the ACAS Xu benchmark: average the "dmc" runs with
# 1e7 samples over the 20 seeds, separately for every path 0..4.
acasxu_reference_results = (
    read_experiment(
        RESULT_PATH,
        collections.OrderedDict([
            ("method", ["dmc"]),
            ("num_samples", [int(1e7)]),
            ("path", np.arange(5)),
            ("seed", np.arange(20)),
        ]),
        "acasxu/{}/num_samples-{}_path-{}_seed-{}",
    )
    .groupby('path')
    # numeric_only=True: the frame also carries the string "method" column,
    # which pandas >= 2.0 refuses to average instead of silently dropping it.
    .mean(numeric_only=True)
)
acasxu_reference_results

In [None]:
# Benchmark runs for ACAS Xu: three methods with 1e6 samples, paths 0..4 and
# 20 seeds each.
acasxu_results = read_experiment(
    log_path=RESULT_PATH,
    parameter_set=collections.OrderedDict(
        method=["dmc", "stratified", "pimais"],
        num_samples=[int(1e6)],
        path=np.arange(5),
        seed=np.arange(20),
    ),
    job_descriptor="acasxu/{}/num_samples-{}_path-{}_seed-{}",
)


In [None]:
# Attach the relative absolute error (RAE) of every ACAS Xu run against the
# per-path reference mean (the reference frame is indexed by path).
data = (
    acasxu_results
    .pipe(rename_method)
    .assign(
        method=lambda frame: pd.Categorical(
            frame["method"], categories=["DMC", "qCoral", "SYMPAIS"]
        )
    )
    .set_index(["path", "method", "seed"])
    .assign(
        rae=lambda frame: relative_absolute_error(
            frame["mean"], acasxu_reference_results["mean"]
        )
    )
    .reset_index()
)

In [None]:
# Grouped bar plot of the RAE per path constraint, one bar per method.
fig, ax = plt.subplots()
sns.barplot(data=data, x="path", y="rae", hue="method", ax=ax)
# Axis labels and the log-scaled y axis are configured in one call.
ax.set(xlabel="Path Constraint ID", ylabel="RAE", yscale="log")
plt.tight_layout()
# plt.legend(fontsize=6)
# fig.savefig("images/acasxu_barplot.pdf")