# Ecole Benchmark Data Analysis

In [1]:
import numpy as np
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets

# Use seaborn's "whitegrid" theme for the figures drawn below
sns.set_theme(style="whitegrid")

In [2]:
%matplotlib inline

## Branching Dynamics Benchmark

Student's t-test to check whether the ratio between the branching dynamics time and the branching rule time differs significantly from `1.0`.

We use wall time because the time expected to be lost in `ReverseControl` would not be CPU-bound.
We use a ratio instead of a difference due to the high variation in instance solving times.

In [3]:
# Timings of the branching benchmark, read from the CSV export
branching_df = pd.read_csv("data/benchmark-branching.csv")

# Per-row ratio of the branching dynamics wall time to the branching rule wall time.
# The column names contain no placeholders, so plain string literals are used.
ratio = branching_df["branching_dynamics:wall_time_s"] / branching_df["branching_rule:wall_time_s"]
print(f"ratio ~ {ratio.mean():.3f} ± {ratio.std():.3f}")

ratio ~ 1.005 ± 0.022


One sample T-test for the hypothesis that the ratio mean is equal to `1.0`:

In [4]:
# One-sample t-test of the ratios against a population mean of 1.0;
# the cell displays the resulting p-value
stats.ttest_1samp(ratio, popmean=1.0).pvalue

5.6959634663725635e-52

## Observation Benchmark

In [5]:
# Load the observation benchmark and derive each row's generator,
# i.e. the part of its "name" that precedes the first "-"
observation_df = pd.read_csv("data/observation.csv").assign(
    generator=lambda frame: frame["name"].map(lambda instance: instance.partition("-")[0])
)

In [6]:
# Multiplicative factor converting a duration in seconds into each display unit
time_units = {
    "s": 1.0,
    "ms": 1e3,
    "µs": 1e6,
    "ns": 1e9,
}

def plot(observation, time, generator, normalize, unit):
    """Draw split violin plots comparing the Ecole and Gasse et al. timings.

    Reads the module-level ``observation_df`` and ``time_units``.

    Parameters
    ----------
    observation : str
        Observation function name as it appears in the column names
        (``ecole:<observation>:<time>_time_s``).
    time : str
        Kind of time measure as it appears in the column names.
    generator : str
        Value of the ``generator`` column whose rows are plotted.
    normalize : str or None
        Name of a column to divide the times by, or ``None`` to plot the
        converted times unchanged.
    unit : str
        Key of ``time_units``; the time columns are multiplied by the matching factor.
    """
    # Select given generator
    df = observation_df[observation_df["generator"] == generator].copy()

    # Count the rows before melting: the melt below stacks the two measured
    # columns, so the long-format frame has twice as many rows.
    n_instances = len(df)

    # Columns with numerical information being plotted
    data_cols_names = [f"ecole:{observation}:{time}_time_s", f"ecole_vs_gasse:{observation}:{time}_time_s"]

    # Change unit and normalize by given quantity
    for col_name in data_cols_names:
        df[col_name] *= time_units[unit]
        if normalize is not None:
            df[col_name] = df[col_name] / df[normalize]

    # Unpivot the data to make it in long format
    df = df.melt(
        id_vars=["name"],
        value_vars=data_cols_names,
        value_name=f"{time}_time",
        var_name="Measure",
    )

    # Rename implementation label
    df["Implementation"] = df["Measure"].map(lambda name: name.split(":")[0])
    df.loc[df["Implementation"] == "ecole", "Implementation"] = "Ecole"
    df.loc[df["Implementation"] == "ecole_vs_gasse", "Implementation"] = "Gasse et al."

    # Plot data
    fig, ax = plt.subplots(dpi=200, figsize=(20, 10))
    sns.violinplot(
        data=df,
        x="name",
        y=f"{time}_time",
        hue="Implementation",
        split=True,
        inner="quart",
        linewidth=1,
        ax=ax,
    )

    # The plotted values are divided by the `normalize` column when one is
    # given, so the axis label must say so.
    ylabel = f"{time.capitalize()} Time ({unit})"
    if normalize is not None:
        ylabel = f"{time.capitalize()} Time per {normalize} ({unit})"

    # Customize Presentation
    # NOTE(review): the x axis shows the "name" column but is labelled "Generator" - confirm the label.
    sns.despine(left=True)
    ax.set(
        title=f"Distribution of Computation Time over {n_instances} Instances",
        xlabel="Generator",
        ylabel=ylabel,
    )

In [8]:
# Build one dropdown per `plot` argument and redraw the figure on every change.
# The result is bound to `_` so the cell shows the widget only.
# Options stay lists: `interact` reads a tuple as a numeric (min, max[, step]) range.
_ = ipywidgets.interact(
    plot,
    observation=["NodeBipartite", "Khalil2016"],
    time=["wall", "cpu"],
    generator=observation_df["generator"].unique(),
    normalize=[None, "n_nodes", "n_vars", "n_cons", "root_nnz", "root_n_cols", "root_n_rows"],
    unit=list(time_units),
)

interactive(children=(Dropdown(description='observation', options=('NodeBipartite', 'Khalil2016'), value='Node…