In [None]:
import numpy as np
import pandas as pd
from plotly import graph_objects as go

# Metric

In [None]:
# Inclusive [lower, upper] epsilon window; rows whose epsilon falls outside it
# are left out of the per-engine curves.
epsilon_range = [0.01, 1000]

# Name of the results column that is plotted on the y-axis.
metric = "similarity"

# Data

In [None]:
def _first_or_nan(values):
    """Return the first element of ``values``, or NaN when ``values`` is empty."""
    return values.iloc[0] if len(values) else np.nan


# Benchmark results table. It is read for its "exp_idx", "engine", "epsilon"
# and `metric` columns.
data = pd.read_csv("data/benchmark_results.csv")

# Reference scores taken from the first row tagged "high_baseline" /
# "low_baseline". A missing row yields NaN instead of an IndexError, so a
# `pd.notnull` check is enough to detect an absent baseline.
prediction = _first_or_nan(data.loc[data.exp_idx == "high_baseline", metric])
majority = _first_or_nan(data.loc[data.exp_idx == "low_baseline", metric])

# Visual

In [None]:
# Per-engine (r, g, b) components. They are formatted into the rgb(...) line
# color and the rgba(...) fill color of each engine's traces.
colors = dict(
    priv_bayes=(0, 100, 80),
    synth_pop=(100, 0, 80),
    histogram=(100, 80, 0),
)

def get_engine_traces(data, engine, metric):
    """Build the Plotly traces (mean line and +/- 1 std band) for one engine.

    Rows of ``data`` are restricted to ``engine`` and to epsilon values inside
    the module-level ``epsilon_range`` (both bounds inclusive). ``metric`` is
    then averaged per distinct epsilon value.

    Parameters
    ----------
    data : pandas.DataFrame
        Results table with "engine" and "epsilon" columns plus the ``metric``
        column.
    engine : str
        Value of the "engine" column to plot. It is also used as the legend
        label and as the key into the module-level ``colors`` mapping.
    metric : str
        Name of the numeric column to aggregate.

    Returns
    -------
    list
        Two ``go.Scatter`` objects: the mean line, then the filled band between
        ``mean - std`` (floored at 0) and ``mean + std`` (capped at 1).
    """
    in_window = (data["engine"] == engine) & data["epsilon"].between(*epsilon_range)

    # Aggregate only the metric column. Averaging the whole frame fails on
    # pandas >= 2.0 because of string columns such as "engine".
    per_epsilon = data.loc[in_window].groupby("epsilon")[metric]
    mean_by_eps = per_epsilon.mean()
    # The sample std is NaN for an epsilon that has a single row. Treat that as
    # zero spread so the band collapses onto the line instead of holding NaN.
    std_by_eps = per_epsilon.std().fillna(0)

    x = mean_by_eps.index.tolist()
    y = mean_by_eps.to_numpy()
    y_std = std_by_eps.to_numpy()
    # The band is clamped to [0, 1]; presumably the metric is bounded to that
    # interval -- confirm before reusing with another metric.
    y_upper = np.clip(y + y_std, None, 1).tolist()
    y_lower = np.clip(y - y_std, 0, None).tolist()

    # Engines without a configured color are drawn in neutral grey rather than
    # aborting the whole figure with a KeyError.
    red, green, blue = colors.get(engine, (128, 128, 128))

    mean_line = go.Scatter(
        x=x,
        y=y,
        line=dict(color=f'rgb({red},{green},{blue})'),
        mode='lines',
        name=f"{engine}"
    )
    # A single closed polygon: walk the upper edge left-to-right, then the
    # lower edge right-to-left, and let Plotly fill the enclosed area.
    std_band = go.Scatter(
        x=x + x[::-1],
        y=y_upper + y_lower[::-1],
        fill='toself',
        fillcolor=f'rgba({red},{green},{blue},0.2)',
        line=dict(color='rgba(255,255,255,0)'),
        hoverinfo="skip",
        showlegend=False
    )

    return [mean_line, std_band]

In [None]:
# Human-readable y-axis labels keyed by metric column name. A metric without
# an entry falls back to its raw column name.
titles = {
    "similarity": "mean marginal similarity"
}

# One mean line plus one std band per engine; rows with a missing engine are
# skipped.
traces = []
for engine in data.engine.dropna().unique():
    traces += get_engine_traces(data, engine, metric)

# Baselines are drawn as horizontal lines spanning the epsilon values that
# actually fall inside the plotted window. Both bounds are applied together,
# so the extent can never come out reversed.
epsilon_in_window = data.loc[data.epsilon.between(*epsilon_range), "epsilon"]
baseline_x = [epsilon_in_window.min(), epsilon_in_window.max()]

baselines = [
    (prediction, "predictive baseline", "red"),
    (majority, "majority baseline", "lightskyblue"),
]
for baseline_value, baseline_label, baseline_color in baselines:
    # A baseline whose value is null is simply left off the figure.
    if pd.notnull(baseline_value):
        traces.append(
            go.Scatter(
                x=baseline_x,
                y=[baseline_value, baseline_value],
                mode='lines',
                line=dict(dash='dash', color=baseline_color),
                name=baseline_label
            )
        )

fig = go.Figure(traces)

# The epsilon window covers several orders of magnitude, hence the log axis.
fig.update_xaxes(type="log")
fig.update_layout(
    xaxis_title="epsilon",
    yaxis_title=titles.get(metric, metric),
    legend=dict(
        yanchor="top",
        y=0.3,
        xanchor="left",
        x=0.85
    )
)


fig.show()

# Performance

In [None]:
# Timing results table, loaded from a path relative to the notebook's working
# directory.
perf_data = pd.read_csv(filepath_or_buffer="data/speed_results.csv")

## Mean

In [None]:
# Average of the "train" and "gen" columns for each value of "baseline".
mean = perf_data.groupby("baseline")[["train", "gen"]].mean()
# Extra row holding the DataSynthesizer / DPART ratio per column. Presumably
# both columns are durations, so a ratio above 1 means DPART is faster --
# confirm the units in the results file.
mean.loc["speed_up"] = mean.loc["DataSynthesizer"] / mean.loc["DPART"]
# Display the whole table (one row per baseline plus the ratio). `.head()`
# would hide the appended "speed_up" row once there are five or more baselines.
mean