In [1]:
import matplotlib.pyplot as plt
import polars as pl
from scipy import stats
import seaborn as sns

from lib import (
    load_dataset,
    compute_accuracy_results,
    process_parameter_dataset,
    process_runtime_dataset,
    apply_hypothesis_test,
    save_fig,
    format_network_types,
)

# Show up to 36 characters of each string value when polars renders a frame.
# The trailing semicolon suppresses the cell's own output repr.
# NOTE(review): 36 presumably matches the width of an identifier column (e.g. UUIDs) — confirm.
pl.Config.set_fmt_str_lengths(36);

## Experiment 1: Parameters

### Accuracy

In [3]:
# Load the "send-coefficient" experiment dataset via the project helper in `lib`.
df = load_dataset("send-coefficient")

In [4]:
# Accuracy results for the send-coefficient sweep.
# `results` exposes `.aggregated` and `.tabular`, which the following cells use.
# The percentile levels are listed explicitly: 0, 0.01, 0.1 and the integers 1 through 6.
results = compute_accuracy_results(
    process_parameter_dataset(df),
    parameter="send_coefficient",
    percentiles=[0, 0.01, 0.1, 1, 2, 3, 4, 5, 6],
)

In [None]:
# Write the aggregated accuracy table for the send-coefficient sweep to CSV.
# NOTE(review): assumes the `outputs/` directory already exists — confirm.
results.aggregated.write_csv('outputs/send-coefficient_accuracy_aggregate.csv')

In [None]:
# Complementary ECDF of accuracy, one curve per send coefficient (coefficients >= 1 only).
# Accuracy is on the y-axis and the proportion of observations on the x-axis.
data = results.tabular.filter(pl.col("send_coefficient") >= 1)
g = (
    sns.FacetGrid(
        data,
        hue="send_coefficient",
        palette="GnBu_d",
        height=5,
        legend_out=False,
        xlim=(0.94, 1.005),
        ylim=(0.90, 1.005),
    )
    # The FacetGrid methods below return the grid itself, so they can be chained.
    .map_dataframe(sns.ecdfplot, y="accuracy", complementary=True)
    .set_axis_labels("Proportion", "Accuracy")
    .add_legend(title="Send coefficient")
)
save_fig(g, "outputs/send-coefficient_accuracy_aggregate")

In [None]:
# Complementary ECDF of accuracy per network type (one facet per type, stacked in a
# single column), with one curve per send coefficient >= 1.
# Only a 10% subsample is plotted. The seed is fixed so that the subsample, and
# therefore the saved figure, is identical on every Restart & Run All.
data = results.tabular.filter(pl.col("send_coefficient") >= 1).sample(
    fraction=0.1,
    seed=0,
)
data = format_network_types(data)
g = sns.FacetGrid(
    data,
    col="network_type",
    # Sorted unique values give a stable facet order.
    col_order=data["network_type"].unique().sort(),
    hue="send_coefficient",
    col_wrap=1,
    xlim=(0.88, 1.005),
    ylim=(0.90, 1.005),
    despine=False,
    palette="GnBu_d",
    aspect=4,
)
g.map_dataframe(sns.ecdfplot, y="accuracy", complementary=True, alpha=0.7)
g.set_axis_labels("Proportion", "Accuracy")
g.set_titles("{col_name}")
g.add_legend(title="Send coefficient")
save_fig(g, "outputs/send-coefficient_accuracy_network-type")

In [None]:
# ECDF of `n_receives` per network type, one curve per send coefficient >= 1.
# Axes are not shared between facets, so each network type gets its own scale.
data = results.tabular
data = data.filter(pl.col("send_coefficient") >= 1)
# Fixed seed: the 50% subsample (and the resulting plot) is reproducible across runs.
data = data.sample(fraction=0.5, seed=0)
g = sns.FacetGrid(
    data,
    hue="send_coefficient",
    col="network_type",
    height=5,
    sharex=False,
    sharey=False,
)
g.map_dataframe(sns.ecdfplot, "n_receives")
g.add_legend()
plt.show()

## Tolerance experiment

In [None]:
# Load the "tolerance" experiment dataset; this rebinds `df` from the previous section.
df = load_dataset('tolerance')

In [None]:
# Accuracy results for the tolerance sweep; this rebinds `results` from the
# send-coefficient section. The percentile levels are passed through unchanged.
results = compute_accuracy_results(
    process_parameter_dataset(df),
    parameter="tolerance",
    percentiles=[0, 0.01, 0.1, 1],
)

In [None]:
# Display the aggregated accuracy table for the tolerance sweep as the cell output.
results.aggregated

In [None]:
# Complementary ECDF of accuracy, one curve per tolerance value.
# Only a 10% subsample is plotted; the seed is fixed so the figure is reproducible.
# The y-axis (accuracy) range is left to matplotlib's autoscaling.
data = results.tabular.sample(fraction=0.1, seed=0)
g = sns.FacetGrid(
    data,
    hue="tolerance",
    xlim=(0.985, 1.005),
    palette="GnBu_d",
    height=5,
    legend_out=False,
)
g.map_dataframe(sns.ecdfplot, y="accuracy", complementary=True)
g.set_axis_labels("Proportion", "Accuracy")
g.add_legend(title="Tolerance")

In [None]:
# Scatter of `n_receives` against `tolerance` on a 10% subsample of the loaded
# tolerance dataset (`df`, not `results.tabular`), converted to pandas for seaborn.
# Fixed seed: the same points are drawn on every run.
data = df.sample(fraction=0.1, seed=0).to_pandas()
g = sns.FacetGrid(data, sharey=False)
g.map_dataframe(sns.scatterplot, x='tolerance', y='n_receives')

## Efficiency experiments

In [None]:
# Reload the send-coefficient dataset and run it through process_parameter_dataset
# in one step, then preview the first three rows.
df = process_parameter_dataset(load_dataset("send-coefficient"))
df.head(3)

## Experiment 2: Runtime baseline

**Objective**: Determine if the runtime across distribution types is the same.

In [None]:
# Load the runtime-baseline dataset and run it through process_runtime_dataset in one step.
df = process_runtime_dataset(load_dataset("runtime-baseline"))


ANOVA assumes that the data in each group are normally distributed. We check this assumption with the Shapiro–Wilk test.

https://en.wikipedia.org/wiki/Analysis_of_variance#Assumptions

https://www.pythonfordatascience.org/parametric-assumptions-python

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.shapiro.html#scipy.stats.shapiro

In [None]:
# Shapiro–Wilk normality test (scipy.stats.shapiro) with grouping by distribution enabled.
# NOTE(review): how groups are formed is decided inside lib.apply_hypothesis_test — confirm there.
apply_hypothesis_test(df, stats.shapiro, by_distributions=True)

In [None]:
# Same Shapiro–Wilk test, additionally enabling grouping by network type.
apply_hypothesis_test(df, stats.shapiro, by_distributions=True, by_network_type=True)

The $p$-values are very low, so we reject the null hypothesis of normality: the runtimes are not normally distributed, and parametric one-way ANOVA is therefore not appropriate.

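To use a non-parametric alternative to ANOVA, we must still ensure that the homoscedasticity (equal variances) assumption holds. We check it with the Fligner–Killeen test, which is robust to departures from normality.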

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.fligner.html

In [None]:
# Fligner–Killeen test for equal variances (scipy.stats.fligner) with grouping by
# distribution enabled.
apply_hypothesis_test(df, stats.fligner, by_distributions=True)

In [None]:
# Same Fligner–Killeen test, additionally enabling grouping by network type.
apply_hypothesis_test(df, stats.fligner, by_distributions=True, by_network_type=True)

The $p$ value is high, which indicates the null hypothesis of homoscedasticity cannot be rejected.

The Kruskal-Wallis test is the non-parametric equivalent of one-way ANOVA.

https://en.wikipedia.org/wiki/Kruskal–Wallis_test

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html#scipy.stats.kruskal

In [None]:
# Kruskal–Wallis H-test (scipy.stats.kruskal) with grouping by distribution enabled.
apply_hypothesis_test(df, stats.kruskal, by_distributions=True)

In [None]:
# Same Kruskal–Wallis test, additionally enabling grouping by network type.
apply_hypothesis_test(df, stats.kruskal, by_distributions=True, by_network_type=True)

A high $p$-value indicates that the null hypothesis — that the population medians of all groups are equal — cannot be rejected.

**Conclusion:** There is no statistically significant difference in runtime across data distributions.

## Runtime experiment

In [None]:
# Load the runtime dataset and run it through process_runtime_dataset in one step.
df = process_runtime_dataset(load_dataset("runtime"))