In [112]:
import numpy as np
import polars as pl
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.stats.api as sm

## Runtime baseline experiment

**Objective**: Determine if the runtime across distribution types is the same.

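We cannot use one-way ANOVA because the samples are not normally distributed: the Shapiro–Wilk test below rejects normality for every network type (all p-values are far below 0.05).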

https://www.pythonfordatascience.org/parametric-assumptions-python

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.f_oneway.html#scipy.stats.f_oneway

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.shapiro.html#scipy.stats.shapiro

In [99]:
def get_runtimes(df, net_type, as_numpy=False):
    """Collect the ``mp_runtime`` values of one network type, grouped by configuration.

    Rows whose ``key`` equals ``'1'`` are dropped (the reason is not recorded
    in this notebook), the remaining rows for ``net_type`` are grouped by
    ``(ctr_type, svr_type, str_type)``, and the ``mp_runtime`` values of each
    group are gathered into a list.

    Parameters
    ----------
    df
        Lazy polars frame (it is ``.collect()``-ed here) with the columns
        ``key``, ``net_type``, ``ctr_type``, ``svr_type``, ``str_type`` and
        ``mp_runtime``.
    net_type
        Value of the ``net_type`` column to keep.
    as_numpy : bool, default False
        When true, wrap the result in ``np.array``.
        NOTE(review): this presumably relies on every group having the same
        number of runtimes so that the array is rectangular - confirm.

    Returns
    -------
    list[list] or numpy.ndarray
        One entry per group, each holding that group's runtimes. No
        ``maintain_order`` is requested, so group order should not be
        relied upon.
    """
    # Both conditions must hold; equivalent to two chained .filter() calls.
    keep_row = (pl.col('key') != '1') & (pl.col('net_type') == net_type)

    per_group = (
        df
        .filter(keep_row)
        .group_by(['ctr_type', 'svr_type', 'str_type'])
        .agg(pl.col('mp_runtime'))
        .collect()
    )
    runtimes = per_group.get_column('mp_runtime').to_list()

    if as_numpy:
        return np.array(runtimes)
    return runtimes

In [3]:
# Lazily scan the dataset; nothing is read from disk until .collect() is called.
df = pl.scan_parquet('./data/runtime-baseline/dataset.parquet')

# Distinct network types present in the dataset. The unique() is pushed into
# the lazy query so only the net_type column is read, instead of materializing
# the entire dataset just to inspect one column.
# The order of the unique values is not guaranteed (no maintain_order requested).
net_types = (
    df
    .select(pl.col('net_type').unique())
    .collect()
    .get_column('net_type')
)

In [100]:
# Shapiro-Wilk normality test on the pooled runtimes of each network type.
for net_type in net_types:
    # Concatenate the per-group runtime lists into one 1-D vector. This gives
    # the same values as building a 2-D array and flattening it, but also
    # works when the groups do not all contain the same number of runs
    # (a ragged list cannot be turned into a rectangular array).
    data = np.concatenate(get_runtimes(df, net_type))
    print(net_type, '\t', stats.shapiro(data))

RandomRegular 	 ShapiroResult(statistic=0.31384527727963496, pvalue=1.1721280890703945e-16)
GnmRandom 	 ShapiroResult(statistic=0.46285027473053053, pvalue=8.615878127357973e-15)
BarabasiAlbert 	 ShapiroResult(statistic=0.8246320735534654, pvalue=8.402036681101393e-08)
WattsStrogatz 	 ShapiroResult(statistic=0.905059682305744, pvalue=4.909102938770769e-05)


We can instead use the nonparametric Kruskal–Wallis H-test to compare the medians across samples.

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html#scipy.stats.kruskal

In [101]:
# Kruskal-Wallis H-test per network type: each (ctr_type, svr_type, str_type)
# group returned by get_runtimes is passed to the test as one sample.
for net_type in net_types:
    runtime_groups = get_runtimes(df, net_type)
    kruskal_result = stats.kruskal(*runtime_groups)
    print(net_type, '\t', kruskal_result)

RandomRegular 	 KruskalResult(statistic=6.744449984184931, pvalue=0.45596385748786317)
GnmRandom 	 KruskalResult(statistic=10.159023455856987, pvalue=0.1797394478879069)
BarabasiAlbert 	 KruskalResult(statistic=8.056005933551345, pvalue=0.32768128702517546)
WattsStrogatz 	 KruskalResult(statistic=5.176282429297923, pvalue=0.6384603989978338)
