In [1]:
import hvplot.polars
import polars as pl
import statsmodels.stats.api as sm

import lib

from scipy import stats

%opts magic unavailable (pyparsing cannot be imported)
%compositor magic unavailable (pyparsing cannot be imported)


## Runtime baseline experiment

**Objective**: Determine if the runtime across distribution types is the same.

We cannot use one-way ANOVA because it assumes normally distributed samples, and the Shapiro–Wilk test below rejects normality for every network type.

https://www.pythonfordatascience.org/parametric-assumptions-python

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.f_oneway.html#scipy.stats.f_oneway

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.shapiro.html#scipy.stats.shapiro

In [2]:
# Load the 'runtime-baseline' dataset through the project helper module.
df = lib.load_dataset('runtime-baseline')
# Network types derived from the dataset; the cells below iterate over them.
network_types = lib.get_network_types(df)

In [3]:
# Shapiro-Wilk normality test, run once per network type. The runtime results
# for a network type are flattened into a single sample before testing.
for network_type in network_types:
    runtime_results = lib.compute_runtime_results(df, network_type)
    data = runtime_results.flatten()
    shapiro_result = stats.shapiro(data)
    print(network_type, '\t', shapiro_result)

BarabasiAlbert 	 ShapiroResult(statistic=0.8246320735534654, pvalue=8.402036681101393e-08)
GnmRandom 	 ShapiroResult(statistic=0.46285027473053075, pvalue=8.615878127358096e-15)
RandomRegular 	 ShapiroResult(statistic=0.31384527727963507, pvalue=1.1721280890703945e-16)
WattsStrogatz 	 ShapiroResult(statistic=0.905059682305744, pvalue=4.909102938770769e-05)


Instead, we can use the nonparametric Kruskal–Wallis H-test to compare the medians across samples.

https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html#scipy.stats.kruskal

In [4]:
# Kruskal-Wallis H-test, run once per network type. Unpacking the runtime
# results passes each of their elements to the test as a separate sample.
for network_type in network_types:
    samples = lib.compute_runtime_results(df, network_type)
    kruskal_result = stats.kruskal(*samples)
    print(network_type, '\t', kruskal_result)

BarabasiAlbert 	 KruskalResult(statistic=8.056005933551345, pvalue=0.32768128702517546)
GnmRandom 	 KruskalResult(statistic=10.159023455856987, pvalue=0.1797394478879069)
RandomRegular 	 KruskalResult(statistic=6.744449984184931, pvalue=0.45596385748786317)
WattsStrogatz 	 KruskalResult(statistic=5.17628242929798, pvalue=0.6384603989978268)


## Parameter experiment

In [6]:
# Load the 'parameter' dataset for the parameter experiment.
# NOTE: this rebinds `df`, which previously held the runtime-baseline dataset;
# re-running the earlier cells after this one would use the wrong data.
df = lib.load_parameter_dataset('parameter')

In [7]:
# Accuracy results computed from the parameter dataset.
accuracy = lib.compute_accuracy_results(df)
# Box plot of the 'accuracy' column grouped by 'send_coefficient', with the
# y-axis limited to [0.935, 1.005]. Kept as the last expression so the cell
# displays the plot.
accuracy.hvplot.box(
    y='accuracy',
    by='send_coefficient',
    ylim=[0.935, 1.005],
)

In [8]:
# Efficiency results for two settings of the second argument
# (presumably the send coefficient: 0.8 vs. 1 -- TODO confirm against lib).
result_0 = lib.compute_efficiency_results(df, 0.8)
result_1 = lib.compute_efficiency_results(df, 1)

In [9]:
# Box plot of the efficiency results for result_0, using the 'n_receives' and
# 'n_receives_percent' names.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'LazyFrame' object has no attribute 'plot'". Presumably
# result_0 (or a frame derived from it inside lib) is a polars LazyFrame that
# has to be collected, or plotted via `.hvplot`, before plotting -- confirm in
# lib and fix there.
lib.get_efficiency_box_plot(result_0, 'n_receives', 'n_receives_percent')

AttributeError: 'LazyFrame' object has no attribute 'plot'