In [1]:
import pandas as pd
from scipy.stats import f_oneway
from scipy.stats import ks_2samp
from scipy.stats import mannwhitneyu
from scipy.stats import ttest_ind
from statsmodels.stats.multicomp import pairwise_tukeyhsd


In [None]:
# Loss values for the baseline configuration; every later cell compares
# its own sample against this list.
loss_base = [
    2900.0, 2900.0, 2900.0, 2900.0, 3000.0,
    2900.0, 3000.0, 3000.0, 3000.0, 2900.0,
]

## Baseline + AM

In [3]:
# Loss values for the "Baseline + AM" variant.
loss_am = [
    3000.0, 3000.0, 3000.0, 2900.0, 3100.0,
    3200.0, 3000.0, 3100.0, 2900.0, 3000.0,
]

# Welch's t-test (equal_var=False): variant vs. baseline.
welch_res = ttest_ind(loss_am, loss_base, equal_var=False)
t_stat, p_val = welch_res.statistic, welch_res.pvalue
print("t-stat:", t_stat)
print("p-value:", p_val)

# Two-sample Kolmogorov-Smirnov test on the same pair of samples.
ks_res = ks_2samp(loss_am, loss_base)
ks_stat, ks_pvalue = ks_res.statistic, ks_res.pvalue
print(f"KS Statistic: {ks_stat}")
print(f"KS Test p-value: {ks_pvalue}")

# Two-sided Mann-Whitney U test on the same pair of samples.
mwu_res = mannwhitneyu(loss_am, loss_base, alternative='two-sided')
u_stat, mw_pvalue = mwu_res.statistic, mwu_res.pvalue
print(f"Mann-Whitney U Statistic: {u_stat}")
print(f"Mann-Whitney U Test p-value: {mw_pvalue}")

t-stat: 2.4
p-value: 0.03068171246721154
KS Statistic: 0.4
KS Test p-value: 0.41752365281777043
Mann-Whitney U Statistic: 76.0
Mann-Whitney U Test p-value: 0.0360909952042232


## Baseline + AA

In [9]:
# Loss values for the "Baseline + AA" variant.
loss_aa = [
    3100.0, 3000.0, 2900.0, 2900.0, 2900.0,
    3000.0, 3000.0, 2900.0, 3000.0, 2900.0,
]

# Welch's t-test (equal_var=False): variant vs. baseline.
welch_res = ttest_ind(loss_aa, loss_base, equal_var=False)
t_stat, p_val = welch_res.statistic, welch_res.pvalue
print("t-stat:", t_stat)
print("p-value:", p_val)

# Two-sample Kolmogorov-Smirnov test on the same pair of samples.
ks_res = ks_2samp(loss_aa, loss_base)
ks_stat, ks_pvalue = ks_res.statistic, ks_res.pvalue
print(f"KS Statistic: {ks_stat}")
print(f"KS Test p-value: {ks_pvalue}")

# Two-sided Mann-Whitney U test on the same pair of samples.
mwu_res = mannwhitneyu(loss_aa, loss_base, alternative='two-sided')
u_stat, mw_pvalue = mwu_res.statistic, mwu_res.pvalue
print(f"Mann-Whitney U Statistic: {u_stat}")
print(f"Mann-Whitney U Test p-value: {mw_pvalue}")

t-stat: 0.7276068751089989
p-value: 0.4770122828436911
KS Statistic: 0.1
KS Test p-value: 1.0
Mann-Whitney U Statistic: 57.0
Mann-Whitney U Test p-value: 0.5758676588974205


## Baseline + MM

In [4]:
# Loss values for the "Baseline + MM" variant.
loss_mm = [
    3000.0, 2900.0, 2900.0, 3000.0, 3000.0,
    3000.0, 3000.0, 3000.0, 3000.0, 2900.0,
]

# Welch's t-test (equal_var=False): variant vs. baseline.
welch_res = ttest_ind(loss_mm, loss_base, equal_var=False)
t_stat, p_val = welch_res.statistic, welch_res.pvalue
print("t-stat:", t_stat)
print("p-value:", p_val)

# Two-sample Kolmogorov-Smirnov test on the same pair of samples.
ks_res = ks_2samp(loss_mm, loss_base)
ks_stat, ks_pvalue = ks_res.statistic, ks_res.pvalue
print(f"KS Statistic: {ks_stat}")
print(f"KS Test p-value: {ks_pvalue}")

# Two-sided Mann-Whitney U test on the same pair of samples.
mwu_res = mannwhitneyu(loss_mm, loss_base, alternative='two-sided')
u_stat, mw_pvalue = mwu_res.statistic, mwu_res.pvalue
print(f"Mann-Whitney U Statistic: {u_stat}")
print(f"Mann-Whitney U Test p-value: {mw_pvalue}")

t-stat: 1.3416407864998738
p-value: 0.19646751603492402
KS Statistic: 0.3
KS Test p-value: 0.7869297884777761
Mann-Whitney U Statistic: 65.0
Mann-Whitney U Test p-value: 0.20392477458611868


## One-way ANOVA test

In [10]:
# One-way ANOVA across the four loss samples (AA, AM, baseline, MM).
# `f_oneway` is imported in the notebook's top import cell.
f_stat, p_val = f_oneway(loss_aa, loss_am, loss_base, loss_mm)
print("F-statistic:", f_stat)
print("p-value:", p_val)
# The verdict string uses a fixed 0.05 significance threshold.
print("ANOVA test result:", "Significant" if p_val < 0.05 else "Not significant")

F-statistic: 2.284974093264249
p-value: 0.09536087409973508
ANOVA test result: Not significant


## Post-hoc Test

In [11]:
# Post-hoc pairwise comparison of the four loss samples with Tukey's HSD.
# `pd` and `pairwise_tukeyhsd` are imported in the notebook's top import cell.

# Keep each label next to its sample so the number of labels always follows
# the sample's actual length instead of a hard-coded 10.
# NOTE(review): `loss_base` is labelled 'Max-Avg' here, while the rest of the
# notebook refers to it as the baseline -- confirm this label is intended.
labelled_samples = [
    ('Avg-Avg', loss_aa),
    ('Avg-Max', loss_am),
    ('Max-Avg', loss_base),
    ('Max-Max', loss_mm),
]

# Flatten the data: one loss value and one matching group label per row.
all_losses = [loss for _, sample in labelled_samples for loss in sample]
groups = [label for label, sample in labelled_samples for _ in sample]

# Create DataFrame
df = pd.DataFrame({'Loss': all_losses, 'Group': groups})

# Tukey HSD at a family-wise error rate of 0.05.
tukey = pairwise_tukeyhsd(df['Loss'], df['Group'], alpha=0.05)
print(tukey)


   Multiple Comparison of Means - Tukey HSD, FWER=0.05   
 group1  group2 meandiff p-adj    lower    upper   reject
---------------------------------------------------------
Avg-Avg Avg-Max     60.0 0.2752  -28.1892 148.1892  False
Avg-Avg Max-Avg    -20.0  0.928 -108.1892  68.1892  False
Avg-Avg Max-Max     30.0 0.7964  -58.1892 118.1892  False
Avg-Max Max-Avg    -80.0 0.0869 -168.1892   8.1892  False
Avg-Max Max-Max    -30.0 0.7964 -118.1892  58.1892  False
Max-Avg Max-Max     50.0 0.4324  -38.1892 138.1892  False
---------------------------------------------------------
