In [7]:
import os

import sys
sys.path.append("../../")

from tests.multi_ctrs.helpers import load_queries, get_collective_distribution

In [21]:
samples_df = load_queries()
sasi, masi, mami = get_collective_distribution(samples_df, model="GPTo4Mini", method="prototype_test_prompting_few_shot", mami_round=7)

c_p_143_pop_high_hard
c_p_15_pop_high_hard
c_p_5_pop_high_hard
c_p_155_pop_high_hard
c_p_116_pop_high_hard
c_p_162_pop_high_medium
c_p_113_pop_high_medium
c_p_130_pop_high_medium
c_p_131_pop_high_medium
c_p_94_pop_high_medium
c_p_67_pop_high_sustainable
c_p_0_pop_high_sustainable
c_p_43_pop_high_sustainable
c_p_68_pop_high_sustainable
c_p_38_pop_high_sustainable
c_p_118_pop_low_medium
c_p_24_pop_low_medium
c_p_119_pop_low_medium
c_p_60_pop_low_medium
c_p_77_pop_low_medium
c_p_113_pop_low_sustainable
c_p_147_pop_low_sustainable
c_p_3_pop_low_sustainable
c_p_169_pop_low_sustainable
c_p_54_pop_low_sustainable
c_p_136_pop_medium_hard
c_p_92_pop_medium_hard
c_p_196_pop_medium_hard
c_p_36_pop_medium_hard
c_p_167_pop_medium_hard
c_p_160_pop_medium_medium
c_p_98_pop_medium_medium
c_p_133_pop_medium_medium
c_p_80_pop_medium_medium
c_p_78_pop_medium_medium
c_p_120_pop_medium_sustainable
c_p_47_pop_medium_sustainable
c_p_14_pop_medium_sustainable
c_p_28_pop_medium_sustainable
c_p_150_pop_medium_s

## Pairwise T-Test

In [10]:
from scipy.stats import ttest_rel

t_stat, p_value = ttest_rel(sasi, mami)

print("t-statistic:", t_stat)
print("p-value:", p_value)

# Interpretation
alpha = 0.05
if p_value < alpha:
    print("The difference is statistically significant (reject H0).")
else:
    print("The difference is not statistically significant (fail to reject H0).")

t-statistic: -13.055754365912268
p-value: 9.814523884560742e-17
The difference is statistically significant (reject H0).


## One-Way ANOVA

In [11]:
## Anova

from scipy.stats import f_oneway, ttest_ind
from itertools import combinations
import numpy as np

# Step 1: One-way ANOVA
f_stat, p_anova = f_oneway(sasi, masi, mami)
print("ANOVA F-statistic:", f_stat)
print("ANOVA p-value:", p_anova)

# Step 2: If ANOVA is significant, do pairwise t-tests with Bonferroni correction
alpha = 0.05
if p_anova < alpha:
    print("At least one group differs significantly (ANOVA significant).")
    # Do pairwise t-tests
    groups = {'is_baseline': sasi, 'masi': masi, 'mami': mami}
    combos = list(combinations(groups.keys(), 2))
    corrected_alpha = alpha / len(combos)  # Bonferroni correction
    print(f"\nBonferroni-corrected alpha: {corrected_alpha:.4f}")
    for g1, g2 in combos:
        t_stat, p_val = ttest_ind(groups[g1], groups[g2])
        print(f"{g1} vs {g2} --> t-stat: {t_stat:.3f}, p: {p_val:.4f}", 
              "=> Significant" if p_val < corrected_alpha else "=> Not significant")
else:
    print("No significant differences found between group means (ANOVA not significant).")

ANOVA F-statistic: 102.59524313642473
ANOVA p-value: 1.3125287510355047e-27
At least one group differs significantly (ANOVA significant).

Bonferroni-corrected alpha: 0.0167
sasi vs masi --> t-stat: -14.473, p: 0.0000 => Significant
sasi vs mami --> t-stat: -12.977, p: 0.0000 => Significant
masi vs mami --> t-stat: -0.249, p: 0.8036 => Not significant


In [19]:
import numpy as np
import pandas as pd
from statsmodels.sandbox.stats.multicomp import MultiComparison
from scipy.stats import ttest_ind

# Example data
data = pd.DataFrame({
    'value': [23, 20, 21, 30, 28, 29, 17, 19, 18],
    'group': ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']
})

# Create MultiComparison object
mc = MultiComparison(sasi, masi)

# Perform all pairwise independent t-tests with Bonferroni correction
result = mc.allpairtest(ttest_ind, method='bonf')

# Print summary
print(result[0])

Test Multiple Comparison ttest_ind 
FWER=0.05 method=bonf
alphacSidak=0.02, alphacBonf=0.017
group1 group2   stat   pval  pval_corr reject
---------------------------------------------
     A      B -7.2732 0.0019    0.0057   True
     A      C  3.1623 0.0341    0.1023  False
     B      C 13.4722 0.0002    0.0005   True
---------------------------------------------
