In [2]:
import pandas as pd

# Experiment folder names: 'standard' followed by 'p0', 'p2', ..., 'p10'.
experiments = ['standard'] + [f'p{step}' for step in range(0, 11, 2)]

# Seed file prefixes 's1' .. 's10'; each experiment has one *_test.csv and
# one *_run.csv per seed.
seeds = ['s%d' % n for n in range(1, 11)]

base_dir = '../../datalake/ppsn'

# experiment name -> list with one value per seed (same order as `seeds`).
tests = {}
fits = {}

for experiment in experiments:
    runstats_dir = f'{base_dir}/{experiment}/data/xdpole/runstats'
    first_scores = []
    peak_fits = []

    for seed in seeds:
        # Entry labelled 0 of the `score` column (the first row under the
        # default index produced by read_csv).
        test_frame = pd.read_csv(f'{runstats_dir}/{seed}_test.csv')
        first_scores.append(test_frame.score[0])

        # Largest `bestgfit` value found in the seed's run file.
        run_frame = pd.read_csv(f'{runstats_dir}/{seed}_run.csv')
        peak_fits.append(run_frame.bestgfit.max())

    tests[experiment] = first_scores
    fits[experiment] = peak_fits

print(tests)
print(fits)

{'standard': [738.231, 721.054, 709.731, 733.393, 703.829, 709.686, 745.689, 728.658, 700.056, 719.041], 'p0': [778.669, 777.865, 741.364, 768.356, 769.256, 753.891, 781.861, 771.166, 742.006, 759.822], 'p2': [771.926, 749.588, 731.299, 760.613, 753.559, 742.813, 760.459, 760.386, 724.978, 757.86], 'p4': [764.86, 740.927, 730.187, 754.506, 745.753, 739.996, 759.244, 751.35, 720.928, 740.391], 'p6': [751.613, 737.487, 715.329, 736.101, 707.831, 730.158, 748.743, 754.632, 710.772, 730.828], 'p8': [740.34, 711.188, 695.044, 714.608, 676.134, 718.718, 739.912, 729.475, 702.199, 722.238], 'p10': [695.632, 682.197, 690.789, 686.37, 665.628, 657.82, 683.192, 724.364, 675.498, 671.255]}
{'standard': [723.62, 721.62, 727.1, 730.67, 703.21, 721.33, 730.42, 721.12, 717.45, 717.77], 'p0': [769.23, 779.26, 760.77, 771.69, 767.11, 765.82, 777.27, 750.0, 759.16, 769.33], 'p2': [754.77, 755.2, 761.64, 757.03, 749.18, 755.37, 754.16, 751.57, 742.08, 765.98], 'p4': [748.65, 738.58, 752.44, 752.06, 731.2

In [3]:
from scipy import stats

def kruskal(groups, alpha=0.001):
    """Run a Kruskal-Wallis H-test over all groups and report significance.

    Parameters
    ----------
    groups : dict
        Maps a group label to its sequence of observations. Only the values
        are used; they are passed positionally to ``scipy.stats.kruskal``.
    alpha : float, optional
        Significance threshold for the p-value. Defaults to 0.001, the value
        that used to be hard-coded, so existing calls behave the same.

    Returns
    -------
    None
        A message is printed only when ``p < alpha``; nothing is printed
        otherwise.
    """
    # The H statistic itself is not reported, only the p-value.
    _, p = stats.kruskal(*groups.values())
    if p < alpha:
        print(f'different distributions: p = {p}')

In [4]:
def mannwhitneyu(groups):
    """Compute pairwise Mann-Whitney U p-values for every pair of groups.

    Each unordered pair is tested exactly once. Pairs are built from the keys
    of ``groups`` itself (in insertion order) rather than from a module-level
    list, so the function works for any mapping passed in.

    Parameters
    ----------
    groups : dict
        Maps a group label to its sequence of observations.

    Returns
    -------
    dict
        Maps ``"<label>-<other>"`` to the two-sided p-value of the test
        between those two groups. Empty when fewer than two groups are given.
    """
    results = {}
    labels = list(groups)
    for position, label in enumerate(labels):
        # Only pair with labels that come later, so each pair appears once
        # and a group is never compared with itself.
        for other in labels[position + 1:]:
            # Two-sided is stated explicitly so the result does not depend on
            # the default of the installed SciPy version.
            _, p = stats.mannwhitneyu(
                groups[label], groups[other], alternative='two-sided'
            )
            results[f"{label}-{other}"] = p
    return results

In [5]:
def hypothesis_validation(results, alpha=0.05):
    """Print the comparisons whose p-value passes a Bonferroni-style cut-off.

    The per-comparison threshold is ``alpha / k`` where ``k`` is the number
    of entries in ``results``. The function prints ``k``, then the threshold,
    then one line per entry whose p-value is strictly below the threshold.

    Parameters
    ----------
    results : dict
        Maps a comparison label to its p-value.
    alpha : float, optional
        Overall significance level that is divided by ``k``. Defaults to
        0.05, the value that used to be hard-coded.

    Returns
    -------
    None
    """
    k = len(results)
    print(k)
    if k == 0:
        # No comparisons: the threshold alpha / k is undefined, so stop here
        # instead of raising ZeroDivisionError.
        return
    a = alpha / k
    print(a)
    for test, p in results.items():
        if p < a:
            print(f"{test} ({p})")

In [6]:
# Test scores: omnibus Kruskal-Wallis check first, then the pairwise
# Mann-Whitney U p-values are handed to hypothesis_validation for reporting.
print()
print('tests')
kruskal(tests)
tests_results = mannwhitneyu(tests)
hypothesis_validation(tests_results)


tests
different distributions: p = 8.052346314455685e-09
21
0.002380952380952381
standard-p0 (0.00032983852077799353)
standard-p2 (0.0017062493689195964)
standard-p10 (0.0010079762403767444)
p0-p6 (0.0013149446697132139)
p0-p8 (0.00018267179110955002)
p0-p10 (0.00018267179110955002)
p2-p8 (0.0007685389131627665)
p2-p10 (0.00018267179110955002)
p4-p8 (0.0013149446697132139)
p4-p10 (0.00024612812790522973)
p6-p10 (0.00043963875262656454)


In [7]:
# Best fitness values: omnibus Kruskal-Wallis check first, then the pairwise
# Mann-Whitney U p-values are handed to hypothesis_validation for reporting.
print()
print('fits')
kruskal(fits)
fits_results = mannwhitneyu(fits)
hypothesis_validation(fits_results)


fits
different distributions: p = 4.0824961823327484e-11
21
0.002380952380952381
standard-p0 (0.00018267179110955002)
standard-p2 (0.00018267179110955002)
standard-p4 (0.00043963875262656454)
standard-p10 (0.0005828399431792743)
p0-p4 (0.00032983852077799353)
p0-p6 (0.00018267179110955002)
p0-p8 (0.00018267179110955002)
p0-p10 (0.00018267179110955002)
p2-p4 (0.0017062493689195964)
p2-p6 (0.00018267179110955002)
p2-p8 (0.00018267179110955002)
p2-p10 (0.00018267179110955002)
p4-p8 (0.0005828399431792743)
p4-p10 (0.00018267179110955002)
p6-p10 (0.00032983852077799353)
