In [31]:
import pandas as pd
import numpy as np

# Location of the stored results and the experimental conditions to compare.
base_dir = '../../evorobot-paper/data'
experiments = ['standard', 'critic-curriculum', 'systematic-curriculum']
seeds = list(range(1, 11))  # seeds 1..10, one result file per seed

# For every experiment, pool the entries of all per-seed arrays into a single
# flat list keyed by the experiment name.
tests = {}
for exp in experiments:
    pooled = []
    for seed in seeds:
        pooled.extend(np.load(f"{base_dir}/{exp}/tests/testS{seed}.npy"))
    tests[exp] = pooled

In [32]:
from scipy import stats

def kruskal(groups, alpha=0.001):
    """Run a Kruskal-Wallis H-test across all groups and report significance.

    Parameters
    ----------
    groups : dict
        Maps a group label to its sequence of samples. Every value is passed
        to ``scipy.stats.kruskal`` as one sample, so at least two groups are
        required (SciPy raises otherwise).
    alpha : float, optional
        Significance threshold below which the message is printed.
        Defaults to 0.001.

    Returns
    -------
    tuple
        ``(statistic, p)`` as computed by ``scipy.stats.kruskal``, so callers
        can reuse the result instead of parsing the printed message.
    """
    statistic, p = stats.kruskal(*groups.values())
    # Only the significant case is reported; a non-significant result stays silent.
    if p < alpha:
        print(f'different distributions: p = {p}')
    return statistic, p

In [33]:
def mannwhitneyu(groups, decimals=5):
    """Run a Mann-Whitney U test for every unordered pair of groups.

    The pairs are derived from ``groups`` itself (in its iteration order), so
    the function works for any set of labels and does not rely on notebook
    globals.

    Parameters
    ----------
    groups : dict
        Maps a group label to its sequence of samples.
    decimals : int or None, optional
        Number of decimals the p-values are rounded to (default 5).
        Pass ``None`` to keep the unrounded p-values.

    Returns
    -------
    dict
        Maps ``"[label_a] <> [label_b]"`` to the p-value of that comparison.
        Empty when fewer than two groups are given.
    """
    labels = list(groups)
    results = {}
    for position, first in enumerate(labels):
        # Pair each label only with the labels after it, so every unordered
        # pair is tested exactly once and never against itself.
        for second in labels[position + 1:]:
            _, p = stats.mannwhitneyu(groups[first], groups[second])
            if decimals is not None:
                p = np.around(p, decimals)
            results[f"[{first}] <> [{second}]"] = p
    return results

In [34]:
def hypothesis_validation(results, alpha=0.05):
    """Print the comparisons that stay significant after Bonferroni correction.

    The family-wise level ``alpha`` is divided by the number of comparisons
    ``k``; every entry of ``results`` whose p-value is below ``alpha / k`` is
    printed.

    Parameters
    ----------
    results : dict
        Maps a comparison label to its p-value.
    alpha : float, optional
        Family-wise significance level before correction. Defaults to 0.05.

    Returns
    -------
    None
        Output is printed only.
    """
    k = len(results)
    if k == 0:
        # Nothing to correct for; avoid dividing by zero.
        print("correction: k(0), no tests to validate")
        return
    threshold = alpha / k
    # Round only for display: comparing against a rounded threshold could
    # flag p-values that are slightly above the true corrected level.
    shown = float(np.around(threshold, 5))
    print(f"correction: k({k}), a({shown})\n")
    for test, p in results.items():
        if p < threshold:
            print(f"{test} ({p})")

In [35]:
# Omnibus Kruskal-Wallis test over all experiments; prints only when p < 0.001.
kruskal(tests)
# Pairwise Mann-Whitney U tests between experiments (one p-value per pair).
tests_results = mannwhitneyu(tests)
# Print the pairs whose p-value is below 0.05 divided by the number of pairs
# (Bonferroni correction).
hypothesis_validation(tests_results)

different distributions: p = 8.806680295330267e-06
correction: k(3) a(0.01667)

[standard] <> [critic-curriculum] (0.00361)
[standard] <> [systematic-curriculum] (0.00018)
[critic-curriculum] <> [systematic-curriculum] (0.00018)
