In [1]:
import matplotlib.pyplot as plt
import numpy as np

from scipy.stats import ttest_rel

from div_fgan_data import scene_data, yeast_data


In [2]:
def collect_stats(data, **kwargs):
    """Collect statistics about model """
    model_data = dict()

    for model, values in data.items():
        model_stats = {
            stat: func(values)
            for stat, func in kwargs.items()    
        }
        model_data[model] = model_stats

    return model_data

In [3]:
def ttest_wrt_base(base_values):
    def wrapper(values):
        pval = ttest_rel(base_values, values, alternative="two-sided").pvalue
        print(pval)
        return pval
    return wrapper

In [4]:
def collect_pvals(data, base_name, parse_name=True, omit_base=True):
    base_values = data[base_name]
    stat_collect = dict(mean=np.mean, pval=ttest_wrt_base(base_values))
    stats = collect_stats(data, **stat_collect)

    if parse_name:
        stats = {num: value for name, value in stats.items() if (num := int(name[18:])) >= int(omit_base)}

    return stats

In [5]:
def filter_data(data, hard):
    return {name: values for name, values in data.items() if ("hard" in name) == hard}

In [6]:
for dataset in [scene_data, yeast_data]:
    for stat in ["map", "exp"]:
        del dataset[stat]["logging"]
        del dataset[stat]["logger-og"]


exp_scene_pvals_soft = collect_pvals(filter_data(scene_data["exp"], hard=False), base_name="nn-soft 32-8 fgan 0")
exp_scene_pvals_hard = collect_pvals(filter_data(scene_data["exp"], hard=True), base_name="nn-hard 32-8 fgan 0")
map_scene_pvals_soft = collect_pvals(filter_data(scene_data["map"], hard=False), base_name="nn-soft 32-8 fgan 0")
map_scene_pvals_hard = collect_pvals(filter_data(scene_data["map"], hard=True), base_name="nn-hard 32-8 fgan 0")

exp_yeast_pvals_soft = collect_pvals(filter_data(yeast_data["exp"], hard=False), base_name="nn-soft 32-8 fgan 0")
exp_yeast_pvals_hard = collect_pvals(filter_data(yeast_data["exp"], hard=True), base_name="nn-hard 32-8 fgan 0")
map_yeast_pvals_soft = collect_pvals(filter_data(yeast_data["map"], hard=False), base_name="nn-soft 32-8 fgan 0")
map_yeast_pvals_hard = collect_pvals(filter_data(yeast_data["map"], hard=True), base_name="nn-hard 32-8 fgan 0")

nan
0.0032362091566916296
0.00013560425535850124
4.561029703957218e-06
6.103352172641193e-05
1.4134998850773327e-07
3.674935187546454e-07
8.01454221499228e-06
1.712598288648977e-08
3.7785654970746327e-06
nan
0.012715819201414343
0.054434801409420905
0.1062975041117151
0.06644876872856197
0.018301789302238827
0.0016090034199665302
0.0015096765041002914
0.0032964586361730625
0.0033443683184533443
nan
7.079847941035653e-05
1.5007868614751134e-05
1.5708400633378357e-06
2.2275906741443295e-05
3.691268491134129e-08
1.3768837176619e-07
1.8696302736335792e-06
4.996652971689505e-09
1.081841867555961e-06
nan
0.37367386367622724
0.09361270443038042
0.00033508480212897905
0.00017444808971085683
7.042543823315768e-05
1.104920161747947e-05
7.640421554541303e-06
2.8536942898686235e-05
4.318386087795196e-05
nan
0.09838265550670762
0.0040475925650861186
0.06622327163930108
0.0054936088487694415
0.06865145732534193
0.10862695855134796
0.014449943036968829
0.008589021490832969
0.012538594535949377
nan
0.

In [7]:
exp_scene_pvals_hard

{1: {'mean': 2.0915, 'pval': 0.012715819201414343},
 2: {'mean': 2.2118499999999996, 'pval': 0.054434801409420905},
 3: {'mean': 2.4235999999999995, 'pval': 0.1062975041117151},
 4: {'mean': 2.4460100000000002, 'pval': 0.06644876872856197},
 5: {'mean': 2.48548, 'pval': 0.018301789302238827},
 10: {'mean': 2.54692, 'pval': 0.0016090034199665302},
 15: {'mean': 2.5392200000000003, 'pval': 0.0015096765041002914},
 20: {'mean': 2.5015800000000006, 'pval': 0.0032964586361730625},
 30: {'mean': 2.5011799999999997, 'pval': 0.0033443683184533443}}

In [8]:
def format_pvalue(pvalue, t=0.001, sig_nums=3):
    if pvalue < t:
        return f"< {str(t)[1:]}"

    pvalue = round(pvalue, sig_nums)
    return str(pvalue)[1:]

n_steps = [1, 2, 3, 4, 5, 10, 15, 20, 30]
rows = []
for i in [1, 2, 3, 4, 5, 10, 15, 20, 30]:
    row = str(i)
    
    # order: scene, yeast and then Soft Hard
    values = [
        format_pvalue(exp_scene_pvals_soft[i]["pval"]),
        format_pvalue(exp_scene_pvals_hard[i]["pval"]),
        format_pvalue(exp_yeast_pvals_soft[i]["pval"]),
        format_pvalue(exp_yeast_pvals_hard[i]["pval"])
    ]
    row += " & " + " & ".join(values) + "\\\\\n"
    rows.append(row)

print("".join(rows))

1 & .003 & .013 & .098 & .606\\
2 & < .001 & .054 & .004 & .087\\
3 & < .001 & .106 & .066 & .042\\
4 & < .001 & .066 & .005 & .009\\
5 & < .001 & .018 & .069 & .031\\
10 & < .001 & .002 & .109 & .006\\
15 & < .001 & .002 & .014 & .002\\
20 & < .001 & .003 & .009 & .121\\
30 & < .001 & .003 & .013 & .003\\



In [9]:
rows = []
for i in [1, 2, 3, 4, 5, 10, 15, 20, 30]:
    row = str(i)
    
    # order: scene, yeast and then Soft Hard
    values = [
        format_pvalue(map_scene_pvals_soft[i]["pval"]),
        format_pvalue(map_scene_pvals_hard[i]["pval"]),
        format_pvalue(map_yeast_pvals_soft[i]["pval"]),
        format_pvalue(map_yeast_pvals_hard[i]["pval"])
    ]
    row += " & " + " & ".join(values) + "\\\\\n"
    rows.append(row)

print("".join(rows))

1 & < .001 & .374 & .652 & .333\\
2 & < .001 & .094 & .254 & .666\\
3 & < .001 & < .001 & .886 & .684\\
4 & < .001 & < .001 & .379 & .611\\
5 & < .001 & < .001 & .884 & .877\\
10 & < .001 & < .001 & .937 & .276\\
15 & < .001 & < .001 & .835 & .142\\
20 & < .001 & < .001 & .369 & .979\\
30 & < .001 & < .001 & .886 & .265\\

