In [None]:
import subprocess
import shlex
import re

import numpy as np

from matplotlib import pyplot as plt
from scipy.stats import combine_pvalues
from IPython.display import display, Markdown
from tqdm import tqdm

In [None]:
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 400

def show(*args, **kwargs):
    """Save the current figure as ``ent.<n>.pdf``, then display it.

    Drop-in replacement for ``plt.show``: every call first writes the current
    figure to ``ent.{show.fig_counter}.pdf`` (transparent background, tight
    bounding box, figure dpi), increments the counter, and then delegates to
    the original ``plt.show`` stored in ``show.plt_show``. Positional and
    keyword arguments are forwarded unchanged to the original function.
    """
    plt.savefig(f"ent.{show.fig_counter}.pdf", dpi='figure', transparent=True, bbox_inches='tight')
    show.fig_counter += 1
    return show.plt_show(*args, **kwargs)

# Index of the next PDF to write; reset whenever this cell is executed.
show.fig_counter = 0
# When this cell is re-run in a live kernel, plt.show is already an earlier
# wrapper. Unwrap it so wrappers do not stack (which would save every figure
# twice under two different counters).
show.plt_show = getattr(plt.show, "plt_show", plt.show)

plt.show = show

In [None]:
# Captures the decimal percentage that follows "exceed this value" in the
# text produced by ent (group 1, e.g. "50.43").
# NOTE(review): ent appears to word extreme results as "less than 0.01
# percent" / "more than 99.99 percent", which this pattern would not match -
# confirm against the ent build in use.
RANDOMNESS_PATTERN = re.compile(
    r"exceed this value (\d+\.\d+) percent"
)

# Zero-width match in front of every uppercase letter except one at the very
# start of the string; substituting "-" and lowercasing turns CamelCase into
# kebab-case.
CC_PATTERN = re.compile(
    r"(?<!^)(?=[A-Z])"
)

# ENT - pseudorandom number sequence test

In [None]:
def test_randomness_ent(
    mode, seeds=[42], limit='1GB',
):
    """Pipe the generator's output through ``./ent`` for each seed and report the results.

    For every seed the command
    ``cargo run --release --quiet -- --seed <seed> --limit <limit> <mode>``
    is started (``mode`` converted from CamelCase to kebab-case) and its
    stdout is fed to ``./ent``. The percentage captured by
    ``RANDOMNESS_PATTERN`` from ent's text output is divided by 100 to give a
    p-value. The p-values are combined with ``scipy.stats.combine_pvalues``
    and the outcome is rendered as Markdown: a colour-coded heading, the
    combined statistic, a histogram of the p-values, the ent output of the
    most extreme single run, and the parameters used.

    Parameters
    ----------
    mode : str
        CamelCase name of the generator mode, e.g. ``"IndependentSimulation"``.
    seeds : iterable
        Seeds passed one at a time to ``--seed``. The default list is never
        mutated.
    limit : str
        Value passed verbatim to ``--limit``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``./ent`` exits with a non-zero status.
    ValueError
        If no run produced output matching ``RANDOMNESS_PATTERN``.
    """
    # The CLI spelling of the mode does not depend on the seed; compute it once.
    cli_mode = CC_PATTERN.sub('-', mode).lower()

    outputs = []
    unparsed_runs = 0

    for seed in tqdm(seeds):
        generator_args = shlex.split(
            f"cargo run --release --quiet -- --seed {seed} --limit {limit} {cli_mode}"
        )

        # The context manager closes our end of the pipe and waits for the
        # generator, so no zombie process or open descriptor is left behind
        # per seed - even when ent fails and check=True raises.
        with subprocess.Popen(generator_args, stdout=subprocess.PIPE) as generator:
            ent = subprocess.run(
                shlex.split("./ent"), stdin=generator.stdout,
                check=True, capture_output=True, text=True
            )

        match = RANDOMNESS_PATTERN.search(ent.stdout)

        if match is None:
            # Counted and reported below rather than dropped silently.
            unparsed_runs += 1
            continue

        pvalue = float(match.group(1)) / 100.0
        outputs.append((pvalue, ent.stdout))

    if not outputs:
        raise ValueError(
            f"none of the {unparsed_runs} ent run(s) for mode {mode!r} produced "
            "output matching RANDOMNESS_PATTERN"
        )

    pvalues = [pvalue for pvalue, _ in outputs]
    gof_statistic, gof_pvalue = combine_pvalues(pvalues)

    # Two-sided colour coding: a combined p-value close to either 0 or 1 is suspicious.
    if gof_pvalue <= 0.01 or gof_pvalue >= 0.99:
        display(Markdown(f"## <span style='color:purple'><u>{mode}</u></span>"))
    elif gof_pvalue <= 0.05 or gof_pvalue >= 0.95:
        display(Markdown(f"## <span style='color:red'><u>{mode}</u></span>"))
    elif gof_pvalue <= 0.1 or gof_pvalue >= 0.9:
        display(Markdown(f"## <span style='color:orange'>*{mode}*</span>"))
    else:
        display(Markdown(f"## <span style='color:green'>{mode}</span>"))

    # The run whose p-value lies closest to 0 or 1; max() keeps the first of
    # equally suspicious runs, like the original strict '>' comparison.
    _, most_suspect_output = max(
        outputs, key=lambda run: 1 - min(run[0], 1 - run[0])
    )

    display(Markdown("#### Fisher’s combined Chi-squared:"))
    display(Markdown(f"* p-value: {gof_pvalue}\n* statistic: {gof_statistic}"))

    if unparsed_runs:
        display(Markdown(
            f"* warning: {unparsed_runs} of {unparsed_runs + len(outputs)} runs were "
            "excluded because their ent output did not match `RANDOMNESS_PATTERN`"
        ))

    plt.title("Distribution of p-values")
    plt.xlabel("p")
    plt.ylabel("pdf")
    plt.hist(pvalues, density=True)
    plt.show()

    display(Markdown("#### Test output (most suspect):"))
    display(Markdown(">" + most_suspect_output.replace('\n', '\n>')))

    display(Markdown("#### Parameters:"))
    display(Markdown(f"* seeds: {seeds}\n* limit: {limit}"))

In [None]:
# Run the ENT report once per generator mode, drawing a fresh batch of 1000
# unsigned 64-bit seeds for each mode.
# NOTE(review): the global NumPy RNG is not seeded in the visible cells, so
# the drawn seeds differ between kernel runs - confirm this is intended.
for mode in (
    "Monolithic",
    "Independent",
    "IndependentSimulation",
    "IndependentSimulationNoDispersal",
):
    seeds = np.random.randint(
        low=0, high=np.iinfo("uint64").max, size=1000, dtype="uint64",
    )

    test_randomness_ent(mode, seeds=seeds, limit='10MB')