In [None]:
import numpy as np
from numpy import mean, var, std
import pandas as pd
import scipy.stats as stats

import pytest

from estimators import Paired, Single, PairedExperimental, SingleExperimental
from test_estimators import BernoulliModel, BernoulliModelStratified
import plotly.express as px
import importlib
import test_estimators as te
import estimators
importlib.reload(estimators)
importlib.reload(te)

In [None]:
# pA = np.random.beta(0.2, 0.8, (100, 1))

def _table_single(pA, K, N):
    """Build the evaluation table for the single-sample estimators.

    A ``BernoulliModel`` is constructed from ``pA``, ``K`` and ``N``; its
    ``true_vars()`` serves as the reference. Both are passed, together with
    the estimator constructors under test, to
    ``te.TestSingleEstimators().test_estimators`` (with ``verbose=False``).

    Returns:
        Whatever ``test_estimators`` returns. Callers in this notebook index
        it by column names such as "rms", "t_score", "unbiased" and
        "estimator".
    """
    bernoulli = BernoulliModel(pA, K=K, N=N)
    reference = bernoulli.true_vars()
    # Named `candidates` rather than `estimators` so the imported module of
    # that name is not shadowed inside this function.
    candidates = [
        Single.from_samples,
        Single.from_samples_unbiasedK,
        SingleExperimental.from_samples_unbiasedNK,
    ]
    harness = te.TestSingleEstimators()
    return harness.test_estimators(reference, bernoulli, candidates, verbose=False)

def test_single():
    """Check the single-sample estimators on several probability setups.

    Every case builds a table with ``_table_single`` and asserts on its
    "t_score" and/or "rms" columns:

    * two hand-picked two-row setups: rows flagged ``unbiased`` must have
      ``|t_score| < 4``;
    * two Beta(0.2, 0.8) setups: every row must stay under an rms bound;
    * a uniform(0.2, 0.6) setup: every estimator except "from_samples" must
      have rms < 0.25 and ``|t_score| < 4``, while at least one
      "from_samples" row must exceed rms 0.25;
    * a Beta(2, 1) setup: every estimator except "from_samples" must have
      rms < 0.25.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    # Hand-picked two-row cases: only the bias check applies here.
    for probs in ([[0.3], [0.1]], [[0.1], [0.9]]):
        pA = np.array(probs)
        t = _table_single(pA, K=10, N=2)
        t_unbiased = t[t["unbiased"] == True]
        assert all(t_unbiased["t_score"].abs() < 4)

    pA = np.random.beta(0.2, 0.8, (200, 1))
    t = _table_single(pA, 10, N=100)
    assert all(t["rms"] < 0.26)

    pA = np.random.beta(0.2, 0.8, (400, 1))
    t = _table_single(pA, 20, N=400)
    assert all(t["rms"] < 0.13)

    pA = 0.2 + 0.4 * np.random.rand(1000, 1)
    t = _table_single(pA, K=10, N=500)
    is_naive = t["estimator"] == "from_samples"
    assert all(t[~is_naive]["rms"] < 0.25)
    # The bias check must be two-sided: without .abs() a strongly negative
    # t-score would slip through, unlike the checks on the cases above.
    assert all(t[~is_naive]["t_score"].abs() < 4)
    assert any(t[is_naive]["rms"] > 0.25)

    pA = np.random.beta(2, 1, (200, 1))
    t = _table_single(pA, 10, N=100)
    assert all(t[t["estimator"] != "from_samples"]["rms"] < 0.25)

# Run the single-estimator checks immediately when this cell executes; any
# failed assertion surfaces as an AssertionError in the cell output.
test_single()


In [None]:
def _subtest_stratified(pA, K):
    """Check the stratified estimator on one probability setup.

    Builds a ``te.BernoulliModelStratified`` with ``N`` equal to the number
    of rows of ``pA``, evaluates
    ``SingleExperimental.from_samples_unbiased_stratified`` against the
    model's ``true_vars()``, and validates the resulting table.

    Args:
        pA: array whose first dimension gives ``N`` (``pA.shape[0]``).
        K: forwarded to the model as ``K``.

    Raises:
        ValueError: if any row flagged ``unbiased`` has ``|t_score| >= 4``,
            or if any row with ``comp == "var(A)"`` has ``rms > 0.25``.
    """
    model = te.BernoulliModelStratified(pA, K=K, N=pA.shape[0])
    truth = model.true_vars()
    estimators_to_test = [
        SingleExperimental.from_samples_unbiased_stratified
    ]
    t = te.TestSingleEstimators().test_estimators(truth, model, estimators_to_test, verbose=False)

    t_unbiased = t[t["unbiased"] == True]
    if not all(t_unbiased["t_score"].abs() < 4):
        # Python 3 cannot raise a bare string (it fails with a TypeError that
        # hides the message), so raise a real exception like the check below.
        raise ValueError("some unbiased estimator appears to be biased")

    if any(t[t["comp"] == "var(A)"]["rms"] > 0.25):
        raise ValueError("some total variance has unacceptable relative error")

def test_stratified():
    """Run ``_subtest_stratified`` on a few two-row probability setups.

    The (0.9, 0.1) setup is checked with K=2 and K=10, the (0.4, 0.1) setup
    with K=20. Any failure propagates from ``_subtest_stratified``.
    """
    far_apart = np.array([[0.9], [0.1]])
    both_low = np.array([[0.4], [0.1]])
    cases = (
        (far_apart, 2),
        (far_apart, 10),
        (both_low, 20),
    )
    for probs, k in cases:
        _subtest_stratified(probs, k)

In [None]:
def visualize_table(table):
    """Display grouped bar charts of an estimator evaluation table.

    One chart is shown per metric column ("rms", "t_score",
    "mean_rel_error"), with "estimator" on the x-axis and bars grouped and
    coloured by "comp". The "t_score" chart has its y-axis fixed to
    [-10, 10].
    """
    # None means: leave the y-axis range to plotly's default.
    y_range_by_metric = {
        "rms": None,
        "t_score": [-10, 10],
        "mean_rel_error": None,
    }
    for metric, y_range in y_range_by_metric.items():
        chart = px.bar(table, x="estimator", y=metric, color="comp", barmode="group")
        if y_range is not None:
            chart.update_yaxes(range=y_range)
        display(chart)
    
def _table_paired(pA, pB, K, N):
    """Build the evaluation table for the paired estimators.

    Two ``BernoulliModel`` instances are created, one from ``pA`` and one
    from ``pB``, both with the given ``K`` and ``N``. The reference comes
    from ``Paired.from_bernoulli_prob(pA, pB)``.

    Returns:
        The result of ``te.TestPairedEstimators().test_estimators`` for
        ``Paired.from_samples`` and ``Paired.from_samples_unbiasedK``
        (called with ``verbose=False``).
    """
    model_a = BernoulliModel(pA, K=K, N=N)
    model_b = BernoulliModel(pB, K=K, N=N)
    reference = Paired.from_bernoulli_prob(pA, pB)
    # Named `candidates` rather than `estimators` so the imported module of
    # that name is not shadowed inside this function.
    candidates = [Paired.from_samples, Paired.from_samples_unbiasedK]
    harness = te.TestPairedEstimators()
    return harness.test_estimators(reference, model_a, model_b, candidates, verbose=False)


def test_paired():
    """Check the paired estimators on two probability setups.

    Case 1 draws ``pA`` and ``pB`` independently from Beta distributions and
    requires: ``|t_score| < 4`` for rows flagged ``unbiased``, rms < 0.25
    everywhere, and at least one row with rms > 0.1.

    Case 2 sets ``pB`` to ``pA`` plus Gaussian noise (scale 0.1, clipped to
    [0, 1]), displays the table, and requires that for the "var(E(A-B))"
    component the "from_samples" rms is more than twice the
    "from_samples_unbiasedK" rms.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    # Case 1: independently drawn probabilities.
    pA = np.random.beta(0.3, 0.7, (100, 1))
    pB = np.random.beta(0.2, 0.8, (100, 1))
    table = _table_paired(pA, pB, K=10, N=100)
    unbiased_rows = table[table["unbiased"] == True]
    assert all(unbiased_rows["t_score"].abs() < 4)
    assert all(table["rms"] < 0.25)
    assert any(table["rms"] > 0.1)

    # Case 2: B is a noisy copy of A.
    pA = np.random.beta(0.3, 0.7, (500, 1))
    noise = 0.1 * np.random.randn(*pA.shape)
    pB = np.clip(pA + noise, 0, 1)
    table = _table_paired(pA, pB, K=100, N=500)
    display(table)

    def var_e_rms(estimator_name):
        """Return the first "rms" value of the var(E(A-B)) row for one estimator."""
        selected = table[
            (table["comp"] == "var(E(A-B))") & (table["estimator"] == estimator_name)
        ]
        return selected["rms"].values[0]

    # Here the naive estimator is expected to have markedly higher error when
    # estimating var(E(A-B)).
    naive_rms = var_e_rms("from_samples")
    unbiased_k_rms = var_e_rms("from_samples_unbiasedK")
    assert naive_rms > 2 * unbiased_k_rms
    
# test_paired()

# Ad-hoc experiment, run at cell execution. pA holds 500 Beta(0.3, 0.7) draws;
# pB copies pA element-wise wherever a uniform draw falls below 0.6 and is
# otherwise an independent Beta(0.2, 0.8) draw.
pA = np.random.beta(0.3, 0.7, (500, 1))
pB = np.where(np.random.rand(500, 1) < 0.6, pA, np.random.beta(0.2, 0.8, (500, 1)))
# Evaluate the paired estimators on this setup (K=10, N=200) and show the table.
t = _table_paired(pA, pB, K=10, N=200)
display(t)
