In [31]:
import numpy as np
import multiprocess as mp
import statsmodels.stats.api as sms
#from tqdm import tqdm

In [8]:
from src.bootstrap import bootstrap_ci
from src.perm_test import search
from src.utils import partition

---

In [9]:
# Sample sizes of the two groups (columns of the simulated sample matrices).
n1, n2 = 12, 8
gamma1 = (2, 5)  # shape k, scale theta
gamma2 = (4, 3)  # same (shape k, scale theta) layout as gamma1
# A Gamma(k, theta) variable has mean k * theta, so this is mean(group 1) - mean(group 2).
delta_true = (gamma1[0] * gamma1[1]) - (gamma2[0] * gamma2[1])  # true mean difference
# NOTE(review): presumably the index arrangements of the n1 + n2 pooled observations
# used by the permutation test -- confirm against src.utils.partition.
partitions = partition(n1, n2)
partitions

array([[ 0,  1,  2, ..., 17, 18, 19],
       [ 0,  1,  2, ..., 17, 18, 19],
       [ 0,  1,  2, ..., 17, 18, 19],
       ...,
       [ 7,  8, 10, ...,  5,  6,  9],
       [ 7,  9, 10, ...,  5,  6,  8],
       [ 8,  9, 10, ...,  5,  6,  7]])

In [10]:
# https://www.statsmodels.org/dev/generated/statsmodels.stats.weightstats.CompareMeans.tconfint_diff.html
def tconfint(alpha, x1, x2, pooled=True, alternative="two-sided"):
    """Return the t-based confidence interval for the difference in means of x1 and x2.

    Thin wrapper around statsmodels' ``CompareMeans.tconfint_diff``.

    Parameters
    ----------
    alpha : float
        Significance level forwarded to ``tconfint_diff``.
    x1, x2 : array_like
        The two samples; each one is wrapped in ``DescrStatsW``.
    pooled : bool, default True
        Selects ``usevar="pooled"`` when true and ``usevar="unequal"`` otherwise.
    alternative : str, default "two-sided"
        Forwarded unchanged to ``tconfint_diff``.

    Returns
    -------
    The value returned by ``tconfint_diff``; callers in this notebook index it
    as ``[0]`` (lower limit) and ``[1]`` (upper limit).
    """
    stats1 = sms.DescrStatsW(x1)
    stats2 = sms.DescrStatsW(x2)
    comparison = sms.CompareMeans(stats1, stats2)
    variance_mode = "pooled" if pooled else "unequal"
    return comparison.tconfint_diff(alpha, alternative, usevar=variance_mode)

In [107]:
def passToThread(batch_start, batch_size, two_sided=True, pooled=True):
    """Count how many intervals in one batch of simulated samples cover ``delta_true``.

    For every row index in ``range(batch_start, batch_start + batch_size)`` an
    interval is built by calling ``search`` on the corresponding rows of the
    notebook-level arrays ``x1s`` and ``x2s`` (together with ``partitions``),
    and the interval is checked against the notebook-level ``delta_true``.

    Parameters
    ----------
    batch_start : int
        First row index of ``x1s`` / ``x2s`` handled by this call.
    batch_size : int
        Number of consecutive rows to process.
    two_sided : bool, default True
        If true, build a single interval with ``alternative="two-sided"``.
        Otherwise build one interval with ``"smaller"`` and one with
        ``"larger"`` and intersect them.
    pooled : bool, default True
        Forwarded to both ``tconfint`` and ``search``.

    Returns
    -------
    tuple
        ``(n_captured, n_error)``: how many intervals contained ``delta_true``
        and how many rows were skipped because an ``AssertionError`` was raised
        while building the interval.
    """

    def interval_for(sim_idx, alternative):
        # Build the (lower, upper) limits for one simulated pair of samples.
        sample1, sample2 = x1s[sim_idx], x2s[sim_idx]
        # Two t-intervals supply the pair of bracket values handed to `search`:
        # alpha=0.001 gives the wider one, alpha=0.20 the narrower one.
        wide = tconfint(0.001, sample1, sample2, pooled, alternative)
        narrow = tconfint(0.20, sample1, sample2, pooled, alternative)

        # A side that is not searched stays unbounded.
        lo = -np.inf
        hi = np.inf
        if alternative != "smaller":
            lo = search(sample1, sample2, partitions, wide[0], narrow[0], pooled=pooled, alternative=alternative)
        if alternative != "larger":
            hi = search(sample1, sample2, partitions, narrow[1], wide[1], pooled=pooled, alternative=alternative)
        return lo, hi

    n_captured = 0
    n_error = 0
    for sim_idx in range(batch_start, batch_start + batch_size):
        try:
            if not two_sided:
                # Intersect the two one-sided intervals.
                lo_smaller, hi_smaller = interval_for(sim_idx, "smaller")
                lo_larger, hi_larger = interval_for(sim_idx, "larger")
                lower, upper = max(lo_smaller, lo_larger), min(hi_smaller, hi_larger)
            else:
                lower, upper = interval_for(sim_idx, "two-sided")
        except AssertionError:
            # Count the failure and move on to the next simulated sample.
            n_error += 1
        else:
            above_lower = lower <= delta_true
            below_upper = delta_true <= upper
            # Product of the two truth values adds 1 only when both hold.
            n_captured += above_lower * below_upper
    return n_captured, n_error

In [109]:
# Fix the global NumPy seed so both sample matrices are reproducible.
np.random.seed(123)
# One simulated data set per row; group 1 is drawn before group 2.
x1s = np.random.gamma(shape=gamma1[0], scale=gamma1[1], size=(7300, n1))
x2s = np.random.gamma(shape=gamma2[0], scale=gamma2[1], size=(7300, n2))

In [114]:
# Work is split into n_batches consecutive batches of batch_size simulated samples each.
batch_size = 8
n_batches = 16
# NOTE(review): this pool is never closed/joined in the visible cells, and the
# pooled-variance run reuses it -- confirm it is cleaned up elsewhere.
pool = mp.Pool(mp.cpu_count())
# invert two one-sided tests (two_sided=False) w/ unpooled variance (pooled=False)
# Each tuple is (batch_start, batch_size, two_sided, pooled); one (n_captured, n_error) pair comes back per batch.
%time pool.starmap(passToThread, [(i * batch_size, batch_size, False, False) for i in range(n_batches)])

CPU times: user 7.37 ms, sys: 3.03 ms, total: 10.4 ms
Wall time: 11.3 s


[(7, 0),
 (6, 0),
 (7, 0),
 (8, 0),
 (8, 0),
 (7, 0),
 (8, 0),
 (8, 0),
 (8, 0),
 (8, 0),
 (7, 0),
 (8, 0),
 (7, 0),
 (7, 0),
 (6, 0),
 (8, 0)]

In [112]:
# invert two one-sided tests (two_sided=False) w/ pooled variance (pooled=True)
# Each tuple is (batch_start, batch_size, two_sided, pooled); one (n_captured, n_error) pair comes back per batch.
%time pool.starmap(passToThread, [(i * batch_size, batch_size, False, True) for i in range(n_batches)])

CPU times: user 18.2 ms, sys: 7.25 ms, total: 25.5 ms
Wall time: 11.4 s


[(7, 0),
 (6, 0),
 (7, 0),
 (8, 0),
 (8, 0),
 (7, 0),
 (8, 0),
 (8, 0),
 (8, 0),
 (8, 0),
 (7, 0),
 (8, 0),
 (8, 0),
 (7, 0),
 (7, 0),
 (8, 0)]