In [1]:
import numpy as np
import statsmodels.stats.api as sms
from scipy.stats import t as t
import math
from itertools import combinations
import threading
#from tqdm import tqdm

In [10]:
from src.bootstrap import bootstrap_ci
from src.perm_test import search
from src.utils import partition

---

In [3]:
# Sample sizes of the two simulated groups.
n1 = 12
n2 = 8

# Gamma parameters for each group, stored as (shape k, scale theta).
gamma1 = (2, 5)
gamma2 = (4, 3)

# True difference in means; a gamma mean is shape * scale.
delta_true = math.prod(gamma1) - math.prod(gamma2)

In [4]:
def tconfint(alpha, x1, x2, pooled=True, alternative="two-sided"):
    """Return the t-based confidence interval for the difference in means.

    Wraps ``statsmodels`` ``CompareMeans.tconfint_diff`` for the two samples
    ``x1`` and ``x2``.

    Parameters
    ----------
    alpha : float
        Significance level handed to ``tconfint_diff``.
    x1, x2 : array-like
        The two samples being compared.
    pooled : bool
        ``True`` selects ``usevar="pooled"``; ``False`` selects
        ``usevar="unequal"``.
    alternative : str
        Passed through unchanged to ``tconfint_diff``.

    Returns
    -------
    Whatever ``tconfint_diff`` returns; callers index it as (lower, upper).
    """
    variance_mode = "pooled" if pooled else "unequal"
    first_sample = sms.DescrStatsW(x1)
    second_sample = sms.DescrStatsW(x2)
    comparison = sms.CompareMeans(first_sample, second_sample)
    return comparison.tconfint_diff(alpha, alternative, usevar=variance_mode)

In [11]:
# Serialises updates to the shared ``intervals`` / ``n_captured`` globals.
# ``n_captured += ...`` is a read-modify-write on a global and can lose counts
# when several worker threads finish a replicate at the same time.
_results_lock = threading.Lock()


def passToThread(batch_start, batch_size, two_sided=True, pooled=True):
    """Simulate a batch of replicates and record one interval per replicate.

    For each index ``i`` in ``range(batch_start, batch_start + batch_size)``
    two gamma samples are drawn from a generator seeded with ``i``. The
    interval endpoints are obtained from ``search``, bracketed by two
    t-intervals. The interval is appended to the global ``intervals`` list,
    and the global ``n_captured`` counter is incremented when the interval
    contains ``delta_true``.

    Parameters
    ----------
    batch_start : int
        First replicate index; it is also used as the first RNG seed.
    batch_size : int
        Number of consecutive replicate indices to process.
    two_sided : bool
        If True, run one search with ``alternative="two-sided"``. Otherwise
        run the ``"smaller"`` and ``"larger"`` searches on the same data and
        intersect the two one-sided results.
    pooled : bool
        Forwarded to ``tconfint`` and ``search``.

    Notes
    -----
    Reads the globals ``gamma1``, ``gamma2``, ``n1``, ``n2``, ``delta_true``
    and ``partitions``; mutates the globals ``intervals`` and ``n_captured``.
    ``partitions`` must already exist in the notebook namespace before this
    is called (presumably built with ``src.utils.partition`` -- TODO confirm).
    """
    global n_captured

    def invert_test(batch_idx, alternative):
        # Use a private generator instead of np.random.seed(): seeding the
        # process-wide RNG from several threads lets one worker reseed between
        # another worker's seed and draws, which breaks reproducibility.
        # RandomState(seed) yields the same legacy stream as np.random.seed(seed).
        # NOTE(review): if `search` itself draws from the global NumPy RNG it
        # is no longer seeded by this function -- confirm it is deterministic.
        rng = np.random.RandomState(batch_idx)
        x1 = rng.gamma(gamma1[0], gamma1[1], n1)
        x2 = rng.gamma(gamma2[0], gamma2[1], n2)

        # Brackets for the endpoint search: a very wide t-interval
        # (alpha = 0.001) and a narrow one (alpha = 0.20).
        outer = tconfint(0.001, x1, x2, pooled, alternative)
        inner = tconfint(0.20, x1, x2, pooled, alternative)

        # A one-sided alternative leaves the opposite endpoint unbounded.
        lower, upper = -np.inf, np.inf
        if alternative != "smaller":
            lower = search(x1, x2, partitions, outer[0], inner[0], pooled=pooled, alternative=alternative)
        if alternative != "larger":
            upper = search(x1, x2, partitions, inner[1], outer[1], pooled=pooled, alternative=alternative)
        return lower, upper

    for i in range(batch_start, batch_start + batch_size):
        try:
            if two_sided:
                lower, upper = invert_test(i, "two-sided")
            else:
                # Both calls use seed i, so they see the same samples.
                lower1, upper1 = invert_test(i, "smaller")
                lower2, upper2 = invert_test(i, "larger")
                lower, upper = max(lower1, lower2), min(upper1, upper2)
        except AssertionError:
            # A replicate whose computation trips an assertion is dropped from
            # both `intervals` and `n_captured`.
            # NOTE(review): presumably raised inside `search` -- confirm, and
            # consider counting these so the coverage denominator is explicit.
            continue

        captured = int(lower <= delta_true <= upper)
        with _results_lock:
            intervals.append((lower, upper))
            n_captured += captured

In [None]:
n_captured = 0
intervals = []
pooled = True
%time passToThread(1, 1, two_sided=True, pooled=False)
intervals

In [194]:
n_captured = 0
intervals = []
pooled = True
%time passToThread(1, 1, two_sided=True, pooled=True)
intervals

CPU times: user 285 ms, sys: 58.6 ms, total: 344 ms
Wall time: 342 ms


[(-11.898873007088184, 4.710796316314038)]

---

In [19]:
# Work split: `remaining` replicates are shared between `thread_count` workers.
thread_count = 16
remaining = 128
batch_size = remaining // thread_count

# Placeholder slots; each is overwritten with a Thread object at launch time.
threads = [0 for _ in range(thread_count)]

# Shared accumulators filled in by the workers.
n_captured = 0
intervals = []

In [20]:
%%time

for i in range(thread_count):
    n_samples = batch_size if i < thread_count-1 else remaining
    threads[i] = threading.Thread(target=passToThread, args=(n_samples,))
    threads[i].start()
    remaining -= n_samples
    
for thread in threads:
    thread.join()

CPU times: user 49.6 s, sys: 14 s, total: 1min 3s
Wall time: 10.8 s


In [21]:
# (number of intervals that contain delta_true, number of intervals recorded)
(n_captured, len(intervals))

(125, 128)

With multithreading, the 128 confidence intervals above took 10.8 s of wall time, i.e. an average of \~80 ms per interval. That is roughly four times faster than the \~340 ms it took to compute the single confidence interval earlier.