In [1]:
import numpy as np
import statsmodels.stats.api as sms
from scipy.stats import t as t
import math
from itertools import combinations
import threading
#from tqdm import tqdm

In [2]:
from src.bootstrap import bootstrap_ci
from src.perm_test import search, ttest_ind, pval
from src.utils import partition

---

In [31]:
np.random.seed(123)
n1, n2 = 12, 8
gamma1 = (2, 5)  # shape k, scale theta
gamma2 = (4, 3)
delta_true = (gamma1[0] * gamma1[1]) - (gamma2[0] * gamma2[1])  # true mean difference
x1 = np.random.gamma(gamma1[0], gamma1[1], n1)
x2 = np.random.gamma(gamma2[0], gamma2[1], n2)
print(x1)
print(x2)
print(delta_true)

[ 3.10640204 16.57130493  5.12866305 24.18476682  0.43397101  5.8600058
  4.67440403  7.73681736  5.78342153  5.83228843 16.63987585 11.08298414]
[ 6.45227135 19.23152853 17.07707311  4.66477226  9.5959506   1.48357088
 17.23434512 10.03238453]
-2


In [4]:
partitions = partition(n1, n2)
partitions

array([[ 0,  1,  2, ..., 17, 18, 19],
       [ 0,  1,  2, ..., 17, 18, 19],
       [ 0,  1,  2, ..., 17, 18, 19],
       ...,
       [ 7,  8, 10, ...,  5,  6,  9],
       [ 7,  9, 10, ...,  5,  6,  8],
       [ 8,  9, 10, ...,  5,  6,  7]])

In [5]:
def tconfint(alpha, x1, x2, pooled=True, alternative="two-sided"):
    cm = sms.CompareMeans(sms.DescrStatsW(x1), sms.DescrStatsW(x2))
    return cm.tconfint_diff(alpha, alternative, usevar="pooled")

In [97]:
def passToThread(batch_start, batch_size, two_sided=True):
    global n_captured
    
    def invert_test(alternative, batch_idx):
        t99 = tconfint(0.001, x1s[batch_idx], x2s[batch_idx], pooled, alternative)
        t90 = tconfint(0.20, x1s[batch_idx], x2s[batch_idx], pooled, alternative)
        lower, upper = -np.inf, np.inf
        if alternative != "smaller":
            lower = search(x1, x2, partitions, t99[0], t90[0], alternative=alternative)
        if alternative != "larger":
            upper = search(x1, x2, partitions, t90[1], t99[1], alternative=alternative)
        return lower, upper

    for i in range(batch_start, batch_start + batch_size):
        try:
            if two_sided:
                lower, upper = invert_test("two-sided", i)
            else:
                lower1, upper1 = invert_test("smaller", i)
                lower2, upper2 = invert_test("larger", i)
                lower, upper = max(lower1, lower2), min(upper1, upper2)
        except AssertionError:
            continue
        intervals.append((lower, upper))
        n_captured += (lower <= delta_true) * (delta_true <= upper)

In [98]:
np.random.seed(123)
x1s = np.random.gamma(gamma1[0], gamma1[1], (7300, n1))
x2s = np.random.gamma(gamma1[0], gamma1[1], (7300, n2))

In [101]:
n_captured = 0
intervals = []
pooled = True
%time passToThread(0, 1, two_sided=True)
intervals

CPU times: user 233 ms, sys: 48.9 ms, total: 282 ms
Wall time: 280 ms


[(-4.475304927828296, 4.841502455220187)]

In [102]:
n_captured = 0
intervals = []
pooled = True
%time passToThread(0, 1, two_sided=False)
intervals

CPU times: user 128 ms, sys: 27.1 ms, total: 155 ms
Wall time: 153 ms


[(-7.008071218100954, 3.6351771998122877)]

---

In [19]:
thread_count = 16
threads = [0] * thread_count

remaining = 128
batch_size = remaining // thread_count

intervals = []
n_captured = 0

In [20]:
%%time

for i in range(thread_count):
    n_samples = batch_size if i < thread_count-1 else remaining
    threads[i] = threading.Thread(target=passToThread, args=(n_samples,))
    threads[i].start()
    remaining -= n_samples
    
for thread in threads:
    thread.join()

CPU times: user 49.6 s, sys: 14 s, total: 1min 3s
Wall time: 10.8 s


In [21]:
n_captured, len(intervals)

(125, 128)

Multithreading allows us to compute one confidence interval in \~80 ms, even faster than it took to compute the one confidence interval above (\~360 ms).