In [1]:
import numpy as np
import statsmodels.stats.api as sms
from scipy.stats import t as t
import math
from itertools import combinations
import threading
#from tqdm import tqdm

In [2]:
from src.bootstrap import bootstrap_ci
from src.perm_test import search, ttest_ind, pval
from src.utils import partition

---

In [3]:
# Fix the global NumPy seed so the two draws below are reproducible.
np.random.seed(123)

# Sample sizes of the two groups.
n1, n2 = 12, 8

# Gamma parameters, each stored as (shape k, scale theta).
gamma1, gamma2 = (2, 5), (4, 3)

# True mean difference: each group's mean is shape * scale.
delta_true = gamma1[0] * gamma1[1] - gamma2[0] * gamma2[1]

# Order matters: x1 is drawn before x2 from the same global random stream.
x1 = np.random.gamma(*gamma1, n1)
x2 = np.random.gamma(*gamma2, n2)
print(x1, x2, sep="\n")

[ 3.10640204 16.57130493  5.12866305 24.18476682  0.43397101  5.8600058
  4.67440403  7.73681736  5.78342153  5.83228843 16.63987585 11.08298414]
[ 6.45227135 19.23152853 17.07707311  4.66477226  9.5959506   1.48357088
 17.23434512 10.03238453]


In [4]:
# Index table that is later passed to `search` inside passToThread.
# Judging by the displayed output, each row is an ordering of the
# n1 + n2 = 20 pooled indices (presumably one row per split into groups of
# n1 and n2 -- TODO confirm against src.utils.partition).
partitions = partition(n1, n2)
partitions

array([[ 0,  1,  2, ..., 17, 18, 19],
       [ 0,  1,  2, ..., 17, 18, 19],
       [ 0,  1,  2, ..., 17, 18, 19],
       ...,
       [ 7,  8, 10, ...,  5,  6,  9],
       [ 7,  9, 10, ...,  5,  6,  8],
       [ 8,  9, 10, ...,  5,  6,  7]])

In [5]:
def tconfint(alpha, x1, x2, pooled=True, alternative="two-sided"):
    """Student-t confidence interval for the difference in means of x1 and x2.

    Thin wrapper around statsmodels' ``CompareMeans.tconfint_diff``.

    Parameters
    ----------
    alpha : float
        Significance level passed to ``tconfint_diff``.
    x1, x2 : array_like
        The two samples; each is wrapped in ``DescrStatsW``.
    pooled : bool, default True
        If True the interval uses the pooled variance (``usevar="pooled"``);
        if False it uses ``usevar="unequal"``.
    alternative : str, default "two-sided"
        Forwarded unchanged to ``tconfint_diff``.

    Returns
    -------
    Whatever ``tconfint_diff`` returns; callers index it as
    ``(lower, upper)``.
    """
    cm = sms.CompareMeans(sms.DescrStatsW(x1), sms.DescrStatsW(x2))
    # Honour the `pooled` flag instead of always forcing the pooled variance.
    usevar = "pooled" if pooled else "unequal"
    return cm.tconfint_diff(alpha=alpha, alternative=alternative, usevar=usevar)

In [6]:
# Serialises updates to the shared accumulators (`intervals`, `n_captured`),
# because passToThread is run concurrently from several threads.
_capture_lock = threading.Lock()


def passToThread(batch_size):
    """Simulate `batch_size` sample pairs and record one interval for each.

    For every iteration this draws fresh gamma samples using the module-level
    settings (`gamma1`, `gamma2`, `n1`, `n2`), computes two t-intervals with
    `tconfint`, and hands their endpoints to `search` together with
    `partitions` to obtain the lower and/or upper bound, depending on the
    module-level `alternative`.

    Side effects (module-level state):
      * appends ``(lower, upper)`` to `intervals`;
      * adds 1 to `n_captured` when ``lower <= delta_true <= upper``.
    Both updates happen under `_capture_lock`, so concurrent workers cannot
    lose an increment.

    Parameters
    ----------
    batch_size : int
        Number of sample pairs to simulate in this call.

    Notes
    -----
    An ``AssertionError`` raised by `search` makes the iteration be skipped
    silently, so fewer than `batch_size` intervals may be recorded.
    The samples come from NumPy's global random state, which is shared by all
    threads, so the assignment of draws to threads is not reproducible.
    """
    global n_captured

    for _ in range(batch_size):
        x1 = np.random.gamma(gamma1[0], gamma1[1], n1)
        x2 = np.random.gamma(gamma2[0], gamma2[1], n2)
        print("[passToThread]", x1, x2)

        # NOTE(review): alpha = 0.001 and alpha = 0.20 are the levels actually
        # used; the names t99/t90 are only approximate labels. Presumably the
        # two intervals bracket the range that `search` explores -- confirm
        # against src.perm_test.search.
        t99 = tconfint(0.001, x1, x2, pooled, alternative)
        t90 = tconfint(0.20, x1, x2, pooled, alternative)

        # A bound that is not searched stays infinite (one-sided interval).
        lower, upper = -np.inf, np.inf
        try:
            if alternative != "smaller":
                lower = search(x1, x2, partitions, t99[0], t90[0])
            if alternative != "larger":
                upper = search(x1, x2, partitions, t90[1], t99[1])
        except AssertionError:
            continue

        # Plain int so the counter does not silently turn into a NumPy scalar.
        captured = int(lower <= delta_true <= upper)
        with _capture_lock:
            intervals.append((lower, upper))
            n_captured += captured

In [7]:
# Shared accumulators that passToThread updates: the number of intervals
# containing delta_true, and the list of (lower, upper) intervals.
n_captured, intervals = 0, []

# Settings read by passToThread: variance option handed to tconfint, and the
# alternative that decides which bound(s) are searched.
pooled, alternative = True, "smaller"

In [8]:
# Re-seed with the same value as the data-generation cell so the single run
# below redraws the same x1/x2 that were printed there.
np.random.seed(123)

In [9]:
%%time
# Single-sample, single-threaded run used as the timing baseline.
passToThread(1)

[passToThread] [ 3.10640204 16.57130493  5.12866305 24.18476682  0.43397101  5.8600058
  4.67440403  7.73681736  5.78342153  5.83228843 16.63987585 11.08298414] [ 6.45227135 19.23152853 17.07707311  4.66477226  9.5959506   1.48357088
 17.23434512 10.03238453]
[pval] [ 2.2503328  15.71523568  4.27259381 23.32869757 -0.42209823  5.00393656
  3.81833478  6.88074811  4.92735229  4.97621918 15.78380661 10.2269149 ] [ 6.45227135 19.23152853 17.07707311  4.66477226  9.5959506   1.48357088
 17.23434512 10.03238453]
t_obs = -0.8620486679895987
delta 0.8560692459202621
[pval] [-6.22400417  7.24089871 -4.20174317 14.8543606  -8.89643521 -3.47040042
 -4.65600219 -1.59358886 -3.54698469 -3.49811779  7.30946963  1.75257792] [ 6.45227135 19.23152853 17.07707311  4.66477226  9.5959506   1.48357088
 17.23434512 10.03238453]
t_obs = -3.6104848835268477
delta 9.330406219919354
[pval] [-1.98683569 11.47806719  0.03542532 19.09152909 -4.65926672  0.76676807
 -0.41883371  2.64357962  0.6901838   0.7390507  

In [10]:
# Interval recorded by the single run; with alternative="smaller" the lower
# bound is never searched and stays at -inf.
intervals

[(-inf, 4.828414702482336)]

---

In [19]:
# Number of worker threads; `threads` is a pre-sized placeholder list whose
# slots are overwritten with Thread objects when the workers are launched.
thread_count = 16
threads = [0 for _ in range(thread_count)]

# Total number of sample pairs to simulate and the share given to each thread.
remaining = 128
batch_size = remaining // thread_count

# Reset the shared accumulators before the threaded run.
n_captured, intervals = 0, []

In [20]:
%%time

# Launch one worker per slot. Every worker gets `batch_size` samples except
# the last, which takes whatever is still left in `remaining`.
for i in range(thread_count):
    n_samples = remaining if i == thread_count - 1 else batch_size
    remaining -= n_samples
    threads[i] = threading.Thread(target=passToThread, args=(n_samples,))
    threads[i].start()

# Block until every worker has finished.
for thread in threads:
    thread.join()

CPU times: user 49.6 s, sys: 14 s, total: 1min 3s
Wall time: 10.8 s


In [21]:
# Coverage tally: how many recorded intervals contain delta_true, and how
# many intervals were recorded in total.
n_captured, len(intervals)

(125, 128)

With multithreading, the average cost drops to \~80 ms per confidence interval (10.8 s of wall time for 128 intervals), compared with \~360 ms for the single confidence interval computed above.

In [10]:
# Inspect the module-level group-1 sample.
x1

array([ 3.10640204, 16.57130493,  5.12866305, 24.18476682,  0.43397101,
        5.8600058 ,  4.67440403,  7.73681736,  5.78342153,  5.83228843,
       16.63987585, 11.08298414])

In [11]:
# Inspect the module-level group-2 sample.
x2

array([ 6.45227135, 19.23152853, 17.07707311,  4.66477226,  9.5959506 ,
        1.48357088, 17.23434512, 10.03238453])

In [15]:
# Spot check of the test statistic with x1 shifted down by 3.
# NOTE(review): the literals 12 and 8 equal n1 and n2, presumably the group
# sizes -- confirm against src.perm_test.ttest_ind and prefer the named values.
ttest_ind(x1-3, x2, 12, 8)

-1.5573782195752903