Types of Confidence Intervals:

1. Invert two 1-sided tests
- invert one test using `alternative == "less"`, and another using `alternative == "greater"`
- for both tests, use `alpha/2`

2. Invert one 2-sided test

3. t-test confidence interval for difference in means

4. Bootstrap confidence interval

For types 1-3, each type comes in two subtypes: pooled vs. unpooled variances. Ultimately, we will have 3 × 2 + 1 = 7 types of confidence intervals to compare (the bootstrap CI has no subtypes, since it does not rely on assumptions about the population variances).

For each type of CI, simulate 7300 instances (generating new samples each time, but keeping the distribution parameters the same). Then measure the coverage probability, i.e., the proportion of the 7300 CIs that capture the true difference in means (which is known, since the data are simulated).

In [None]:
import numpy as np
import statsmodels.stats.api as sms
import scipy.stats as stats
import math
from itertools import combinations
#from tqdm import tqdm

In [None]:
from src.search import search
from src.bootstrap import bootstrap_ci
from src.perm_test import pval

In [None]:
def get_partitions(n1, n2):
    """Enumerate every split of ``range(n1 + n2)`` into groups of n1 and n2 indices.

    Returns a list with one ``(group1, group2)`` tuple per combination, in the
    order produced by ``itertools.combinations``. ``group1`` is the list of
    the ``n1`` chosen indices and ``group2`` the list of the remaining
    indices; both are in ascending order.
    """
    pool = range(n1 + n2)

    def split(chosen):
        # ``chosen`` comes straight from combinations(), so it is ascending and
        # duplicate-free; the second group is whatever is left of the pool.
        picked = set(chosen)
        return list(chosen), [k for k in pool if k not in picked]

    return [split(chosen) for chosen in combinations(pool, n1)]

In [None]:
def tconfint(alpha, pooled, x1, x2):
    """Return statsmodels' t confidence interval for the difference in means.

    ``alpha`` is forwarded as the significance level of the interval.
    ``pooled`` selects ``usevar="pooled"`` when truthy and
    ``usevar="unequal"`` otherwise. ``x1`` and ``x2`` are the two samples.
    """
    first = sms.DescrStatsW(x1)
    second = sms.DescrStatsW(x2)
    comparison = sms.CompareMeans(first, second)
    if pooled:
        variance_mode = "pooled"
    else:
        variance_mode = "unequal"
    return comparison.tconfint_diff(alpha, usevar=variance_mode)

In [None]:
# Sizes of the two simulated samples, and the precomputed list of every way to
# split the 20 pooled indices into groups of 12 and 8 (C(20, 12) = 125,970).
n1 = 12
n2 = 8
partitions = get_partitions(n1, n2)
# partitions  # uncomment to inspect (very long output)

In [None]:
# Coverage experiment: for each simulated pair of gamma samples, locate the CI
# endpoints with search() (imported from src.search; its semantics are not
# visible here), bracketing each endpoint by the 99% and 90% t-interval bounds.
# NOTE(review): alpha and alternative are assigned but never referenced in
# this cell — confirm whether search() was meant to receive them.
alpha = 0.05
alternative = "less"
pooled = True  # use pooled-variance t-intervals as the search brackets

intervals = []  # one (lower, upper) pair per successful iteration
n_captured = n_error = 0  # intervals containing delta_true / failed searches

gamma1 = (2, 5)  # shape k, scale theta
gamma2 = (4, 3)
delta_true = (gamma1[0] * gamma1[1]) - (gamma2[0] * gamma2[1])  # true mean difference

# NOTE(review): the notebook intro describes 7300 instances per CI type, but
# only a single instance is simulated here.
n_samples = 1

for _ in range(n_samples):
    # Fresh samples on every iteration; the gamma parameters stay fixed.
    x1 = np.random.gamma(gamma1[0], gamma1[1], n1)
    x2 = np.random.gamma(gamma2[0], gamma2[1], n2)

    # 99% and 90% t-intervals whose endpoints bracket the searches below.
    t99 = tconfint(0.01, pooled, x1, x2)
    t90 = tconfint(0.10, pooled, x1, x2)

    # Lower endpoint: searched between the two lower t-bounds.
    lower = search(x1, x2, partitions, t99[0], t90[0])
    if lower is None:
        # search() gave no result: count it as an error and drop this sample.
        n_error += 1
        continue

    # Upper endpoint: searched between the two upper t-bounds.
    upper = search(x1, x2, partitions, t90[1], t99[1])
    if upper is None:
        n_error += 1
        continue

    intervals.append((lower, upper))
    # The product is truthy only when lower <= delta_true <= upper.
    n_captured += (lower <= delta_true) * (delta_true <= upper)

In [None]:
# Display the (lower, upper) pairs collected by the simulation loop.
intervals

In [10]:
# Display the true difference in means (k1 * theta1 - k2 * theta2).
delta_true

-2

In [11]:
# Display the (lower, upper) pairs collected by the simulation loop.
intervals

[(-10.3336376245957, 3.2744722384624785)]

In [None]:
# p-values evaluated at the two endpoints of the first stored interval, using
# the x1/x2 left over from the simulation cell. The commented-out lines below
# would redraw the samples instead.
#x1 = np.random.gamma(gamma1[0], gamma1[1], n1)
#x2 = np.random.gamma(gamma2[0], gamma2[1], n2)
p_lower, p_upper = (
    pval(x1, x2, partitions, delta=endpoint) for endpoint in intervals[0]
)
p_lower, p_upper

## Scrap Code

In [None]:
# Two small scrap samples. Only the first four observations of each series
# are used; the remaining values are kept here for reference:
#   a: 17.27, 18.64, 17.82, 19.00, 19.58, 18.04, 17.27, 19.19
#   b: 18.70, 19.54, 19.04, 20.67, 20.71, 18.99, 19.37, 19.06
a = np.array([18.85, 16.93, 19.29, 18.31])
b = np.array([19.23, 19.57, 19.50, 18.64])

In [None]:
# Pooled-variance t-interval (alpha = 0.05) for the difference in means of
# the scrap samples a and b.
tconfint(0.05, True, a, b)

In [None]:
from itertools import combinations

In [None]:
# Concatenate the two scrap samples into a single 1-D array.
np.append(a,b)

In [None]:
def get_groups(total_length, idxs):
    """Split ``range(total_length)`` into the chosen indices and the rest.

    Args:
        total_length: Size of the index range ``0 .. total_length - 1``.
        idxs: Iterable of indices that form the first group. They may be
            given in any order and may repeat.

    Returns:
        A tuple ``(idxs, rest)``. ``idxs`` is the argument itself, returned
        unchanged. ``rest`` is the ascending list of every index in
        ``range(total_length)`` that does not occur in ``idxs``.
    """
    # A membership test replaces the earlier single forward scan, which
    # assumed ``idxs`` was sorted ascending and otherwise leaked chosen
    # indices into the second group (e.g. idxs=[3, 1] yielded 1 in ``rest``).
    chosen = set(idxs)
    return idxs, [i for i in range(total_length) if i not in chosen]

# Example: get_groups(7, [1, 2, 3]) -> ([1, 2, 3], [0, 4, 5, 6])

In [None]:
def get_partitions(combined, n1):
    """List every way to split ``combined`` into a group of ``n1`` items and the rest.

    Each entry of the returned list is a ``(group1, group2)`` tuple of lists.
    ``group1`` holds the elements at one combination of ``n1`` positions and
    ``group2`` the elements at all other positions, both in positional order.
    Entries follow the order of ``itertools.combinations``.

    NOTE: this definition reuses the name of the index-based
    ``get_partitions(n1, n2)`` defined earlier in the notebook and replaces
    it once this cell has been run.
    """
    size = len(combined)

    def partition(chosen):
        # ``chosen`` is an ascending, duplicate-free tuple of positions.
        picked = set(chosen)
        group1 = [combined[k] for k in chosen]
        group2 = [combined[k] for k in range(size) if k not in picked]
        return group1, group2

    return [partition(chosen) for chosen in combinations(range(size), n1)]


# Example: get_partitions(np.append(a, b), len(a))

In [None]:
# NOTE(review): `a` is created as a 1-D array, and NumPy reads a tuple index
# as one index per axis, so this lookup is expected to raise IndexError.
# A list index (a[[1, 2]]) would pick elements 1 and 2 — confirm the intent.
x = (1,2)
a[x]

In [None]:
# Bootstrap CI on the scrap samples via src.bootstrap.bootstrap_ci.
# presumably `epochs` is the number of bootstrap resamples — TODO confirm.
bootstrap_ci(a, b, epochs=100)

In [None]:
# Direct statsmodels call on two toy samples, for comparison with tconfint().
# Reference:
# https://www.statsmodels.org/stable/generated/statsmodels.stats.weightstats.CompareMeans.html

X1 = np.arange(10, 21)
X2 = np.arange(20, 26.5, .5)

cm = sms.CompareMeans(sms.DescrStatsW(X1), sms.DescrStatsW(X2))
# The variance assumption follows the notebook-level `pooled` flag.
print(
    cm.tconfint_diff(
        0.05,
        usevar="pooled" if pooled else "unequal",
    )
)

In [None]:
# Same toy samples through the tconfint() wrapper with pooled=True.
tconfint(0.05, True, X1, X2)