In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st

from statsmodels.stats import proportion, weightstats
from math import ceil
from IPython.display import display, Latex

# Reference

> [Unit: Two-sample inference for the difference between groups](https://www.khanacademy.org/math/statistics-probability/significance-tests-confidence-intervals-two-samples)<br>

---

# Two-proportion z test

> [TO POOL OR NOT TO POOL](https://www.ijpam.eu/contents/2013-89-4/5/5.pdf): There are two versions of this test, one is used when the variances of the two populations are equal (the pooled test) and the other one is used when the variances of the two populations are unequal (the unpooled test).

---

## Pooled z test

### Formula

$\displaystyle H_{0}\colon p_{1}=p_{2} \Rightarrow d_0 = 0$

$\displaystyle z={\frac {({\hat {p}}_{1}-{\hat {p}}_{2})}{\sqrt {{\hat {p}}(1-{\hat {p}})({\frac {1}{n_{1}}}+{\frac {1}{n_{2}}})}}}$

$\displaystyle \hat{p}=\frac{x_1 + x_2}{n_1 + n_2}$

### Conditions

- **Random**
- **Normal**
    - $n_1 p_1 > 5$ **and** $n_1(1 − p_1) > 5$
    - $n_2 p_2 > 5$ **and** $n_2(1 − p_2) > 5$
- **Independent**

---

### Example 1

- $H_0: p_1 - p_2 = 0 \Rightarrow P(\hat p_1 - \hat p_2 | H_0) < 5\%$
- $H_\text{a}: p_1 - p_2 \neq 0$
- $\alpha = 5\%$

- $k1 = 642, n1 = 1000$
- $k2 = 591, n2 = 1000$

In [2]:
# Two-proportion z test (pooled), computed by hand.
# k = number of successes, n = sample size for each group.
k1, n1 = 642, 1000
k2, n2 = 591, 1000
alpha = .05
CL = 1 - alpha

# estimated proportions: one per sample, plus the pooled estimate used
# by the test statistic (see the pooled formula: (x1 + x2) / (n1 + n2))
p_hat_1 = k1 / n1
p_hat_2 = k2 / n2
p_hat = (k1 + k2) / (n1 + n2)

# observed difference and the difference hypothesised under H0
d0 = 0
d = p_hat_1 - p_hat_2

# per-sample variance of each estimated proportion
var1 = p_hat_1 * (1 - p_hat_1) / n1
var2 = p_hat_2 * (1 - p_hat_2) / n2

# standard error (unpooled) -- used for the confidence interval only
SE = np.sqrt(var1 + var2)

# margin of error: upper end of the central CL interval of N(0, 1) times SE
critical_value = st.norm.interval(CL)[1]
MOE = critical_value * SE

# confidence interval for p1 - p2
ci_low, ci_upp = d - MOE, d + MOE
print('CI:', (ci_low, ci_upp))

# z statistic (pooled standard error in the denominator).
# For the unpooled variant, divide by SE instead: zstat = (d - d0) / SE
zstat = (d - d0) / np.sqrt(p_hat * (1-p_hat) * (1/n1 + 1/n2))

# two-sided p-value: take the tail beyond |z| so the result stays in [0, 1]
# even when the statistic is negative (sf(z) * 2 would exceed 1 for z < 0)
pval = st.norm.sf(abs(zstat)) * 2
print('zstat, pval:', (zstat, pval))

CI: (0.008438760424625616, 0.09356123957537447)
zstat, pval: (2.3453407727949207, 0.01900969625410689)


In [3]:
# OR: the same comparison through statsmodels
k1, n1 = 642, 1000
k2, n2 = 591, 1000
alpha = .05
CL = 1 - alpha

# estimated proportion
p_hat_1, p_hat_2 = k1 / n1, k2 / n2
p_hat = (k1 + k2) / (n1 + n2)

# difference
d0 = 0
d = p_hat_1 - p_hat_2

# arguments shared by the interval and the test
samples = dict(count1=k1, nobs1=n1, count2=k2, nobs2=n2)
options = dict(
    method='score',  # wald(unpooled), agresti-caffo, score(pooled)
    compare='diff',
)

# confidence interval
ci_low, ci_upp = proportion.confint_proportions_2indep(
    **samples, **options, alpha=alpha
)
print('CI:', (ci_low, ci_upp))

# zstat & pval
# With return_results=True the call returns a result object; it is unpacked
# here into (statistic, p-value). Assign it to a single name instead to
# inspect the full output.
# NOTE(review): the statistic printed here (2.3448) is slightly below the
# hand-computed pooled z (2.3453); presumably this comes from statsmodels'
# default small-sample `correction=True` for method='score' -- confirm
# against the statsmodels documentation.
zstat, pval = proportion.test_proportions_2indep(
    **samples,
    **options,
    value=d0,
    alternative='two-sided',
    return_results=True,
)
print('zstat, pval:', (zstat, pval))

CI: (0.00837210032506908, 0.09354065155458353)
zstat, pval: (2.3447543642914934, 0.019039618473082894)


In [4]:
# R Chi-square test: cross-check the result with R's prop.test through rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects

k1, n1 = 642, 1000
k2, n2 = 591, 1000
alpha = .05
CL = 1 - alpha

stats = importr("stats")

# successes and sample sizes as R integer vectors
successes = robjects.IntVector([k1, k2])
trials = robjects.IntVector([n1, n2])

# correct = True if either the expected successes or failures is < 5
result = stats.prop_test(
    x=successes,
    n=trials,
    alternative='two.sided',
    conf_level=CL,
    correct=False,
)
print(result)


	2-sample test for equality of proportions without continuity
	correction

data:  c(642L, 591L) out of c(1000L, 1000L)
X-squared = 5.5006, df = 1, p-value = 0.01901
alternative hypothesis: two.sided
95 percent confidence interval:
 0.00843876 0.09356124
sample estimates:
prop 1 prop 2 
 0.642  0.591 




---

## Unpooled z test

### Formula

$\displaystyle H_{0}\colon p_{1}-p_{2}=d_0,\quad |d_0| > 0$

$\displaystyle z=\frac{(\hat{p}_1 - \hat{p}_2) - d_0}{\sqrt{\frac{\hat{p}_1(1 - \hat{p}_1)}{n_1} + \frac{\hat{p}_2(1 - \hat{p}_2)}{n_2}}}$

### Conditions

- **Random**
- **Normal**
    - $n_1 p_1 > 5$ **and** $n_1(1 − p_1) > 5$
    - $n_2 p_2 > 5$ **and** $n_2(1 − p_2) > 5$
- **Independent**

---

### Example 1

- $H_0: p_1 - p_2 = 0 \Rightarrow P(\hat p_1 - \hat p_2 | H_0) < 5\%$
- $H_\text{a}: p_1 - p_2 \neq 0$
- $\alpha = 5\%$

- $k1 = 642, n1 = 1000$
- $k2 = 591, n2 = 1000$

In [5]:
# Two-proportion z test (unpooled), computed by hand.
# k = number of successes, n = sample size for each group.
k1, n1 = 642, 1000
k2, n2 = 591, 1000
alpha = .05
CL = 1 - alpha

# estimated proportions (p_hat, the pooled estimate, is kept for reference
# only; the unpooled statistic below does not use it)
p_hat_1 = k1 / n1
p_hat_2 = k2 / n2
p_hat = (k1 + k2) / (n1 + n2)

# observed difference and the difference hypothesised under H0
d0 = 0
d = p_hat_1 - p_hat_2

# per-sample variance of each estimated proportion
var1 = p_hat_1 * (1 - p_hat_1) / n1
var2 = p_hat_2 * (1 - p_hat_2) / n2

# standard error (unpooled) -- used for both the interval and the test
SE = np.sqrt(var1 + var2)

# margin of error: upper end of the central CL interval of N(0, 1) times SE
critical_value = st.norm.interval(CL)[1]
MOE = critical_value * SE

# confidence interval for p1 - p2
ci_low, ci_upp = d - MOE, d + MOE
print('CI:', (ci_low, ci_upp))

# z statistic (unpooled standard error in the denominator)
zstat = (d - d0) / SE

# two-sided p-value from |z|. Measuring the tail from the standardised
# distance |d - d0| / SE keeps the result correct for a non-zero d0 and for
# a negative difference, which sf(abs(d)) on N(d0, SE) does not.
pval = st.norm.sf(abs(zstat)) * 2
print('zstat, pval:', (zstat, pval))

CI: (0.008438760424625616, 0.09356123957537447)
zstat, pval: (2.3485726498759636, 0.018845522376746224)


In [6]:
# OR: the same comparison through statsmodels
k1, n1 = 642, 1000
k2, n2 = 591, 1000
alpha = .05
CL = 1 - alpha

# estimated proportion
p_hat_1, p_hat_2 = k1 / n1, k2 / n2
p_hat = (k1 + k2) / (n1 + n2)

# difference
d0 = 0
d = p_hat_1 - p_hat_2

# arguments shared by the interval and the test
samples = dict(count1=k1, nobs1=n1, count2=k2, nobs2=n2)
options = dict(
    method='wald',  # wald(unpooled), agresti-caffo, score(pooled)
    compare='diff',
)

# confidence interval
ci_low, ci_upp = proportion.confint_proportions_2indep(
    **samples, **options, alpha=alpha
)
print('CI:', (ci_low, ci_upp))

# zstat & pval
# With return_results=True the call returns a result object; it is unpacked
# here into (statistic, p-value). Assign it to a single name instead to
# inspect the full output.
zstat, pval = proportion.test_proportions_2indep(
    **samples,
    **options,
    value=d0,
    alternative='two-sided',
    return_results=True,
)
print('zstat, pval:', (zstat, pval))

CI: (0.008438760424625609, 0.09356123957537449)
zstat, pval: (2.3485726498759636, 0.018845522376746224)


In [7]:
# R Chi-square test: cross-check with R's prop.test through rpy2.
# NOTE: the X-squared reported by this call (5.5006) is the square of the
# pooled z statistic (2.3453), and its p-value (0.01901) matches the pooled
# test rather than the unpooled one (0.01885); only the confidence interval
# coincides with the unpooled interval.
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects

k1, n1 = 642, 1000
k2, n2 = 591, 1000
alpha = .05
CL = 1 - alpha

stats = importr("stats")

# successes and sample sizes as R integer vectors
successes = robjects.IntVector([k1, k2])
trials = robjects.IntVector([n1, n2])

# correct = True if either the expected successes or failures is < 5
result = stats.prop_test(
    x=successes,
    n=trials,
    alternative='two.sided',
    conf_level=CL,
    correct=False,
)
print(result)


	2-sample test for equality of proportions without continuity
	correction

data:  c(642L, 591L) out of c(1000L, 1000L)
X-squared = 5.5006, df = 1, p-value = 0.01901
alternative hypothesis: two.sided
95 percent confidence interval:
 0.00843876 0.09356124
sample estimates:
prop 1 prop 2 
 0.642  0.591 




---

# Two-sample t test

> [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Dependent_t-test_for_paired_samples)

- Paired (dependent) t-test: since we are ultimately concerned with the difference between two measurements on the same sample, the paired t-test reduces to the one-sample t-test.
- Unpaired (independent) t-test
    - pooled t-test
    - unpooled t-test

---

## Paired t test

---

### Example 1: Two-sided

> [How to Conduct a Paired Samples T-Test in Python](https://www.geeksforgeeks.org/how-to-conduct-a-paired-samples-t-test-in-python/)

Suppose we want to know whether an engine oil significantly affects the mileage of cars of different brands. To test this, we take 10 cars in a garage, initially filled with the original engine oil, and record each car's mileage over $100$ kilometers. We then fill each car with another engine oil (different from the original one) and again record its mileage over $100$ kilometers. To compare the mean mileage of the first and second tests, we use a paired samples t-test, because each car's first measurement can be paired with its second measurement. Conducting a paired samples t-test is a step-by-step process.

In [8]:
# Mileage of each car before applying the different engine oil
pre = np.array([
    30, 31, 34, 40, 36,
    35, 34, 30, 28, 29,
])

# Mileage of each car after applying the different engine oil
post = np.array([
    30, 31, 32, 38, 32,
    31, 32, 29, 28, 30,
])

In [9]:
# Paired-sample t test on the before/after mileage (two-sided alternative)
st.ttest_rel(pre, post, alternative='two-sided')

Ttest_relResult(statistic=2.584921310565987, pvalue=0.029457853822895275)

In [10]:
# OR simplify the paired t test to a one-sample t test on the differences.
# Use pre - post so the sign matches st.ttest_rel(a=pre, b=post).
diff = pre - post
n, mu_0, mu_1, sd_1 = len(diff), 0, diff.mean(), diff.std(ddof=1)

# standard error of the mean difference
SE = sd_1 / np.sqrt(n)

# t statistic relative to the hypothesised mean difference mu_0
tstat = (mu_1 - mu_0) / SE

# two-sided p-value: the tail beyond |t| doubled. Using |t| picks the correct
# tail from the distance to mu_0 instead of from the sign of the mean alone.
pval = st.t(df=n-1).sf(abs(tstat)) * 2
pval

0.029457853822895237

---

## Pooled t test

---

### Example 1: Two-sided

> [How to Conduct a Two Sample T-Test in Python](https://www.statology.org/two-sample-t-test-python/)

Researchers want to know whether or not two different species of plants have the same mean height. To test this, they collect a simple random sample of 20 plants from each species.

In [13]:
# Heights of the 20 sampled plants from each of the two species
A = np.array([
    14, 15, 15, 16, 13, 8, 14, 17, 16, 14,
    19, 20, 21, 15, 15, 16, 16, 13, 14, 12,
])
B = np.array([
    15, 17, 14, 17, 14, 8, 12, 19, 19, 14,
    17, 22, 24, 16, 13, 16, 13, 18, 15, 13,
])

In [14]:
# t statistic & p-value
# equal_var: True to perform pooled t test; False to perform unpooled t test
st.ttest_ind(A, B, equal_var=True, alternative='two-sided')

Ttest_indResult(statistic=-0.6337397070250238, pvalue=0.5300471010405257)

In [15]:
# OR the statsmodels version; the displayed tuple is
# (t statistic, p-value, degrees of freedom)
weightstats.ttest_ind(A, B, alternative='two-sided', usevar='pooled')

(-0.6337397070250238, 0.5300471010405257, 38.0)

---

## Unpooled t test

---

### Example 1: Two-sided

> [How to Conduct a Two Sample T-Test in Python](https://www.statology.org/two-sample-t-test-python/)

Researchers want to know whether or not two different species of plants have the same mean height. To test this, they collect a simple random sample of 20 plants from each species.

In [16]:
# Heights of the 20 sampled plants from each of the two species
A = np.array([
    14, 15, 15, 16, 13, 8, 14, 17, 16, 14,
    19, 20, 21, 15, 15, 16, 16, 13, 14, 12,
])
B = np.array([
    15, 17, 14, 17, 14, 8, 12, 19, 19, 14,
    17, 22, 24, 16, 13, 16, 13, 18, 15, 13,
])

In [17]:
# t statistic & p-value
# equal_var: True to perform pooled t test; False to perform unpooled t test
st.ttest_ind(A, B, equal_var=False, alternative='two-sided')

Ttest_indResult(statistic=-0.6337397070250238, pvalue=0.5302413334606599)

In [18]:
# OR the statsmodels version; the displayed tuple is
# (t statistic, p-value, degrees of freedom)
weightstats.ttest_ind(A, B, alternative='two-sided', usevar='unequal')

(-0.6337397070250238, 0.5302413334606599, 36.14149072044496)