In [1]:
import numpy as np
from scipy import stats
import math

# Z tests - hypothesis tests on difference in means, VARIANCES KNOWN
# Two-tailed test, H1: mu1 - mu2 != delta0
def z_test_two_tailed(x1, var1, n1, x2, var2, n2, delta0, alpha, real_delta):
    z0 = (x1 - x2 - delta0) / math.sqrt(var1 / n1 + var2/ n2)
    P = 2 * (1 - stats.norm.cdf(z0))
    if alpha != None:
        upper = stats.norm.ppf(1 - alpha / 2)
        lower = stats.norm.ppf(alpha / 2)
        print(f'(-z_alpha/2, z_alpha/2) = ({lower}, {upper})')
        if z0 > upper or z0 < lower:
            print('Reject!')
        else:
            print('Fail to reject!')
        if real_delta is not None:
            minus = (delta0 - real_delta) / math.sqrt(var1 / n1 + var2 / 2)
            beta = stats.norm.cdf(upper - minus) - stats.norm.cdf(lower - minus)
            print(f'beta = {beta}')
    print('z_0, P = ')
    return z0, P

# Upper-tailed Z-test, H1: mu1 - mu2 > delta0
def z_test_upper(x1, var1, n1, x2, var2, n2, delta0, alpha, real_delta):
    z0 = (x1 - x2 - delta0) / math.sqrt(var1 / n1 + var2/ n2)
    P = 1 - stats.norm.cdf(z0)
    if alpha != None:
        z_alpha = stats.norm.ppf(1 - alpha)
        print(f'z_alpha = {z_alpha}')
        if z0 > z_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
        if real_delta is not None:
            beta = stats.norm.cdf(z_alpha + (delta0 - real_delta)/math.sqrt(var1 / n1 + var2 / n2))
            print(f'beta = {beta}')
    print('z_0, P = ')
    return z0, P

# Lower-tailed Z-test, H1: mu1 - mu2 < delta0
def z_test_lower(x1, var1, n1, x2, var2, n2, delta0, alpha, real_delta):
    z0 = (x1 - x2 - delta0) / math.sqrt(var1 / n1 + var2/ n2)
    P = stats.norm.cdf(z0)
    if alpha != None:
        z_alpha = stats.norm.ppf(alpha)
        print(f'-z_alpha = {z_alpha}')
        if z0 < z_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
        if real_delta is not None:
            beta = 1 - stats.norm.cdf(z_alpha + (delta0 - real_delta)/math.sqrt(var1 / n1 + var2 / n2))
            print(f'beta = {beta}')
    print('z_0, P = ')
    return z0, P

# T tests - hypothesis tests on difference in means, VARIANCES UNKNOWN
# two-tailed T-test, H1: mu1 - mu2 != 0, same var
def t_test_two_tailed_same_var(x1, var1, n1, x2, var2, n2, delta0, alpha):
    sp2 = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    print(f's_p = {math.sqrt(sp2)}')
    t0 = (x1 - x2 - delta0) / math.sqrt(sp2 * (1 / n1 + 1 / n2))
    P = 1 - stats.t.cdf(abs(t0), df=(n1 + n2 - 1)) + stats.t.cdf(-1 * abs(t0), df=(n1 + n2 - 1))
    if alpha is not None:
        upper = stats.t.ppf(1 - alpha / 2, df=(n1 + n2 - 1))
        lower = stats.t.ppf(alpha / 2, df=(n1 + n2 - 1))
        print(f'(-t_alpha/2, t_alpha/2) = ({lower}, {upper})')
        if t0 > upper or t0 < lower:
            print('Reject')
        else:
            print('Fail to reject!')
    print('t_0, P = ')
    return t0, P

# upper-tailed T-test, H1: mu1 - mu2 > delta0, same var
def t_test_upper_same_var(x1, var1, n1, x2, var2, n2, delta0, alpha):
    sp2 = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    print(f's_p = {math.sqrt(sp2)}')
    t0 = (x1 - x2 - delta0) / math.sqrt(sp2 * (1 / n1 + 1 / n2))
    P = 1 - stats.t.cdf(t0, df=(n1 + n2 - 1))
    if alpha is not None:
        t_alpha = stats.t.ppf(1 - alpha, df=(n1 + n2 - 1))
        print(f't_alpha = {t_alpha}')
        if t0 > t_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
    print('t_0, P = ')
    return t0, P

# upper-tailed T-test, H1: mu1 - mu2 < delta0, same var
def t_test_lower_same_var(x1, var1, n1, x2, var2, n2, delta0, alpha):
    sp2 = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    print(f's_p = {math.sqrt(sp2)}')
    t0 = (x1 - x2 - delta0) / math.sqrt(sp2 * (1 / n1 + 1 / n2))
    P = stats.t.cdf(t0, df=(n1 + n2 - 1))
    if alpha is not None:
        t_alpha = stats.t.ppf(alpha, df=(n1 + n2 - 1))
        print(f'-t_alpha = {t_alpha}')
        if t0 < t_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
    print('t_0, P = ')
    return t0, P

# two-tailed T-test, H1: mu1 - mu2 != delta0, DIFFERENT var
def t_test_two_tailed_diff_var(x1, var1, n1, x2, var2, n2, delta0, alpha):
    v = math.floor((var1 / n1 + var2 / n2) ** 2 / ((var1 / n1) ** 2 / (n1 - 1) + (var2 / n2) ** 2 / (n2 - 1)))
    t0 = (x1 - x2 - delta0) / math.sqrt(var1 / n1 + var2 / n2)
    P = 1 - stats.t.cdf(abs(t0), df=(v)) + stats.t.cdf(-1 * abs(t0), df=v)
    if alpha is not None:
        upper = stats.t.ppf(1 - alpha / 2, df=v)
        lower = stats.t.ppf(alpha / 2, df=v)
        print(f'(-t_alpha/2, t_alpha/2) = ({lower}, {upper})')
        if t0 > upper or t0 < lower:
            print('Reject')
        else:
            print('Fail to reject!')
    print('t_0, P = ')
    return t0, P

# upper-tailed T-test, H1: mu1 - mu2 > delta0, DIFFERENT var
def t_test_upper_diff_var(x1, var1, n1, x2, var2, n2, delta0, alpha):
    v = math.floor((var1 / n1 + var2 / n2) ** 2 / ((var1 / n1) ** 2 / (n1 - 1) + (var2 / n2) ** 2 / (n2 - 1)))
    t0 = (x1 - x2 - delta0) / math.sqrt(var1 / n1 + var2 / n2)
    P = 1 - stats.t.cdf(abs(t0), df=(v)) + stats.t.cdf(-1 * abs(t0), df=v)
    if alpha is not None:
        t_alpha = stats.t.ppf(1 - alpha, df=v)
        print(f't_alpha = {t_alpha}')
        if t0 > t_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
    print('t_0, P = ')
    return t0, P

# lower-tailed T-test, H1: mu1 - mu2 < delta0, DIFFERENT var
def t_test_lower_diff_var(x1, var1, n1, x2, var2, n2, delta0, alpha):
    v = math.floor((var1 / n1 + var2 / n2) ** 2 / ((var1 / n1) ** 2 / (n1 - 1) + (var2 / n2) ** 2 / (n2 - 1)))
    t0 = (x1 - x2 - delta0) / math.sqrt(var1 / n1 + var2 / n2)
    P = 1 - stats.t.cdf(abs(t0), df=(v)) + stats.t.cdf(-1 * abs(t0), df=v)
    if alpha is not None:
        t_alpha = stats.t.ppf(alpha, df=v)
        print(f't_alpha = {t_alpha}')
        if t0 > t_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
    print('t_0, P = ')
    return t0, P

def proportion_test_two_tailed(x1, n1, x2, n2, alpha):
    p1 = x1 / n1
    p2 = x2 / n2
    p = (x1 + x2) / (n1 + n2)
    z0 = (p1 - p2) / math.sqrt(p * (1 - p) * (1 / n1 + 1 / n2))
    P = 2 * (1 - stats.norm.cdf(z0))
    if alpha != None:
        upper = stats.norm.ppf(1 - alpha / 2)
        lower = stats.norm.ppf(alpha / 2)
        print(f'(-z_alpha/2, z_alpha/2) = ({lower}, {upper})')
        if z0 > upper or z0 < lower:
            print('Reject!')
        else:
            print('Fail to reject!')
    print('z_0, P = ')
    return z0, P

def proportion_test_upper(x1, n1, x2, n2, alpha):
    p1 = x1 / n1
    p2 = x2 / n2
    p = (x1 + x2) / (n1 + n2)
    z0 = (p1 - p2) / math.sqrt(p * (1 - p) * (1 / n1 + 1 / n2))
    P = 1 - stats.norm.cdf(z0)
    if alpha != None:
        z_alpha = stats.norm.ppf(1 - alpha)
        print(f'z_alpha = {z_alpha}')
        if z0 > z_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
    print('z_0, P = ')
    return z0, P

def proportion_test_lower(x1, n1, x2, n2, alpha):
    p1 = x1 / n1
    p2 = x2 / n2
    p = (x1 + x2) / (n1 + n2)
    z0 = (p1 - p2) / math.sqrt(p * (1 - p) * (1 / n1 + 1 / n2))
    P = stats.norm.cdf(z0)
    if alpha != None:
        z_alpha = stats.norm.ppf(alpha)
        print(f'-z_alpha = {z_alpha}')
        if z0 < z_alpha:
            print('Reject')
        else:
            print('Fail to reject!')
    print('z_0, P = ')
    return z0, P

# confidence interval for difference in mean, variance known
def ci_mean_two_tailed(confidence, x1, x2, n1, n2, var1, var2):
    alpha = (100 - confidence) / 100
    upper = x1 - x2 + stats.norm.ppf(1 - alpha / 2) * math.sqrt(var1 / n1 + var2 / n2)
    lower = x1 - x2 - stats.norm.ppf(1 - alpha / 2) * math.sqrt(var1 / n1 + var2 / n2)
    return lower, upper

# delta < value
def ci_mean_upper(confidence, x1, x2, n1, n2, var1, var2):
    alpha = (100 - confidence) / 100
    return x1 - x2 + stats.norm.ppf(1 - alpha) * math.sqrt(var1 / n1 + var2 / n2)

# delta > value
def ci_mean_lower(confidence, x1, x2, n1, n2, var1, var2):
    alpha = (100 - confidence) / 100
    return x1 - x2 - stats.norm.ppf(1 - alpha) * math.sqrt(var1 / n1 + var2 / n2)

In [5]:
proportion_test_two_tailed(alpha=.1, n1=400, n2=500, x1=330, x2=396)

(-z_alpha/2, z_alpha/2) = (-1.6448536269514729, 1.6448536269514722)
Fail to reject!
z_0, P = 


(1.2456821978060963, 0.21288109182776394)

In [2]:
def ci_t_two_tailed_pooled(x1, x2, alpha, n1, n2, s1, s2):
    sp2 = ((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2)
    left = x1 - x2 - stats.t.ppf(alpha / 2, df=(n1 + n2 - 2)) * math.sqrt(sp2 * (1 / n1 + 1 / n2))
    right = x1 - x2 + stats.t.ppf(alpha / 2, df=(n1 + n2 - 2)) * math.sqrt(sp2 * (1 / n1 + 1 / n2))
    return left, right

ci_t_two_tailed_pooled(n2=23, n1=26, x1=6.9, x2=7.2, s1=1.26, s2=1.76, alpha=.05)

(0.5722777243323464, -1.172277724332346)