In [42]:
import scipy
from scipy.stats import norm, binom, iqr
import numpy as np
from numpy.random import seed
from numpy.random import normal
import matplotlib.pyplot as plt

In [12]:
def to_percentage(n):
    """Format a fraction as a percentage string, e.g. ``0.5`` -> ``"50.0%"``.

    The value is multiplied by 100 and rendered with ``str``; no rounding
    is applied.
    """
    scaled = n * 100
    return "{}%".format(str(scaled))

In [13]:
def percentage_until_z_score(z_score):
    """Return the standard-normal CDF evaluated at ``z_score``.

    The value is also printed, formatted as a percentage string.
    """
    lower_area = norm.cdf(z_score)
    formatted = to_percentage(lower_area)
    print(formatted)
    return lower_area

In [14]:
def percentage_after_z_score(z_score):
    """Return the standard-normal CDF evaluated at ``-z_score``.

    By symmetry of the standard normal this is the area to the right of
    ``z_score``. The value is also printed, formatted as a percentage string.
    """
    mirrored = -z_score
    upper_area = norm.cdf(mirrored)
    formatted = to_percentage(upper_area)
    print(formatted)
    return upper_area

def percentage_between_z_score(z1, z2):
    """Return the standard-normal area between ``z1`` and ``z2``.

    Computed as CDF(z2) - CDF(z1), so the result is negative when
    ``z1 > z2``. Each CDF lookup prints its own percentage, and the raw
    difference is printed afterwards.
    """
    # Evaluate the upper bound first to keep the printed order stable.
    upper = percentage_until_z_score(z2)
    lower = percentage_until_z_score(z1)
    area = upper - lower
    print(area)
    return area

In [15]:
def percentage_two_tail(z_score):
    """Return the central (two-sided) standard-normal area within +/- ``z_score``.

    This is the inverse of ``two_tail_zscore``: for the critical value ``z``
    of a confidence level ``c``, the central area is ``2 * CDF(|z|) - 1 == c``.

    The previous implementation fed ``1 - (1 - z_score) / 2`` (a transform
    meant for probabilities, not z-scores) into ``norm.cdf``, which does not
    correspond to any two-tailed quantity.

    Args:
        z_score: z-score; its sign is ignored because the interval is
            symmetric around zero.

    Returns:
        The central area formatted as a percentage string (the string return
        type of the original function is kept). The same string is printed.
    """
    central_area = 2 * norm.cdf(abs(z_score)) - 1
    formatted = to_percentage(central_area)
    print(formatted)
    return formatted
def two_tail_zscore(percentage):
    """Return the standard-normal quantile at ``1 - (1 - percentage) / 2``.

    ``percentage`` is the central area given as a fraction (e.g. ``0.95``);
    the remaining area is split evenly between the two tails. The resulting
    z-score is printed and returned.
    """
    single_tail = (1 - percentage) / 2
    critical_z = norm.ppf(1 - single_tail)
    print(critical_z)
    return critical_z

In [16]:
def calc_zscore(val, mean, std_d):
    """Return how many ``std_d`` units ``val`` lies from ``mean``.

    The z-score ``(val - mean) / std_d`` is printed and returned.
    """
    deviation = val - mean
    z = deviation / std_d
    print(z)
    return z

In [17]:
def p_less_than(val, mean, std_d):
    """Return the CDF at ``val`` of a normal with the given mean and std dev.

    The probability is also printed, formatted as a percentage string.
    """
    distribution = norm(mean, std_d)
    probability = distribution.cdf(val)
    print(to_percentage(probability))
    return probability

In [18]:
def p_more_than(val, mean, std_d):
    """Return P(X > val) for X ~ Normal(mean, std_d).

    Uses the survival function rather than ``1 - cdf``: the subtraction
    cancels to exactly 0 once the CDF rounds to 1.0 (far upper tail), whereas
    ``sf`` keeps the small tail probability.

    Args:
        val: Threshold value.
        mean: Mean of the normal distribution.
        std_d: Standard deviation of the normal distribution.

    Returns:
        The upper-tail probability. It is also printed, formatted as a
        percentage string.
    """
    res = norm(mean, std_d).sf(val)
    print(to_percentage(res))
    return res

In [19]:
def p_between(val1, val2, mean, std_d):
    """Return the normal probability mass between ``val1`` and ``val2``.

    The bounds may be given in either order; the CDF at the smaller bound is
    subtracted from the CDF at the larger one. Each CDF lookup prints its own
    percentage; the difference itself is not printed.
    """
    low, high = (val1, val2) if val1 <= val2 else (val2, val1)
    # Larger bound is evaluated first, matching the printed order callers see.
    return p_less_than(high, mean, std_d) - p_less_than(low, mean, std_d)

In [20]:
def p_to_zscore(percentage):
    """Return the standard-normal quantile (inverse CDF) for ``percentage``.

    ``percentage`` is passed straight to ``norm.ppf``. The z-score is printed
    and returned.
    """
    z = norm.ppf(percentage)
    print(z)
    return z

def p_to_val(percentage, mean, std_d):
    """Return the value at the given cumulative probability of a normal.

    The z-score for ``percentage`` is looked up via ``p_to_zscore`` (which
    prints it) and mapped onto the distribution as ``mean + z * std_d``.
    The resulting value is printed and returned.
    """
    z = p_to_zscore(percentage)
    value = mean + z * std_d
    print(value)
    return value

In [21]:
def binomial_exact_p(sample_size, p, expected_val):
    """Return P(X == expected_val) for X ~ Binomial(sample_size, p).

    The probability mass is printed and returned.
    """
    mass = binom.pmf(expected_val, sample_size, p)
    print(mass)
    return mass

In [22]:
def binomial_less_or_equal_to_p(sample_size, p, val):
    """Return P(X <= val) for X ~ Binomial(sample_size, p).

    The cumulative probability is printed and returned.
    """
    cumulative = binom.cdf(val, sample_size, p)
    print(cumulative)
    return cumulative
def binomial_less_than_p(sample_size, p, val):
    """Return P(X < val) for X ~ Binomial(sample_size, p).

    Because X is integer-valued, this is the CDF evaluated at ``val - 1``.
    The probability is printed and returned.
    """
    strict_upper = val - 1
    cumulative = binom.cdf(strict_upper, sample_size, p)
    print(cumulative)
    return cumulative

In [23]:
def binomial_more_or_equal_to_p(sample_size, p, val):
    """Return P(X >= val) for X ~ Binomial(sample_size, p).

    Args:
        sample_size: Number of trials.
        p: Success probability of a single trial.
        val: Lower bound (inclusive) on the number of successes.

    Returns:
        The upper-tail probability, which is also printed.
    """
    # X is integer-valued, so P(X >= val) == P(X > val - 1). The survival
    # function is used instead of ``1 - cdf`` because the subtraction rounds
    # small tail probabilities to 0 once the CDF reaches 1.0.
    res = binom.sf(val - 1, sample_size, p)
    print(res)
    return res

def binomial_more_than_p(sample_size, p, val):
    """Return P(X > val) for X ~ Binomial(sample_size, p).

    Args:
        sample_size: Number of trials.
        p: Success probability of a single trial.
        val: Lower bound (exclusive) on the number of successes.

    Returns:
        The upper-tail probability, which is also printed.
    """
    # The survival function is used instead of ``1 - cdf`` because the
    # subtraction rounds small tail probabilities to 0 once the CDF reaches 1.0.
    res = binom.sf(val, sample_size, p)
    print(res)
    return res

In [24]:
def binomial_between_p(p1, p2, sample_size, p):
    """Return P(p1 <= X <= p2) for X ~ Binomial(sample_size, p).

    Despite their names, ``p1`` and ``p2`` are used as success counts (the
    inclusive lower and upper bounds), not probabilities. The result is
    CDF(p2) - CDF(p1 - 1); each CDF lookup prints its own value and the
    difference is printed afterwards.
    """
    # Upper bound first, so the printed order of the CDF values is preserved.
    upper_cdf = binomial_less_or_equal_to_p(sample_size, p, p2)
    below_lower_cdf = binomial_less_or_equal_to_p(sample_size, p, p1 - 1)
    probability = upper_cdf - below_lower_cdf
    print(probability)
    return probability

In [25]:
def std_of_binom(sample_size, p):
    """Return ``sqrt(sample_size * p * (1 - p))``.

    This is the standard deviation of a binomial count with ``sample_size``
    trials and success probability ``p``. The value is printed and returned.
    """
    variance = sample_size * p * (1 - p)
    std_dev = np.sqrt(variance)
    print(std_dev)
    return std_dev

In [26]:
def se(p, sample_size):
    """Return ``sqrt(p * (1 - p) / sample_size)``.

    This is the standard error of a sample proportion ``p`` observed over
    ``sample_size`` draws. The value is printed and returned.
    """
    variance_of_proportion = p * (1 - p) / sample_size
    std_error = np.sqrt(variance_of_proportion)
    print(std_error)
    return std_error

In [27]:
def clt_more_than(p, sample_size, val):
    """Return the probability that a sample proportion exceeds ``val``.

    The sample proportion is modelled as normal with mean ``p`` and standard
    deviation ``se(p, sample_size)``. The helpers print their own
    intermediate values; the final probability is printed once more here.
    """
    standard_error = se(p, sample_size)
    tail_probability = p_more_than(val, p, standard_error)
    print(tail_probability)
    return tail_probability

In [28]:
def clt_p_to_z(p, sample_size, val):
    """Return the z-score of ``val`` against a sampling distribution.

    The distribution has mean ``p`` and standard deviation
    ``se(p, sample_size)``. The helpers print their own intermediate values;
    the z-score is printed once more here.
    """
    standard_error = se(p, sample_size)
    z = calc_zscore(val, p, standard_error)
    print(z)
    return z

In [93]:
def confidence_interval(p, sample_size, conf_level_pct):
    """Return the ``(lower, upper)`` confidence interval for a proportion.

    The interval is ``p -/+ z * se(p, sample_size)``, where ``z`` comes from
    ``two_tail_zscore(conf_level_pct)``. ``conf_level_pct`` is forwarded
    unchanged to that helper, which treats it as a fraction such as ``0.95``.
    The margin of error and both bounds are printed.
    """
    standard_error = se(p, sample_size)
    critical_z = two_tail_zscore(conf_level_pct)
    margin_e = standard_error * critical_z
    print("margin error", margin_e)
    half_width = critical_z * standard_error
    bounds = (p - half_width, p + half_width)
    print(*bounds)
    return bounds

def margin_of_err(z, se):
    """Return the margin of error: critical value ``z`` times standard error ``se``."""
    margin = z * se
    return margin

In [94]:
def conf_interval_in_populations(p1, p2, size_1, size_2, conf_level_pct):
    """Return the ``(lower, upper)`` interval for the difference ``p1 - p2``.

    The standard error is ``sqrt(p1 * (1 - p1) / size_1 + p2 * (1 - p2) / size_2)``
    and the critical value comes from ``two_tail_zscore(conf_level_pct)``,
    which treats the level as a fraction such as ``0.95``. The interval tuple
    is printed and returned.
    """
    variance_1 = p1 * (1 - p1) / size_1
    variance_2 = p2 * (1 - p2) / size_2
    se_est = np.sqrt(variance_1 + variance_2)
    difference = p1 - p2
    critical_z = two_tail_zscore(conf_level_pct)
    half_width = critical_z * se_est
    interval = (difference - half_width, difference + half_width)
    print(interval)
    return interval