In [294]:
import scipy
from scipy.stats import norm, binom, iqr, t, tstd
import numpy as np
from numpy.random import seed
from numpy.random import normal
import matplotlib.pyplot as plt

In [295]:
def to_percentage(n):
    """Format the fraction ``n`` as a percent string, e.g. ``0.5`` -> ``"50.0%"``."""
    scaled = n * 100
    # %s applies str() to the scaled value; %% emits the literal percent sign.
    return "%s%%" % (scaled,)

In [296]:
def percentage_until_z_score(z_score):
    """Print and return the standard-normal area to the left of ``z_score``.

    The value is printed as a percentage string; the raw probability from
    ``norm.cdf`` is returned.
    """
    left_area = norm.cdf(z_score)
    formatted = to_percentage(left_area)
    print(formatted)
    return left_area

In [297]:
def percentage_after_z_score(z_score):
    """Print and return the standard-normal area to the right of ``z_score``.

    Computed as the CDF at ``-z_score``; the value is printed as a percentage
    string and the raw probability is returned.
    """
    mirrored = -z_score
    right_area = norm.cdf(mirrored)
    print(to_percentage(right_area))
    return right_area

def percentage_between_z_score(z1, z2):
    """Print and return the standard-normal area between ``z1`` and ``z2``.

    Calculated as CDF(z2) - CDF(z1). Each CDF lookup prints its own percentage
    (z2 first, then z1); the difference is printed as a raw probability.
    """
    upper_area = percentage_until_z_score(z2)
    lower_area = percentage_until_z_score(z1)
    between = upper_area - lower_area
    print(between)
    return between

In [298]:
def percentage_two_tail(z_score):
    """Print and return the central (two-tailed) coverage for ``z_score``.

    This is the inverse of ``two_tail_zscore``: for a standard normal Z it
    evaluates P(-|z| <= Z <= |z|) = 2 * CDF(|z|) - 1.

    Returns:
        The coverage formatted as a percentage string (e.g. roughly "95%"
        for z = 1.96). The string return type is kept for compatibility.
    """
    # abs() makes the result independent of the sign of the z-score.
    res = 2 * norm.cdf(abs(z_score)) - 1
    formatted = to_percentage(res)
    print(formatted)
    return formatted
def two_tail_zscore(percentage):
    """Print and return the critical z for a central coverage of ``percentage``.

    ``percentage`` is a fraction (e.g. 0.95); the remaining mass is split
    evenly between the two tails.
    """
    one_tail = (1 - percentage) / 2
    critical_z = norm.ppf(1 - one_tail)
    print(critical_z)
    return critical_z

In [299]:
def calc_zscore(val, mean, std_d):
    """Print and return how many standard deviations ``val`` lies from ``mean``."""
    deviation = val - mean
    z = deviation / std_d
    print(z)
    return z

In [300]:
def p_less_than(val, mean, std_d):
    """Print (as a percentage) and return P(X < val) for X ~ Normal(mean, std_d)."""
    prob = norm.cdf(val, loc=mean, scale=std_d)
    print(to_percentage(prob))
    return prob

In [301]:
def p_more_than(val, mean, std_d):
    """Print (as a percentage) and return P(X > val) for X ~ Normal(mean, std_d).

    Computed as the complement of the CDF at ``val``.
    """
    below = norm.cdf(val, loc=mean, scale=std_d)
    above = 1 - below
    print(to_percentage(above))
    return above

In [302]:
def p_between(val1, val2, mean, std_d):
    """Return the Normal(mean, std_d) probability mass between two values.

    The bounds may be given in either order. The CDF of the larger bound is
    evaluated (and printed) first, then that of the smaller bound.
    """
    low, high = (val1, val2) if val1 <= val2 else (val2, val1)
    upper_prob = p_less_than(high, mean, std_d)
    lower_prob = p_less_than(low, mean, std_d)
    return upper_prob - lower_prob

In [303]:
def p_to_zscore(percentage):
    """Print and return the z-score whose left-tail area equals ``percentage``.

    ``percentage`` is a fraction in [0, 1] passed straight to ``norm.ppf``.
    """
    z = norm.ppf(percentage)
    print(z)
    return z

def p_to_val(percentage, mean, std_d):
    """Print and return the value at the given left-tail fraction of Normal(mean, std_d).

    The fraction is converted to a z-score, which is then scaled by ``std_d``
    and shifted by ``mean``.
    """
    z = p_to_zscore(percentage)
    value = mean + z * std_d
    print(value)
    return value

In [304]:
def binomial_exact_p(sample_size, p, expected_val):
    """Print and return P(X == expected_val) for X ~ Binomial(sample_size, p)."""
    prob = binom.pmf(expected_val, sample_size, p)
    print(prob)
    return prob

In [305]:
def binomial_less_or_equal_to_p(sample_size, p, val):
    """Print and return P(X <= val) for X ~ Binomial(sample_size, p)."""
    cumulative = binom.cdf(val, sample_size, p)
    print(cumulative)
    return cumulative
def binomial_less_than_p(sample_size, p, val):
    """Print and return P(X < val) for X ~ Binomial(sample_size, p).

    Strictly-less-than is the CDF evaluated at ``val - 1``.
    """
    last_included = val - 1
    cumulative = binom.cdf(last_included, sample_size, p)
    print(cumulative)
    return cumulative

In [306]:
def binomial_more_or_equal_to_p(sample_size, p, val):
    """Print and return P(X >= val) for X ~ Binomial(sample_size, p).

    Computed as the complement of P(X <= val - 1).
    """
    below = binom.cdf(val - 1, sample_size, p)
    at_or_above = 1 - below
    print(at_or_above)
    return at_or_above

def binomial_more_than_p(sample_size, p, val):
    """Print and return P(X > val) for X ~ Binomial(sample_size, p).

    Computed as the complement of P(X <= val).
    """
    at_or_below = binom.cdf(val, sample_size, p)
    above = 1 - at_or_below
    print(above)
    return above

In [307]:
def binomial_between_p(p1, p2, sample_size, p):
    """Print and return P(p1 <= X <= p2) for X ~ Binomial(sample_size, p).

    Despite their names, ``p1`` and ``p2`` are the lower and upper success
    counts (both inclusive); ``p`` is the per-trial success probability.
    """
    up_to_upper = binomial_less_or_equal_to_p(sample_size, p, p2)
    below_lower = binomial_less_or_equal_to_p(sample_size, p, p1 - 1)
    between = up_to_upper - below_lower
    print(between)
    return between

In [308]:
def std_of_binom(sample_size, p):
    """Print and return the standard deviation of Binomial(sample_size, p)."""
    variance = sample_size * p * (1 - p)
    spread = np.sqrt(variance)
    print(spread)
    return spread

In [309]:
def se(p, sample_size):
    """Print and return the standard error of a sample proportion.

    Evaluates sqrt(p * (1 - p) / sample_size).
    """
    variance = p * (1 - p) / sample_size
    std_err = np.sqrt(variance)
    print(std_err)
    return std_err

In [310]:
def clt_more_than(p, sample_size, val):
    """Print and return P(sample proportion > val) under the normal approximation.

    The sampling distribution is taken as Normal(p, se(p, sample_size)).
    """
    std_err = se(p, sample_size)
    tail_prob = p_more_than(val, p, std_err)
    print(tail_prob)
    return tail_prob

In [311]:
def clt_p_to_z(p, sample_size, val):
    """Print and return the z-score of an observed proportion ``val``.

    ``val`` is standardised with centre ``p`` and scale ``se(p, sample_size)``.
    """
    std_err = se(p, sample_size)
    z = calc_zscore(val, p, std_err)
    print(z)
    return z

In [312]:
def confidence_interval(p, sample_size, conf_level_pct):
    """Print and return the z-based confidence interval for a proportion.

    Args:
        p: observed sample proportion.
        sample_size: number of observations.
        conf_level_pct: confidence level as a fraction (e.g. 0.95).

    Returns:
        ``(lower, upper)`` bounds, i.e. ``p`` minus/plus the margin of error.
    """
    std_e = se(p, sample_size)
    z = two_tail_zscore(conf_level_pct)
    margin_e = std_e * z
    print("margin error", margin_e)
    bounds = (p - margin_e, p + margin_e)
    print(*bounds)
    return bounds

def margin_of_err(z, se):
    """Return the margin of error: critical value ``z`` times standard error ``se``."""
    margin = z * se
    return margin

In [313]:
def conf_interval_in_populations(p1, p2, size_1, size_2, conf_level_pct):
    """Print and return the z-based confidence interval for ``p1 - p2``.

    Uses the unpooled standard error
    sqrt(p1(1-p1)/size_1 + p2(1-p2)/size_2) and the two-tailed critical z
    for ``conf_level_pct`` (a fraction such as 0.99).
    """
    variance_1 = p1 * (1 - p1) / size_1
    variance_2 = p2 * (1 - p2) / size_2
    std_err = np.sqrt(variance_1 + variance_2)
    difference = p1 - p2
    z = two_tail_zscore(conf_level_pct)
    interval = (difference - z * std_err, difference + z * std_err)
    print(interval)
    return interval

In [314]:
def one_proportion_z_test_statistic(p_observed, p_null_hypo, sample_size):
    """Print and return the one-proportion z test statistic.

    The standard error is computed from the null-hypothesis proportion
    ``p_null_hypo``, not from the observed one.
    """
    null_std_err = np.sqrt(p_null_hypo * (1 - p_null_hypo) / sample_size)
    z = (p_observed - p_null_hypo) / null_std_err
    print("one proportion z test statistic is", z)
    return z


In [315]:
def hypo_one_pro(success, sample_size, population_p, confidence_level):
    """Print a one-proportion hypothesis-test summary; returns nothing.

    Reports the z statistic of ``success / sample_size`` against
    ``population_p``, the normal areas to the right and left of that z, and
    the confidence interval around the observed proportion.
    """
    p_hat = success / sample_size
    z = one_proportion_z_test_statistic(p_hat, population_p, sample_size)
    print('z is', z)
    # Each helper prints its own output before the labelled line below it.
    right_area = percentage_after_z_score(z)
    print('percentage_after_z_score', right_area)
    left_area = percentage_until_z_score(z)
    print('percentage_until_z_score', left_area)
    interval = confidence_interval(p_hat, sample_size, confidence_level)
    print('confidence_interval', interval)


In [316]:
def hypo_two_pro(success_1, sample_size_1, success_2, sample_size_2):
    """Print and return the pooled two-proportion z test quantities.

    Returns:
        dict with the z statistic ("z"), both sample proportions ("p1",
        "p2"), the pooled proportion ("p"), the pooled standard error ("se"),
        and the inputs echoed back as "p1_pop"/"p2_pop" (sample sizes) and
        "p1_suc"/"p2_suc" (success counts).
    """
    p1 = success_1 / sample_size_1
    p2 = success_2 / sample_size_2
    pooled_p = (success_1 + success_2) / (sample_size_1 + sample_size_2)
    pooled_se = np.sqrt(pooled_p * (1 - pooled_p) * (1 / sample_size_1 + 1 / sample_size_2))
    # The null hypothesis is a zero difference between the two proportions.
    z = (p1 - p2) / pooled_se

    for label, value in (('p1 ', p1), ('p2 ', p2), ('p ', pooled_p), ('se', pooled_se), ('z', z)):
        print(label, value)

    summary = {"z": z, "p1": p1, "p2": p2, "p": pooled_p, "se": pooled_se}
    summary["p1_pop"] = sample_size_1
    summary["p2_pop"] = sample_size_2
    summary["p1_suc"] = success_1
    summary["p2_suc"] = success_2
    return summary

In [317]:
# Pooled two-proportion z test: 55% vs 57% successes, 1500 observations each.
# The returned dict is kept in `res` so a later cell can read its entries.
res = hypo_two_pro(0.55*1500, 1500, 0.57*1500, 1500)

p1  0.55
p2  0.57
p  0.56
se 0.01812548849916419
z -1.1034185368809317


In [318]:
# 99% confidence interval for p1 - p2, using the proportions and sample sizes stored in `res`.
conf_interval_in_populations(res["p1"],res["p2"], res["p1_pop"], res["p2_pop"], 0.99)

2.5758293035489004
(-0.06667868939650468, 0.026678689396504862)


(-0.06667868939650468, 0.026678689396504862)

In [319]:
def calc_means_stde(n, std):
    """Return the standard error of a sample mean: ``std / sqrt(n)``."""
    root_n = np.sqrt(n)
    return std / root_n

In [320]:
def calc_t(confi_level, data_count):
    """Return the two-tailed critical t value.

    Args:
        confi_level: confidence level as a fraction (e.g. 0.95).
        data_count: number of observations; ``data_count - 1`` degrees of
            freedom are used.
    """
    one_tail = (1 - confi_level) / 2
    dof = data_count - 1
    return t.ppf(1 - one_tail, dof)

In [348]:
def calc_conv_interval_with_arr(arr, confi_level):
    """Print sample statistics and return a t-based confidence interval for the mean.

    Args:
        arr: sequence of observations.
        confi_level: confidence level as a fraction (e.g. 0.95).

    Returns:
        dict with the interval ("conf_inter"), sample mean ("mean"), standard
        deviation from ``tstd`` ("std"), sample size ("s_size"), standard
        error of the mean ("se_mean") and critical t value ("t_val").
    """
    sample_mean = np.mean(arr)
    sample_std = tstd(arr)
    n = len(arr)
    std_err = sample_std / np.sqrt(n)
    critical_t = calc_t(confi_level, n)

    print('mean', sample_mean)
    print('standard deviation', sample_std)
    print("standard err", std_err)
    print('sample size', n)
    print("t value", critical_t)

    interval = (sample_mean - critical_t * std_err, sample_mean + critical_t * std_err)
    return {
        "conf_inter": interval,
        "mean": sample_mean,
        "std": sample_std,
        "s_size": n,
        "se_mean": std_err,
        "t_val": critical_t,
    }
    
    

In [419]:
def mean_conf_interval(sample_size, sample_mean, std, conf_level):
    """Print the margin of error and return the t-based interval for a mean.

    Args:
        sample_size: number of observations.
        sample_mean: observed sample mean.
        std: sample standard deviation.
        conf_level: confidence level as a fraction (e.g. 0.95).

    Returns:
        ``(lower, upper)`` bounds of the interval.
    """
    critical_t = calc_t(conf_level, sample_size)
    std_err = std / np.sqrt(sample_size)
    margin = critical_t * std_err
    print('margin_error', margin)
    return (sample_mean - margin, sample_mean + margin)

# 95% t-interval for a sample of 15 with mean 38.7 and standard deviation 3.
mean_conf_interval(15, 38.7, 3, 0.95)

margin_error 1.6613446246939247


(37.03865537530608, 40.36134462469393)

In [350]:
def t_dist_calc(mean_hat, mean, std, sample_size):
    """Print and return the one-sample t statistic.

    Evaluates ``(mean_hat - mean) / (std / sqrt(sample_size))``.
    """
    std_err = std / np.sqrt(sample_size)
    difference = mean_hat - mean
    t_stat = difference / std_err
    print("t is", t_stat)
    return t_stat

In [367]:
def calc_p_value_with_t(t_val, sample_size, option="ne"):
    """Return the p-value of a t statistic with ``sample_size - 1`` degrees of freedom.

    Args:
        t_val: observed t statistic (its sign matters for one-sided options).
        sample_size: number of observations.
        option: alternative hypothesis --
            "ne": two-sided, 2 * P(T >= |t_val|);
            "lt": lower tail, P(T <= t_val);
            "gt": upper tail, P(T >= t_val).

    Returns:
        The p-value, or the string "NOT RIGHT OPTION" when ``option`` is not
        recognised (sentinel kept for backward compatibility).
    """
    dof = sample_size - 1
    if option == "ne":
        # Only the two-sided test is symmetric in the sign of t.
        return t.sf(abs(t_val), dof) * 2
    elif option == "lt":
        # One-sided tails must keep the sign: a positive t is weak evidence
        # for "less than" and has to yield a p-value above 0.5.
        return t.cdf(t_val, dof)
    elif option == "gt":
        return t.sf(t_val, dof)
    return "NOT RIGHT OPTION"

In [410]:
# Twelve observations; summarise them with a 95% t-interval for the mean.
arr = [22, 11, 14, 11, 16, 11, 0, 3, 9, 42, 0, 14]
val = calc_conv_interval_with_arr(arr, 0.95)
p_s = np.mean(arr)
val

mean 12.75
standard deviation 11.27446834061972
standard err 3.2546586657133543
sample size 12
t value 2.200985160082949


{'conf_inter': (5.586544575629536, 19.913455424370465),
 'mean': 12.75,
 'std': 11.27446834061972,
 's_size': 12,
 'se_mean': 3.2546586657133543,
 't_val': 2.200985160082949}

In [411]:
# t statistic of the sample summarised in `val` against a reference mean of 0.
t_val = t_dist_calc(val['mean'], 0, val['std'], val['s_size'])

t is 3.917461494293277


In [412]:
# One-sided ("gt") p-value for the current t_val, with val['s_size'] - 1 degrees of freedom.
calc_p_value_with_t(t_val, val['s_size'], "gt")

0.0012016507789404849

In [415]:
# The original call had an empty first argument (a SyntaxError). The sample
# mean 76 is reconstructed from the recorded output: with mean 71.6, std 13.3
# and n = 100 it reproduces "t is 3.3082706766917336".
t_val = t_dist_calc(76, 71.6, 13.3, 100)

t is 3.3082706766917336


In [417]:
# Two-sided ("ne") p-value for the current t_val, with 100 - 1 degrees of freedom.
calc_p_value_with_t(t_val, 100, "ne")

0.0013095398440006066

In [403]:
# 95% t-interval for a sample of 28 with mean 114.84 and standard deviation 42.07.
mean_conf_interval(28, 114.84, 42.07, 0.95)

(98.52695699642456, 131.15304300357545)

In [413]:
# 95% of 400. NOTE(review): the notebook does not say what this value is used for.
0.95*400

380.0

In [414]:
def calc_2_sample_t_interval(mean1, mean2, actual_mean1, actual_mean2, std_1, std_2, sample_size_1, sample_size_2):
    """Print and return the two-sample t statistic (despite the "interval" name).

    The observed difference ``mean1 - mean2`` is compared with the reference
    difference ``actual_mean1 - actual_mean2`` and divided by the unpooled
    standard error sqrt(std_1^2 / sample_size_1 + std_2^2 / sample_size_2).
    """
    observed_diff = mean1 - mean2
    reference_diff = actual_mean1 - actual_mean2
    std_err = np.sqrt(std_1 * std_1 / sample_size_1 + std_2 * std_2 / sample_size_2)
    t_stat = (observed_diff - reference_diff) / std_err
    print("t for 2 sample is", t_stat)
    return t_stat

In [421]:
# 95% t-interval for a sample of 25 with mean 72 and standard deviation 13.
mean_conf_interval(25, 72, 13, 0.95)

margin_error 5.366136260232855


(66.63386373976715, 77.36613626023285)

In [None]:
# NOTE(review): unfinished call. calc_2_sample_t_interval takes eight required
# positional arguments and only the first (mean1 = 173.5) is supplied, so
# running this cell as written raises TypeError. Fill in the remaining values.
calc_2_sample_t_interval(173.5, )