In [35]:
import scipy
from scipy.stats import norm, binom, iqr, t
import numpy as np
from numpy.random import seed
from numpy.random import normal
import matplotlib.pyplot as plt

In [36]:
def to_percentage(n):
    """Render a fraction as a percentage string.

    The value is multiplied by 100 and suffixed with "%". No rounding is
    applied, so floating-point noise is visible in the resulting text.
    """
    scaled = n * 100
    return "%s%%" % (scaled,)

In [37]:
def percentage_until_z_score(z_score):
    """Area under the standard normal curve to the left of ``z_score``.

    Prints the area formatted as a percentage and returns it as a fraction
    (the raw ``norm.cdf`` value).
    """
    area_below = norm.cdf(z_score)
    formatted = to_percentage(area_below)
    print(formatted)
    return area_below

In [38]:
def percentage_after_z_score(z_score):
    """Area under the standard normal curve to the right of ``z_score``.

    Evaluated as the CDF at the mirrored point ``-z_score``. The area is
    printed as a percentage and returned as a fraction.
    """
    mirrored = -z_score
    upper_tail = norm.cdf(mirrored)
    print(to_percentage(upper_tail))
    return upper_tail

def percentage_between_z_score(z1, z2):
    """Area under the standard normal curve between ``z1`` and ``z2``.

    Computed as (area below ``z2``) minus (area below ``z1``). Each of the
    two lookups prints its own percentage; the difference is then printed
    as a plain fraction and returned.
    """
    # Look up z2 first so the printed lines appear in the same order as before.
    below_z2 = percentage_until_z_score(z2)
    below_z1 = percentage_until_z_score(z1)
    area_between = below_z2 - below_z1
    print(area_between)
    return area_between

In [39]:
def percentage_two_tail(z_score):
    """Central area of the standard normal between ``-z_score`` and ``+z_score``.

    This is the inverse of ``two_tail_zscore``: feeding the z returned by
    ``two_tail_zscore(p)`` into this function gives back ``p``
    (e.g. z = 1.96 -> roughly 95%).

    The previous implementation evaluated ``norm.cdf(1 - (1 - z) / 2)``,
    i.e. it applied the ppf-argument transformation to a z-score, which
    does not correspond to any tail or central probability.

    Prints the percentage string and, unlike the other helpers in this
    notebook, also *returns* the formatted string (kept as-is so existing
    callers are not broken).
    """
    # P(-z <= Z <= z) = cdf(z) - cdf(-z)
    res = norm.cdf(z_score) - norm.cdf(-z_score)
    formatted = to_percentage(res)
    print(formatted)
    return formatted
def two_tail_zscore(percentage):
    """Critical z-value that leaves a central area of ``percentage``.

    The remaining area is split evenly between both tails, and the z-value
    is the standard normal quantile at ``1 - tail``. The value is printed
    and returned.
    """
    single_tail = (1 - percentage) / 2
    critical_z = norm.ppf(1 - single_tail)
    print(critical_z)
    return critical_z

In [40]:
def calc_zscore(val, mean, std_d):
    """Standardise ``val``: its distance from ``mean`` in units of ``std_d``.

    The z-score is printed and returned.
    """
    deviation = val - mean
    z = deviation / std_d
    print(z)
    return z

In [41]:
def p_less_than(val, mean, std_d):
    """P(X < val) for X ~ Normal(mean, std_d).

    Prints the probability as a percentage and returns it as a fraction.
    """
    distribution = norm(loc=mean, scale=std_d)
    prob_below = distribution.cdf(val)
    print(to_percentage(prob_below))
    return prob_below

In [42]:
def p_more_than(val, mean, std_d):
    """P(X > val) for X ~ Normal(mean, std_d).

    Uses the survival function instead of ``1 - cdf``: far in the upper
    tail the CDF rounds to exactly 1.0, so the subtraction collapses to 0
    and the tail probability is lost, whereas ``sf`` keeps it.

    Prints the probability as a percentage and returns it as a fraction.
    """
    res = norm.sf(val, loc=mean, scale=std_d)
    print(to_percentage(res))
    return res

In [43]:
def p_between(val1, val2, mean, std_d):
    """Probability that X ~ Normal(mean, std_d) falls between the two values.

    The bounds may be given in either order. The cumulative probability at
    the larger bound is evaluated first, then the one at the smaller bound
    (each lookup prints its percentage), and their difference is returned.
    """
    upper, lower = (val2, val1) if val1 <= val2 else (val1, val2)
    return p_less_than(upper, mean, std_d) - p_less_than(lower, mean, std_d)

In [44]:
def p_to_zscore(percentage):
    """Standard normal quantile: the z with ``percentage`` of the area below it.

    ``percentage`` is a fraction (it is passed straight to ``norm.ppf``).
    The z-value is printed and returned.
    """
    z = norm.ppf(percentage)
    print(z)
    return z

def p_to_val(percentage, mean, std_d):
    """Value of a Normal(mean, std_d) variable at the given cumulative fraction.

    Looks up the matching z-score (which prints it), rescales it by
    ``std_d`` and shifts it by ``mean``. The value is printed and returned.
    """
    z = p_to_zscore(percentage)
    offset = z * std_d
    value = mean + offset
    print(value)
    return value

In [45]:
def binomial_exact_p(sample_size, p, expected_val):
    """P(X == expected_val) for X ~ Binomial(sample_size, p).

    The probability mass is printed and returned.
    """
    point_mass = binom.pmf(expected_val, sample_size, p)
    print(point_mass)
    return point_mass

In [46]:
def binomial_less_or_equal_to_p(sample_size, p, val):
    """P(X <= val) for X ~ Binomial(sample_size, p).

    The cumulative probability is printed and returned.
    """
    cumulative = binom.cdf(val, sample_size, p)
    print(cumulative)
    return cumulative
def binomial_less_than_p(sample_size, p, val):
    """P(X < val) for X ~ Binomial(sample_size, p).

    Strict inequality on an integer-valued variable, so the CDF is
    evaluated one count lower (``val - 1``). Printed and returned.
    """
    largest_included = val - 1
    cumulative = binom.cdf(largest_included, sample_size, p)
    print(cumulative)
    return cumulative

In [47]:
def binomial_more_or_equal_to_p(sample_size, p, val):
    """P(X >= val) for X ~ Binomial(sample_size, p).

    Evaluated as the survival function at ``val - 1`` (P(X > val - 1)).
    ``binom.sf`` is used rather than ``1 - binom.cdf`` because the
    subtraction cancels to 0 once the CDF rounds to 1.0, losing small
    upper-tail probabilities.

    The probability is printed and returned.
    """
    res = binom.sf(val - 1, sample_size, p)
    print(res)
    return res

def binomial_more_than_p(sample_size, p, val):
    """P(X > val) for X ~ Binomial(sample_size, p).

    Uses the survival function ``binom.sf`` instead of ``1 - binom.cdf``:
    the subtraction cancels to 0 once the CDF rounds to 1.0, which throws
    away small upper-tail probabilities.

    The probability is printed and returned.
    """
    res = binom.sf(val, sample_size, p)
    print(res)
    return res

In [48]:
def binomial_between_p(p1, p2, sample_size, p):
    """P(p1 <= X <= p2) for X ~ Binomial(sample_size, p).

    Despite their names, ``p1`` and ``p2`` are success *counts* (the lower
    and upper bound, both inclusive); ``p`` is the success probability.
    Each cumulative lookup prints its own value, then the difference is
    printed and returned.
    """
    # Upper bound first, matching the original evaluation/print order.
    up_to_high = binomial_less_or_equal_to_p(sample_size, p, p2)
    below_low = binomial_less_or_equal_to_p(sample_size, p, p1 - 1)
    in_range = up_to_high - below_low
    print(in_range)
    return in_range

In [49]:
def std_of_binom(sample_size, p):
    """Standard deviation of a Binomial(sample_size, p) count: sqrt(n p (1 - p)).

    The value is printed and returned.
    """
    variance = sample_size * p * (1 - p)
    spread = np.sqrt(variance)
    print(spread)
    return spread

In [50]:
def se(p, sample_size):
    """Standard error of a sample proportion: sqrt(p (1 - p) / n).

    The value is printed and returned.
    """
    proportion_variance = p * (1 - p) / sample_size
    std_error = np.sqrt(proportion_variance)
    print(std_error)
    return std_error

In [51]:
def clt_more_than(p, sample_size, val):
    """Probability that a sample proportion exceeds ``val``.

    Models the sample proportion as Normal(p, se(p, sample_size)) and
    delegates to ``p_more_than``. The helpers print their own output; the
    resulting fraction is printed once more here and returned.
    """
    std_error = se(p, sample_size)
    prob_above = p_more_than(val, p, std_error)
    print(prob_above)
    return prob_above

In [52]:
def clt_p_to_z(p, sample_size, val):
    """z-score of an observed proportion ``val`` against population proportion ``p``.

    The standard error comes from ``se(p, sample_size)`` and the
    standardisation from ``calc_zscore``. Both helpers print; the z-score
    is printed once more here and returned.
    """
    std_error = se(p, sample_size)
    z = calc_zscore(val, p, std_error)
    print(z)
    return z

In [53]:
def confidence_interval(p, sample_size, conf_level_pct):
    """Normal-approximation confidence interval for a proportion.

    ``conf_level_pct`` is a fraction (it is handed to ``two_tail_zscore``).
    Prints the margin of error and the two bounds, then returns the bounds
    as a ``(lower, upper)`` tuple.
    """
    std_e = se(p, sample_size)
    z = two_tail_zscore(conf_level_pct)
    print("margin error", std_e * z)
    lower = p - z * std_e
    upper = p + z * std_e
    print(lower, upper)
    return (lower, upper)

def margin_of_err(z, se):
    """Margin of error: the critical value ``z`` times the standard error ``se``.

    Note that the ``se`` parameter shadows the notebook's ``se`` function
    inside this body.
    """
    margin = z * se
    return margin

In [54]:
def conf_interval_in_populations(p1, p2, size_1, size_2, conf_level_pct):
    """Confidence interval for the difference of two proportions, ``p1 - p2``.

    Uses the unpooled standard error
    sqrt(p1 (1 - p1) / size_1 + p2 (1 - p2) / size_2) and the two-tailed
    critical z for ``conf_level_pct`` (a fraction). The ``(lower, upper)``
    tuple is printed and returned.
    """
    variance_1 = p1 * (1 - p1) / size_1
    variance_2 = p2 * (1 - p2) / size_2
    se_est = np.sqrt(variance_1 + variance_2)
    difference = p1 - p2
    z = two_tail_zscore(conf_level_pct)
    interval = (difference - z * se_est, difference + z * se_est)
    print(interval)
    return interval

In [55]:
def one_proportion_z_test_statistic(p_observed, p_null_hypo, sample_size):
    """z statistic for a one-proportion z-test.

    The standard error is built from the null-hypothesis proportion,
    sqrt(p0 (1 - p0) / n), and the statistic is (p_observed - p0) / SE.
    The statistic is printed with a label and returned.
    """
    null_std_error = np.sqrt(p_null_hypo * (1 - p_null_hypo) / sample_size)
    gap = p_observed - p_null_hypo
    z = gap / null_std_error
    print("one proportion z test statistic is", z)
    return z

In [56]:
def hypo_one_pro(success, sample_size, population_p, confidence_level):
    """Print a one-proportion hypothesis-test summary.

    Reports the z statistic of the observed proportion
    (``success / sample_size``) against ``population_p``, the standard
    normal areas above and below that z, and a confidence interval around
    the observed proportion at ``confidence_level`` (a fraction).
    Everything is printed; nothing is returned.
    """
    observed_p = success / sample_size
    z = one_proportion_z_test_statistic(observed_p, population_p, sample_size)
    print('z is', z)
    # Each helper prints its own output before the labelled line appears.
    area_above = percentage_after_z_score(z)
    print('percentage_after_z_score', area_above)
    area_below = percentage_until_z_score(z)
    print('percentage_until_z_score', area_below)
    interval = confidence_interval(observed_p, sample_size, confidence_level)
    print('confidence_interval', interval)


In [57]:
def hypo_two_pro(success_1, sample_size_1, success_2, sample_size_2):
    """Two-proportion z-test with a pooled standard error.

    Computes each sample proportion, the pooled proportion, the pooled
    standard error sqrt(p (1 - p) (1/n1 + 1/n2)) and the z statistic for a
    null difference of 0. Every quantity is printed.

    Returns a dict with keys ``z``, ``p1``, ``p2``, ``p``, ``se``, plus the
    inputs echoed back as ``p1_pop``/``p2_pop`` (sample sizes) and
    ``p1_suc``/``p2_suc`` (success counts).
    """
    null_difference = 0
    p1 = success_1 / sample_size_1
    p2 = success_2 / sample_size_2
    pooled_p = (success_1 + success_2) / (sample_size_1 + sample_size_2)
    pooled_se = np.sqrt(
        pooled_p * (1 - pooled_p) * (1 / sample_size_1 + 1 / sample_size_2)
    )
    z = (p1 - p2 - null_difference) / pooled_se

    for label, value in (
        ('p1 ', p1),
        ('p2 ', p2),
        ('p ', pooled_p),
        ('se', pooled_se),
        ('z', z),
    ):
        print(label, value)

    summary = dict(z=z, p1=p1, p2=p2, p=pooled_p, se=pooled_se)
    summary["p1_pop"] = sample_size_1
    summary["p2_pop"] = sample_size_2
    summary["p1_suc"] = success_1
    summary["p2_suc"] = success_2
    return summary

In [58]:
# Two samples of 1500 each, with success counts given as 55% and 57% of 1500.
# The returned dict is kept in `res` for the confidence-interval cell below.
res = hypo_two_pro(0.55*1500, 1500, 0.57*1500, 1500)

p1  0.55
p2  0.57
p  0.56
se 0.01812548849916419
z -1.1034185368809317


In [59]:
# 99% confidence interval for p1 - p2, using the proportions and sample sizes
# stored in the dict returned by hypo_two_pro.
conf_interval_in_populations(res["p1"],res["p2"], res["p1_pop"], res["p2_pop"], 0.99)

2.5758293035489004
(-0.06667868939650468, 0.026678689396504862)


(-0.06667868939650468, 0.026678689396504862)

In [60]:
# Area under the standard normal curve between z = -1 and z = 1.
percentage_between_z_score(-1, 1)

84.1344746068543%
15.865525393145708%
0.6826894921370859


0.6826894921370859

In [61]:
def calc_means_stde(n, std):
    """Standard error of a sample mean: ``std`` divided by sqrt(``n``)."""
    root_n = np.sqrt(n)
    return std / root_n

In [62]:
# Standard error of the mean for n = 9 and standard deviation 0.6 (0.6 / sqrt(9)).
calc_means_stde(9, 0.6)

0.19999999999999998

In [63]:
# NOTE(review): looks like a z-score, (6.4 - 7.6) divided by the ~0.2 standard
# error computed in the previous cell; confirm which value is the sample mean
# and which is the hypothesised mean.
(6.4-7.6)/0.2

-5.9999999999999964

In [66]:
# Central area within 3 standard deviations of the mean:
# 1 minus twice the lower-tail area below z = -3.
1 - (percentage_until_z_score(-3) * 2)

0.13498980316300932%


0.9973002039367398

In [68]:
# Two-tailed area beyond |z| = 1: twice the upper-tail area above z = 1.
percentage_after_z_score(1) * 2

15.865525393145708%


0.31731050786291415

In [71]:
# NOTE(review): scratch arithmetic; the result (50) is the divisor used in the
# next cell. Presumably a standard error (sigma / sqrt(n)) -- confirm the source
# of 2000 and 40.
2000/40

50.0

In [73]:
# NOTE(review): looks like a z-score, (53700 - 53246) / 50, with 50 taken from
# the previous cell -- confirm which value is observed and which is expected.
(53700-53246)/50

9.08

In [79]:
# Interval 5.75 +/- 1.959963984540054 * 0.48; the multiplier equals the
# two-tailed critical z for 95% (norm.ppf(0.975)).
# NOTE(review): 0.48 is presumably the standard error -- confirm.
(5.75-1.959963984540054*0.48, 5.75+1.959963984540054*0.48)

(4.809217287420774, 6.690782712579226)

In [77]:
# Upper-tail area of the standard normal beyond z = 1.64485.
percentage_after_z_score(1.64485)

5.000037406907855%


0.05000037406907855

In [78]:
def calc_t(confi_level, data_count):
    """Quantile of Student's t distribution with ``data_count - 1`` degrees of freedom.

    ``confi_level`` is passed to ``t.ppf`` unchanged, i.e. it is treated as
    a cumulative (lower-tail) probability. For a two-sided interval at
    confidence level c, pass ``1 - (1 - c) / 2`` (e.g. 0.975 for 95%).

    The previous version computed the quantile but never returned it, so
    every call evaluated to ``None``.
    """
    degrees_of_freedom = data_count - 1
    return t.ppf(confi_level, degrees_of_freedom)

1.959963984540054


1.959963984540054