In [44]:
import  numpy as np
import statsmodels.api as sm
from scipy.stats import norm 

# One Population Proportion

Null Hypothesis: p = 0.52

Alternative Hypothesis: p > 0.52 (this is a one-sided test)

Sample size: n = 1018

Point estimate = Number of success / sample size

Point estimate: phat = 0.56

In [31]:
n = 1018
pnull = 0.52
phat = 0.56
stat, pval = sm.stats.proportions_ztest(phat*n, n, pnull, alternative = 'larger')

print(f"statistic = {stat.round(2)}, pvalue = {pval.round(3)}")

statistic = 2.57, pvalue = 0.005


So, we reject the null hypothesis.

# Two Population Proportion

Null Hypothesis: p1-p2 = 0
Alternative Hypothesis: p1-p2 != 0

### Let's first calculate the test statistics manually

In [53]:
x1 = 91 # nuber of success
n1 = 247 # sample size
p1 = round(x1/n1,2) # point estimate or phat

x2 = 120
n2 = 308
p2 = round(x2/n2, 2)

# Estimaste of combined population proportion
phat = (x1+x2)/(n1+n2)

# Estimated standard error of the combined population proportion
se = np.sqrt(phat*(1-phat)*(1/n1+1/n2))

# test statistic
test_stat = (p1-p2)/se
pval = 2*norm.cdf(test_stat)

print(f'Test statistic: {round(test_stat,2)}')
print(f'P value: {round(pval,2)}')

Test statistic: -0.48
P value: 0.63


So, we fail to reject the null hypothesis and conclude that there is not enough evidence to say that the two population proportions are significantly different.  

### Now, let's create random numbers from these two proportion distributions and conduct the test using Statsmodels library

In [54]:
rng = np.random.default_rng(111)
population1 = rng.binomial(1, p1, size = n1)
population1[:10]           

array([0, 0, 0, 1, 1, 0, 1, 1, 0, 0], dtype=int64)

In [55]:
rng = np.random.default_rng(222)
population2 = rng.binomial(1, p2, size = n2)
population2[:10]

array([0, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype=int64)

In [58]:
z_statistic, pval = sm.stats.proportions_ztest(count = [x1,x2], nobs = [n1,n2])
z_statistic, pval

(-0.5110545335044571, 0.6093128715165157)

As we can see, it has produced almost the same statistic values as above