Build a confidence interval for proportions

In [30]:
# Inputs for the single-proportion confidence interval.
n, p_hat = 850, 0.67  # sample size and observed proportion (point estimate)
conf_level = 0.95  # desired confidence level
alpha = 1 - conf_level  # significance level implied by the confidence level

In [31]:
import math
from scipy import stats #  selective import

# Critical value of the standard normal for a two-sided interval:
# alpha is split evenly between the two tails.
z = stats.norm.ppf(1 - alpha / 2)

# Estimated standard error of the sample proportion.
std_err = math.sqrt(p_hat * (1 - p_hat) / n)

# Margin of error, i.e. half the width of the interval.
half_length = z * std_err

# Interval centred on the point estimate.
ci = (p_hat - half_length, p_hat + half_length)
ci

(0.6383893810609106, 0.7016106189390895)

Calculate a confidence interval for a proportion with statsmodels

In [25]:
from statsmodels.stats.proportion import proportion_confint

# Same interval via statsmodels, for comparison with the manual calculation.
# The number of successes is reconstructed from the proportion and the
# sample size (0.67 * 850), so it is not necessarily a whole number.
ci = proportion_confint(p_hat * n, n, alpha=alpha)
ci

(0.6383893810609106, 0.7016106189390895)

Perform a hypothesis test for one proportion

In [26]:
from statsmodels.stats.proportion import proportions_ztest

# Two-sided z-test of the sample proportion against a hypothesized value of 0.5.
stat, pval = proportions_ztest(
    count=p_hat * n,
    nobs=n,
    value=0.5,
    alternative='two-sided',
)
stat, pval

(10.540567965905424, 5.615870314163244e-26)

In [33]:
# One-sided variant of the test against 0.5, using alternative='larger'.
stat, pval = proportions_ztest(p_hat * n, n, value=0.5, alternative='larger')
stat, pval

(10.540567965905424, 2.807935157081622e-26)

In [34]:
# One-sided variant of the test against 0.5, using alternative='smaller'.
stat, pval = proportions_ztest(p_hat * n, n, value=0.5, alternative='smaller')
stat, pval

(10.540567965905424, 1.0)

Build a confidence interval for the difference of two proportions

In [36]:
counts = [69, 454]  # "positive" outcomes in the first and second group
n = [105, 680]  # sample size of the first and second group

# Point estimates: share of positive outcomes within each group.
p_hat = [successes / size for successes, size in zip(counts, n)]

conf_level = 0.95  # confidence level
alpha = 1 - conf_level  # significance level implied by the confidence level

In [39]:
import math
from scipy import stats

# Critical value of the standard normal for a two-sided interval.
z = stats.norm.ppf(1 - alpha / 2)

# Variance contribution of each group's sample proportion.
var_first = p_hat[0] * (1 - p_hat[0]) / n[0]
var_second = p_hat[1] * (1 - p_hat[1]) / n[1]

# Standard error of the difference: the two variance terms add up.
std_err = math.sqrt(var_first + var_second)

# Point estimate: first-group proportion minus second-group proportion.
point_estimate = p_hat[0] - p_hat[1]

# Margin of error, i.e. half the width of the interval.
half_length = z * std_err

ci = (point_estimate - half_length, point_estimate + half_length)
ci

(-0.10795379986302829, 0.08694539650168383)

Perform a hypothesis test for the difference of two proportions

In [40]:
from statsmodels.stats.proportion import proportions_ztest

# Two-sided z-test comparing the proportions of the two groups.
# The test is run once, on the `counts` and `n` lists, instead of a second
# time with the same numbers retyped as literals: a duplicate call only
# overwrites the first result and can drift out of sync with the variables.
stat, pval = proportions_ztest(count=counts, nobs=n, alternative='two-sided')
stat, pval

(-0.21244450181543953, 0.831760270598643)

In [41]:
# 201 positive outcomes observed in a sample of 356.
positive_outcomes, sample_size = 201, 356
proportion = positive_outcomes / sample_size  # observed sample proportion

confidence_level = .95
significance_level = 1 - confidence_level  # this is the alpha passed below

proportion_confint(positive_outcomes, sample_size, alpha=significance_level)

(0.5131032144560886, 0.6161102686899788)

In [42]:
# Same interval with the numbers passed directly: 201 of 356, alpha = 0.05.
proportion_confint(count=201, nobs=356, alpha=.05)

(0.5131032144560886, 0.6161102686899788)

In [45]:
# Interval for 1262 positive outcomes out of 2253, alpha = 0.05.
ci = proportion_confint(count=1262, nobs=2253, alpha=.05)
ci

(0.5396458584288474, 0.5806382072613435)

In [44]:
# Point estimate: share of positive outcomes in the sample.
successes, total = 1262, 2253
p_hat = successes / total
p_hat

0.5601420328450954

In [46]:
# Midpoint of the interval bounds, for comparison with the point estimate.
0.5 * (ci[0] + ci[1])

0.5601420328450954

In [47]:
# H_0: p = 50%
# H_A: p != 50%
zscore, pval = proportions_ztest(
    1262, 2253, # positive outcomes, sample size
    value=0.5 # hypothesized proportion
    )

# Decision at significance level alpha (set in an earlier cell as 1 - conf_level).
if pval < alpha:
    print('Reject H_0')
else:
    print('Fail to reject H_0')

# The tuple must be the cell's last expression to be displayed: Jupyter only
# echoes the final expression, so a bare tuple before the if/else is discarded.
zscore, pval

(5.751132671859348, 8.864751263709231e-09)

## 3) Wafer

The article “Limited Yield Estimation for Visual Defect Sources” (IEEE Trans. on Semiconductor Manuf., 1997: 17–23) reported that, in a study of a particular wafer inspection process, 356 dies were examined by an inspection probe and 201 of these passed the probe. Assuming a stable process, calculate a 95% (two-sided) confidence interval for the proportion of all dies that pass the probe.

In [None]:
# 95% two-sided interval: 201 of the 356 examined dies passed the probe.
proportion_confint(count=201, nobs=356, alpha=1 - .95)

## 6) Wireless Means for Online Access

An April 2009 survey of 2253 American adults conducted by the Pew Research Center’s Internet & American Life Project revealed that 1262 of the respondents had at some point used wireless means for online access.
Calculate and interpret a 95% CI for the proportion of all American adults who at the time of the survey had used wireless means for online access.

In [None]:
# 95% interval: 1262 of the 2253 respondents had used wireless access.
proportion_confint(count=1262, nobs=2253, alpha=1 - .95)