In [1]:
# parametric tolerance interval
import numpy as np
from numpy.random import seed
from numpy.random import randn
from numpy import mean
from numpy import sqrt
from scipy.stats import chi2, norm

In [2]:
# example of tolerance interval
# The range from x to y covers 95% of the data with a confidence of 99%

#### Tolerance Interval for Gaussian Distribution

In [3]:
# seed the random number generator
seed(42)

In [4]:
# generate dataset
mean, std = 50, 5 # mean and standard deviation
data = 5 * randn(100) + 50

In [5]:
data

array([52.48357077, 49.30867849, 53.23844269, 57.61514928, 48.82923313,
       48.82931522, 57.89606408, 53.83717365, 47.65262807, 52.71280022,
       47.68291154, 47.67135123, 51.20981136, 40.43359878, 41.37541084,
       47.18856235, 44.9358444 , 51.57123666, 45.45987962, 42.93848149,
       57.32824384, 48.8711185 , 50.33764102, 42.87625907, 47.27808638,
       50.55461295, 44.24503211, 51.87849009, 46.99680655, 48.54153125,
       46.99146694, 59.26139092, 49.93251388, 44.71144536, 54.11272456,
       43.89578175, 51.04431798, 40.20164938, 43.35906976, 50.98430618,
       53.6923329 , 50.85684141, 49.42175859, 48.49448152, 42.60739005,
       46.40077896, 47.69680615, 55.28561113, 51.71809145, 41.18479922,
       51.62041985, 48.0745886 , 46.61539   , 53.05838144, 55.15499761,
       54.6564006 , 45.80391238, 48.45393812, 51.65631716, 54.87772564,
       47.60412881, 49.07170512, 44.46832513, 44.01896688, 54.06262911,
       56.78120014, 49.63994939, 55.01766449, 51.80818013, 46.77

In [6]:
# specify degrees of freedom
n = len(data)
dof = n - 1

In [7]:
# specify data coverage
prop = 0.95
prop_inv = (1.0 - prop) / 2.0
gauss_critical = norm.isf(prop_inv)

In [8]:
print('Gaussian critical value: %.3f (coverage=%d%%)' % (gauss_critical, prop*100))

Gaussian critical value: 1.960 (coverage=95%)


In [9]:
# specify confidence
prob = 0.99
chi_critical = chi2.isf(q=prob, df=dof)

In [10]:
print('Chi-Squared critical value: %.3f (prob=%d%%, dof=%d)' % (chi_critical, prob*100, dof))

Chi-Squared critical value: 69.230 (prob=99%, dof=99)


In [11]:
# tolerance
interval = sqrt((dof * (1 + (1/n)) * gauss_critical**2) / chi_critical)

In [12]:
print('Tolerance Interval: %.3f' % interval)

Tolerance Interval: 2.355


In [13]:
# summarize
data_mean = np.mean(data)
lower, upper = data_mean-interval, data_mean+interval

In [14]:
print('%.2f to %.2f covers %d%% of data with a confidence of %d%%' % (lower, upper, prop*100, prob*100))

47.13 to 51.84 covers 95% of data with a confidence of 99%
