# Estimate whether a given outlier can be ignored according to Chauvenet's criterion

Assumes the prior measurements are drawn from a Gaussian distribution whose mean and variance are estimated from those same measurements.

In [1]:
import sys
import numpy as np
from scipy.special import erf

In [2]:
def event_probability(x, mu=0.0, s=1.0):
    """Return the two-sided gaussian tail probability of an event.

    Computes the probability that a draw from a gaussian with mean ``mu``
    and standard deviation ``s`` lies at least as far from the mean as
    ``x`` does, i.e. P(|X - mu| >= |x - mu|).

    Parameters
    ----------
    x
        Value of the event.
    mu
        Gaussian mean (default 0.0).
    s
        Gaussian standard deviation (default 1.0).

    Returns
    -------
    The tail probability, a value between 0 and 1.
    """
    # local import: the file-level import cell only brings in erf
    from scipy.special import erfc

    # z is how many sigma away x is from the mean
    z = np.fabs((x - mu) / s)

    # erfc(t) equals 1 - erf(t) mathematically, but evaluating it directly
    # avoids the cancellation that makes 1.0 - erf(t) collapse to exactly
    # 0.0 for events far out in the tail
    return erfc(z / np.sqrt(2))

In [5]:
def chauvenet_criterion(prior_measurements, outlier):
    """Apply Chauvenet's criterion to a candidate outlier.

    The prior measurements are modelled as a gaussian whose mean and
    standard deviation are taken directly from the measurements themselves.

    Parameters
    ----------
    prior_measurements
        The set of previous measurements.
    outlier
        The value we want to check.

    Returns
    -------
    bool
        True if the outlier can be discarded, False if it cannot be
        immediately discarded.
    """
    # gaussian parameters estimated from the prior measurements
    mu_est = np.mean(prior_measurements)
    sigma_est = np.std(prior_measurements)

    # probability of seeing an event at least as extreme as the outlier
    p_tail = event_probability(outlier, mu=mu_est, s=sigma_est)

    # number of events, counting the new outlier as well
    n_events = len(prior_measurements) + 1

    # Chauvenet's criterion: discard when fewer than half an event this
    # extreme is expected among n_events draws
    expected_count = n_events * p_tail
    return bool(expected_count < 0.5)

Test Chauvenet's criterion using a real Gaussian distribution and a 3-sigma outlier

In [37]:
# number of prior measurements to simulate
N = 1000

# the candidate value to test for rejection
outlier = 3.0

# draw the prior measurements from a zero-mean, unit-width gaussian
x = np.random.normal(loc=0.0, scale=1.0, size=N)

# ask whether Chauvenet's criterion lets us drop the candidate
answer = chauvenet_criterion(x, outlier)

print(f'Can we reject an outlier {outlier} for N = {N} samples? {answer}')

Can we reject an outlier 3.0 for N = 1000 samples? False
