In [1]:
## housekeeping
import numpy as np

#### Kolmogorov-Lilliefors test
Answers the question: "Is my data Gaussian, for some $\mu \in \mathbb {R}$ and $\sigma ^2 > 0$?"  
$\widetilde{T}_ n = \sqrt{n}\sup _{t \in \mathbb {R}} |F_ n(t) - \Phi _{\widehat{\mu }, \widehat{\sigma }^2}(t)|$  
$\tilde{\psi }_ n = \mathbf{1}(\widetilde{T}_ n > q_{\nu }')$  
$\tilde{H}_0: \mathbf{P} \in \{  \mathcal{N}(\mu , \sigma ^2) \} _{\mu \in \mathbb {R}, \sigma ^2 > 0}$  
$\tilde{H}_1: \mathbf{P} \notin \{  \mathcal{N}(\mu , \sigma ^2) \} _{\mu \in \mathbb {R}, \sigma ^2 > 0}$

In [2]:
## perform the Kolmogorov-Lilliefors test for gaussianity on the following dataset:
# Sort ascending so that observation t[i] pairs with the empirical CDF value Fn[i].
t = np.sort(np.array([.01, .1, .2, .28, .8])) # data

# compute the sample mean and sample variance
n = len(t) # number of observations
Xn = np.mean(t) # sample mean
Sn = np.var(t, ddof=1) # unbiased sample variance (divides by n - 1)

# compute the empirical CDF
# Each observation contributes a jump of 1 / n (previously hard-coded as 1 / 5),
# so Fn[i] = (i + 1) / n is the ECDF evaluated at the i-th smallest observation.
Fn = np.arange(1, n + 1) / n # empirical CDF

In [3]:
## helper function to generate a cumulative probability for H0 gaussian
def gcdf(X, mu, sigma2, lo=-1000, hi=1000, step=0.001):
    """Return the Gaussian CDF P(Z <= X) for Z ~ N(mu, sigma2).

    The value is computed in closed form from the complementary error
    function rather than by numerically integrating the pdf on a grid.
    This avoids allocating a multi-million-point grid on every call, the
    discretization bias of the Riemann sum, and the failure that occurred
    when X was equidistant from two grid points.

    Parameters
    ----------
    X : scalar
        Point at which to evaluate the CDF.
    mu : scalar
        Mean of the Gaussian.
    sigma2 : scalar
        Variance of the Gaussian; must be strictly positive.
    lo, hi, step : scalar, optional
        Retained only so existing calls keep working; they described the
        integration grid of the former numerical implementation and are
        ignored now.

    Returns
    -------
    float
        Cumulative probability in [0, 1].

    Raises
    ------
    ValueError
        If sigma2 is not strictly positive.
    """
    import math  # local import: the block stays self-contained

    if not sigma2 > 0:
        raise ValueError("sigma2 must be strictly positive")
    z = (float(X) - float(mu)) / math.sqrt(2.0 * float(sigma2))
    # Phi(x) = 0.5 * erfc(-z); erfc keeps precision in the lower tail
    # where 0.5 * (1 + erf(z)) would suffer cancellation.
    return 0.5 * math.erfc(-z)

In [14]:
## compute test statistic
# Fn is a step function, so the supremum of |Fn(s) - Phi(s)| over all s is
# reached at one of the observations, approached from the right (where Fn has
# already jumped to Fn[i]) or from the left (where Fn still equals the previous
# level). Both sides must be checked; looking only at the value after the jump
# can underestimate the supremum.
# Assumes t is sorted ascending so that t[i] is the observation at which Fn
# reaches Fn[i].
# NOTE(review): no sqrt(n) factor is applied here even though the markdown
# formula above includes one -- confirm this matches the scale of the table
# value used in the test cell.
sup = 0 # initialize supremum
Fn_left = np.concatenate(([0.0], Fn[:-1])) # ECDF level just before each jump
for i in range(len(t)): # for each observation / ECDF jump
    Phi = gcdf(t[i], Xn, Sn) # hypothesised Gaussian CDF at the observation
    arg = max(np.abs(Fn[i] - Phi), np.abs(Phi - Fn_left[i])) # larger of the two one-sided gaps
    if arg > sup: # keep the largest
        sup = arg
Tn = sup # test statistic

In [45]:
## test
# Compare the statistic with the tabulated critical value and print the decision.
table_value = .315 # from KL table (presumably the n = 5, level 0.1 entry -- verify)
alpha = 0.1 # level
truth = ["<=", ">"] # Tn > table_val to reject
outcome = ["insufficient", "sufficient"]
reject = int(Tn > table_value) # 1 when the statistic exceeds the critical value, else 0
# `!s` forces str() on each value, matching the original string concatenation.
print(f"For Tn = {np.round(Tn, decimals=3)!s}, and n = {n!s} observations, Tn "
      f"{truth[reject]!s} {table_value!s}.  We conclude there is "
      f"{outcome[reject]!s} evidence to reject the null at level {alpha!s}.")

For Tn = 0.297, and n = 5 observations, Tn <= 0.315.  We conclude there is insufficient evidence to reject the null at level 0.1.
