In [None]:
# Mean Variance (MV) independence test between a categorical variable (2 levels) and a quantitative variable

# Reference: [20]Cui H, Zhong W. A distribution-free test of independence based on mean variance index[J]. 
##               Computational Statistics & Data Analysis, 2019, 139, 117-133.

In [1]:
import numpy as np

In [None]:
def MV_test(x, y):
    '''
    Compute the Mean Variance (MV) independence test statistic between a
    quantitative variable and a two-level categorical variable.

    For each class r in {0, 1} the empirical cdf of x within the class, F_r,
    is compared with the pooled empirical cdf F at every observed x value:

        T = sum_r p_r * sum_i (F_r(x_i) - F(x_i))**2

    where p_r is the observed proportion of class r. The sum over
    observations is not divided by the sample size.
    NOTE(review): this looks like n times the sample MV index, presumably the
    form whose quantiles are tabulated in the reference - confirm before
    comparing against critical values.

    params: x: a quantitative variable, given as 1-D np.array or list
            y: a qualitative variable (with 2 levels, coded 0 and 1),
               given as 1-D np.array or list of the same length as x

    returns:
            statistic of MV independence test as scalar

    raises:
            ValueError if x and y are not 1-D sequences of equal length,
            if y contains values other than 0 and 1, or if one of the two
            classes has no observations.
    '''

    # Lists are accepted: element-wise comparison and boolean-mask indexing
    # below both require real arrays.
    x = np.asarray(x)
    y = np.asarray(y)

    if x.ndim != 1 or y.ndim != 1 or x.shape[0] != y.shape[0]:
        raise ValueError("x and y must be one-dimensional and of equal length")

    def F(X, points):
        # Empirical cdf of sample X evaluated at every value in `points`:
        # the fraction of X that is <= each point. With X sorted,
        # searchsorted(side='right') returns exactly that count.
        X_sorted = np.sort(X)
        return np.searchsorted(X_sorted, points, side='right') / X_sorted.size

    num_obs = y.size

    index_0 = y == 0
    index_1 = y == 1
    n0 = np.sum(index_0)
    n1 = np.sum(index_1)

    # p1 is derived as 1 - p0, which is only valid when every label is 0 or 1.
    if n0 + n1 != num_obs:
        raise ValueError("y must contain only the two levels 0 and 1")
    # An empty class would make its empirical cdf a 0/0 division.
    if n0 == 0 or n1 == 0:
        raise ValueError("both levels of y must have at least one observation")

    p0 = n0 / num_obs
    p1 = 1 - p0

    f_all = F(x, x)            # pooled empirical cdf at each observation
    f_0 = F(x[index_0], x)     # class-0 empirical cdf at each observation
    f_1 = F(x[index_1], x)     # class-1 empirical cdf at each observation

    T = p0 * np.sum((f_0 - f_all) ** 2) + p1 * np.sum((f_1 - f_all) ** 2)

    return T

In [None]:
# Quantile table of the asymptotic null distribution of the MV test statistic (given in the original paper)

![/table.png](table.png)