In [2]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import scipy.linalg as linalg

from scipy.optimize import minimize
from scipy.stats import norm

import statsmodels.api as sm

In [3]:
#generate the sample
def gen_data():
    nobs = 1000
    beta = 3
    x = np.random.uniform(low=-1., high=1., size=nobs)
    e = np.random.uniform(low=-1., high=1., size=nobs) # np.random.normal(loc=0.0, scale=1.0, size=nobs)
    y = 1*(1 + beta * x + e >= 0) 
    return y,x,nobs

In [4]:
def ndVuong(model1,model2,alpha,nsims):
    
    model1_fit = model1.fit(disp=False)
    ll1 = model1.loglikeobs(model1_fit.params)
    grad1 =  model1.score_obs(model1_fit.params)
    hess1 =  model1.hessian(model1_fit.params)
    
    model2_fit = model2.fit(disp=False)
    ll2 = model2.loglikeobs(model2_fit.params)
    grad2 =  model2.score_obs(model2_fit.params)
    hess2 =  model2.hessian(model2_fit.params)
    print(grad2.shape)
    print(hess2.shape)
    
    k1 = len(model1_fit.params)
    k2 = len(model2_fit.params)
    k = k1 + k2
    n = len(ll1)
    
    #A_hat:
    A_hat1 = np.concatenate([hess1,np.zeros((k1,k2))])
    A_hat2 = np.concatenate([np.zeros((k2,k1)),-1*hess2])
    A_hat = np.concatenate([A_hat1,A_hat2],axis=1)

    #B_hat, covariance of the score...
    B_hat =  np.concatenate([grad1,-grad2],axis=1) #might be a mistake here..
    B_hat = np.cov(B_hat.transpose())
    
    #compute eigenvalues for weighted chisq
    sqrt_B_hat= linalg.sqrtm(B_hat)
    W_hat = np.matmul(sqrt_B_hat,linalg.inv(A_hat))
    W_hat = np.matmul(W_hat,sqrt_B_hat)
    V,W = np.linalg.eig(W_hat)

    abs_vecV = np.abs(V)-np.max(np.abs(V));
    rho_star = 1*(abs_vecV==0);
    rnorm = np.dot(rho_star.transpose(),rho_star)
    rho_star = np.dot( 1/np.sqrt(rnorm), rho_star)
    rho_star = np.array([rho_star])

    #simulate the normal distr asociated with parameters...
    np.random.seed()
    Z0 = np.random.normal( size=(nsims,k+1) )
    VZ1 = np.concatenate( [np.array([[1]]),rho_star.transpose() ])
    VZ2 = np.concatenate( [ rho_star,np.identity(k)])
    VZ = np.concatenate([VZ1,VZ2],axis=1)

    Z = np.matmul(Z0,linalg.sqrtm(VZ))
    Z_L = Z[:,0]            #$Z_Lambda$
    Z_p = Z[:,1:k+1]        #$Z_phi^\ast$
    
    #trace(V)  #diagonostic line
    tr_Vsq = (V*V).sum()
    V_nmlzd = V/np.sqrt(tr_Vsq) #V, normalized by sqrt(trVsq);

    J_Lmod = lambda sig,c: sig*Z_L - np.matmul(Z_p*Z_p,V_nmlzd)/2+ V_nmlzd.sum()/2
    
    J_omod = (lambda sig,c: sig**2 - 2*sig*np.matmul(Z_p,V_nmlzd*rho_star[0])
              + np.matmul(Z_p*Z_p,V_nmlzd*V_nmlzd) + c)
    
    quant = lambda sig,c: np.quantile( np.abs( J_Lmod(sig,c)/np.sqrt(J_omod(sig,c))) ,1-alpha )

    sigstar = lambda c : minimize(lambda sig: -1*quant(sig[0],c), [2.5]).x
    cv0 = quant(sigstar(0),0) # critical value with c=0
    
    z_normal = norm.ppf(1-alpha/2)
    z_norm_sim = max(z_normal,np.quantile(np.abs(Z_L),1-alpha)) #simulated z_normal
    
    
    cv = max(cv0,z_normal)
    cstar = np.array([0])
    
    #if cv0 - z_norm_sim > 0.1:  # if critical value with c=0 is not very big
    #    f = lambda c: ((quant(sigstar(c[0]),c[0])-z_norm_sim)-0.1)**2
    #    cstar =  minimize(f, [5]).x
    #    cv = max(quant(sigstar(cstar),cstar),z_normal)
    
    #Computing the ND test statistic:
    nLR_hat = llr = ll1.sum() - ll2.sum()
    nomega2_hat = (ll1- ll2).var() ### this line may not be correct #####
                                        
    #Non-degenerate Vuong Tests    
    Tnd = (nLR_hat+V.sum()/2)/np.sqrt(n*nomega2_hat + cstar*(V*V).sum())
    
    return 1*(Tnd[0] >= cv) + 2*(Tnd[0] <= -cv)
    
    
yn,xn,nobs = gen_data()
model1 = sm.Probit(yn,sm.add_constant(xn))
model2 = sm.Logit(yn,sm.add_constant(xn))

print(ndVuong(model1,model2,.05,1000))

(1000, 2)
(2, 2)
1


In [None]:
def monte_carlo():
    shi = np.array([0, 0 ,0])
    total = 1000
    
    for i in range(total):
        np.random.seed()
        yn,xn,nobs = gen_data()
        model1 = sm.Probit(yn,sm.add_constant(xn))
        model1_fit = model1.fit(disp=False)
    
        model2 = sm.Logit(yn,sm.add_constant(xn))
        model2_fit = model2.fit(disp=False)

        shi_index = ndVuong(model1,model2,.05,1000)
        shi[shi_index] = shi[shi_index] + 1
    return shi/total

shi = monte_carlo()
print(shi)

In [None]:
#dgp1 - [0.836 0.164 0.   ]
#dgp2 - [0.844 0.156 0.   ]
#dgp3 - [0.009 0.991 0.   ]

In [None]:
#data dependent c*
#20 - .2
#20 - .3 

#c^*=0
#1000 - 0.164