In [1]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [2]:
#generate the sample
def gen_data(nobs=5000, beta=3):
    """Simulate a binary-outcome sample from a latent-index model.

    The regressor is drawn uniformly on [-3, 3), the noise is standard
    normal, and the outcome is 1 when ``1 + beta * x + e >= 0`` and 0
    otherwise.  Draws come from NumPy's global random state, so seed it
    beforehand if a repeatable sample is needed.

    Parameters
    ----------
    nobs : int, default 5000
        Number of observations to simulate.
    beta : float, default 3
        Slope on the regressor in the latent index.

    Returns
    -------
    y : numpy.ndarray
        0/1 integer outcomes, shape ``(nobs,)``.
    x : numpy.ndarray
        Regressor values, shape ``(nobs,)``.
    nobs : int
        The sample size, returned for the caller's convenience.
    """
    # Draw order (x first, then e) is kept so seeded runs give the same sample.
    x = np.random.uniform(low=-3., high=3., size=nobs)
    e = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    latent = 1 + beta * x + e
    y = 1 * (latent >= 0)  # boolean mask -> 0/1 integers
    return y, x, nobs

In [3]:
def compute_llr(yn,xn):
    """Likelihood-ratio statistic comparing a Probit fit with a Logit fit.

    Both models are fitted to the same outcome ``yn`` on a constant plus
    ``xn``.  The result is the difference of the two maximised
    log-likelihoods divided by the standard deviation of the
    per-observation log-likelihood differences.

    Note that the value is NOT divided by ``sqrt(n)``; callers apply that
    scaling themselves.

    Parameters
    ----------
    yn : array-like
        Binary outcomes.
    xn : array-like
        Regressor values; a constant column is added here.

    Returns
    -------
    float
        ``(sum(ll_probit) - sum(ll_logit)) / sqrt(var(ll_probit - ll_logit))``.
    """
    # Build the design matrix once and share it between the two models.
    exog = sm.add_constant(xn)

    probit = sm.Probit(yn, exog)
    probit_fit = probit.fit(disp=False)
    ll1 = probit.loglikeobs(probit_fit.params)

    # Only endog/exog are passed: no extra keyword arguments are needed to fit Logit.
    logit = sm.Logit(yn, exog)
    logit_fit = logit.fit(disp=False)
    ll2 = logit.loglikeobs(logit_fit.params)

    llr = ll1.sum() - ll2.sum()
    omega2 = (ll1 - ll2).var()  # population variance (ddof=0) of per-observation differences
    return llr / np.sqrt(omega2)


# Draw one simulated sample and print the value returned by compute_llr.
# This value is not divided by sqrt(nobs); regular_test applies that scaling.
yn,xn,nobs = gen_data()
print(compute_llr(yn,xn))

193.10085746898713


In [4]:
def regular_test(yn,xn,nobs):
    """Two-sided test using a fixed 1.96 cut-off.

    Scales the value from ``compute_llr`` by ``1 / sqrt(nobs)`` and
    reports whether its magnitude reaches 1.96.

    Returns a truthy value when the cut-off is reached, falsy otherwise.
    """
    critical_value = 1.96
    scaled_stat = compute_llr(yn, xn) / np.sqrt(nobs)
    return critical_value <= np.abs(scaled_stat)


def bootstrap_test(yn,xn,nobs,trials=100,subn=1000):
    """Subsampling version of the Probit-vs-Logit test.

    Repeatedly draws ``subn`` observations without replacement, computes
    the scaled statistic from ``compute_llr`` on each subsample, and
    rejects when zero lies outside (or on the edge of) the 2.5%-97.5%
    percentile range of those statistics.

    Randomness comes from NumPy's global random state and is not reseeded
    here, so a seed set by the caller is respected.

    Parameters
    ----------
    yn, xn : numpy.ndarray
        Outcomes and regressor values, indexable by an integer array.
    nobs : int
        Number of observations available to sample from.
    trials : int, default 100
        Number of subsamples to draw.
    subn : int, default 1000
        Subsample size; capped at ``nobs``.

    Returns
    -------
    bool-like
        Truthy when zero is not strictly inside the percentile range.
    """
    # choice(replace=False) raises if asked for more draws than the population holds.
    subn = min(subn, nobs)
    indices = np.arange(0, nobs)

    test_stats = []
    for _ in range(trials):
        sample = np.random.choice(indices, subn, replace=False)
        ys, xs = yn[sample], xn[sample]
        # Scale by the subsample size the statistic was computed on.  The
        # decision below depends only on the signs of the percentiles, so
        # any positive scale factor gives the same answer.
        test_stats.append(compute_llr(ys, xs) / np.sqrt(subn))

    cv_lower, cv_upper = np.percentile(test_stats, [2.5, 97.5])
    return 0 >= cv_upper or 0 <= cv_lower
    

# Run both tests once on a fresh simulated sample; each print shows whether
# that test rejects (True) or not (False).
yn,xn,nobs = gen_data()
print(bootstrap_test(yn,xn,nobs))
print(regular_test(yn,xn,nobs))

False
False


In [5]:
# Monte Carlo comparison: over `total` simulated samples, count how often
# each test rejects and print the two rejection frequencies.
# Seed the global NumPy RNG once before the loop rather than reseeding it
# from OS entropy on every iteration.
np.random.seed(0)
reg = 0
boot = 0
total = 100
for i in range(total):
    yn,xn,nobs = gen_data()
    reg += regular_test(yn,xn,nobs)
    boot += bootstrap_test(yn,xn,nobs)

print("reg: %s, boot: %s"%(reg/total,boot/total))

reg: 0.3, boot: 0.04


In [6]:
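# Rejection rates noted from an earlier run (presumably with more replications; settings not recorded): reg: 0.337, boot: 0.049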