In [1]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

In [10]:
#generate the sample
def gen_data():
    nobs = 1000
    beta = 3
    x = np.random.normal(loc=0, scale=1, size=nobs)
    e = np.random.logistic(size=nobs)
    y = 1*(1 + beta * x + e >= 0) 
    return y,x,nobs

In [11]:
class SimulatedLikelihood(GenericLikelihoodModel):
    
    def __init__(self,*args,  sim_function=np.random.logistic, **kwargs):
        super(SimulatedLikelihood, self).__init__(*args, **kwargs)
        #do the simulation 1x instead of on every step
        nsim = 2000
        np.random.seed(0)
        e = sim_function(size=nsim)
        e = np.repeat(e, self.endog.shape[0])
        self.sims = e.reshape((nsim,self.endog.shape[0]))
        
    
    def nloglikeobs(self, params):
        
        exog = self.exog
        endog = self.endog
        u = np.dot(exog, params)
        
        #generate simulations
        e = self.sims
        
        #compare to each sim
        u = np.tile(u,(e.shape[0],1))
        u = u.reshape((e.shape[0],endog.shape[0]))
               
        #compute likelihood
        p0, p1 = (u<=e).mean(axis=0), (u >= e).mean(axis=0)
        p = (1-endog)*p0 +  endog*p1
        ll = np.log( np.maximum(p,.0001) )#help with numerical zeros
        
        return -ll

In [12]:
yn,xn, nobs = gen_data()
model1 = sm.Probit(yn,sm.add_constant(xn))
model1_res = model1.fit(disp=False)
print(model1_res.summary())
    
model2 = SimulatedLikelihood(yn,sm.add_constant(xn),sim_function=np.random.normal)
model2_res = model2.fit(disp=False)
print(model2_res.summary())

                          Probit Regression Results                           
Dep. Variable:                      y   No. Observations:                 1000
Model:                         Probit   Df Residuals:                      998
Method:                           MLE   Df Model:                            1
Date:                Fri, 10 Jul 2020   Pseudo R-squ.:                  0.4868
Time:                        10:45:00   Log-Likelihood:                -350.38
converged:                       True   LL-Null:                       -682.74
Covariance Type:            nonrobust   LLR p-value:                1.407e-146
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.4810      0.059      8.125      0.000       0.365       0.597
x1             1.7597      0.103     17.160      0.000       1.559       1.961
                          SimulatedLikelihood Result

In [13]:
yn,xn, nobs = gen_data()
model1 = sm.Logit(yn,sm.add_constant(xn))
model1_res = model1.fit(disp=False)
print(model1_res.summary())
    
model2 = SimulatedLikelihood(yn,sm.add_constant(xn))
model2_res = model2.fit(disp=False)
print(model2_res.summary())

                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                 1000
Model:                          Logit   Df Residuals:                      998
Method:                           MLE   Df Model:                            1
Date:                Fri, 10 Jul 2020   Pseudo R-squ.:                  0.5102
Time:                        10:45:25   Log-Likelihood:                -333.86
converged:                       True   LL-Null:                       -681.55
Covariance Type:            nonrobust   LLR p-value:                3.008e-153
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.8413      0.107      7.879      0.000       0.632       1.051
x1             3.2670      0.206     15.890      0.000       2.864       3.670
                          SimulatedLikelihood Result

In [None]:
def compute_llr(yn,xn):
    
    model1 = SimulatedLikelihood(yn,sm.add_constant(xn))
    model1_fit = model1.fit(disp=False)
    ll1 = model1.loglikeobs(model1_fit.params)
    
    model2 = SimulatedLikelihood(yn,sm.add_constant(xn),sim_function=np.random.normal)
    model2_fit = model2.fit(disp=False)
    ll2 = model2.loglikeobs(model2_fit.params)
    
    llr = ll1.sum() - ll2.sum()
    omega = ( (ll1- ll2)**2 ).mean()
    return llr/omega


yn,xn,nobs = gen_data()
print(compute_llr(yn,xn))

In [19]:
def regular_test(yn,xn):
    test_stat = compute_llr(yn,xn)
    return np.abs(test_stat) >= 1.96


def bootstrap_test(xn,yn):
    test_stats = []
    yn,xn,nobs = gen_data()
    trials = 10
    for i in range(trials):
        subn = 200
        np.random.seed()
        sample  = np.random.choice(np.arange(0,nobs),subn,replace=False)
        ys,xs = yn[sample],xn[sample]
        test_stat = compute_llr(ys,xs)
        test_stats.append(test_stat)

    cv_upper = np.percentile(test_stats, 97.5, axis=0)
    cv_lower = np.percentile(test_stats, 2.5, axis=0)
    print(test_stats)
    return 0 <= cv_upper or 0 >= cv_lower
    

yn,xn,nobs = gen_data()
print(bootstrap_test(yn,xn))



[160.24259786358473, -729.4929203636041, -54.678115497719745, -127.02247096491716, 6.933187997041706, 200.53375406874878, 127.3432142140581, 275.6641571972966, -19.620142573845396, 4.618979269023662, -79.5093335222444, -3497.6729787900063, 77.53832142281387, 146.60762071397858, -140.234367493629, -77.97280388196147, 35.18303976728896, 176.18032107934417, 189.19761231749789, 210.6444902378089, -129.847853658734, 274.92833681126166, -2.669493427412065, 259.1598031503696, 137.31402801032985, 185.75579274596888, 52.26845744674858, 277.92539847113994, 310.2478404931855, 92.19763680489538, 102.54990632448656, 32.72168441968735, -313.82459241923647, 54.566066499508544, -299.0883177456318, 137.82331158300022, 321.8916391754789, -727.0230167237145, 73.29183229112422, -48.49811251665932, -40.47546172881125, 80.58019217962097, -39.45663691605165, 264.81952445049916, 179.32541991786903, 214.90928159595188, 276.17162664720297, 168.19255040688714, -336.4780618459885, 209.9063598181118, 208.194025992