In [17]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats

In [18]:
#generate the sample
def gen_data(nobs=1000, beta=3):
    """Simulate one sample from a log-linear data-generating process.

    Draws ``x ~ Uniform(-1, 1)`` and ``e ~ Normal(0, 1)`` from NumPy's global
    random state (in that order) and sets ``y = exp(1 + beta * x + e)``.

    Parameters
    ----------
    nobs : int, optional
        Number of observations to draw. Defaults to 1000.
    beta : float, optional
        Coefficient on ``x`` inside the exponential. Defaults to 3.

    Returns
    -------
    y : numpy.ndarray
        Simulated outcome, shape ``(nobs,)``; strictly positive.
    x : numpy.ndarray
        Simulated regressor, shape ``(nobs,)``.
    nobs : int
        The sample size that was used.
    """
    x = np.random.uniform(low=-1., high=1., size=nobs)
    # Alternative error distribution kept for reference:
    #   np.random.uniform(low=-1., high=1., size=nobs)
    e = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    y = np.exp(1 + beta * x + e)
    return y, x, nobs

In [59]:
# Draw one sample and fit OLS of y (in levels) on a constant and x.
yn, xn, nobs = gen_data()
exog_const = sm.add_constant(xn)
model = sm.OLS(yn, exog_const)
model_fit = model.fit(disp=False)

In [60]:
def compute_loglike(resid):
    """Per-observation Gaussian log-likelihood of a residual vector.

    The scale is the maximum-likelihood estimate
    ``sigma = sqrt(sum(resid**2) / n)`` and each residual is evaluated under
    ``Normal(0, sigma)``.

    Parameters
    ----------
    resid : array_like
        One-dimensional residuals.

    Returns
    -------
    numpy.ndarray
        Log-density of every residual (same length as ``resid``); sum it to
        obtain the sample log-likelihood.
    """
    resid = np.asarray(resid, dtype=float)
    sigma = np.sqrt(np.sum(resid**2) / resid.shape[0])
    # logpdf evaluates the log-density directly; log(pdf(.)) underflows to
    # log(0) = -inf once a residual is far enough out in the tail.
    ll = stats.norm.logpdf(resid, loc=0, scale=sigma)
    return ll
 
# Sanity check: the hand-rolled log-likelihood should agree with statsmodels'.
manual_loglike = compute_loglike(model_fit.resid).sum()
statsmodels_loglike = model.loglike(model_fit.params)
print(manual_loglike)
print(statsmodels_loglike)

-4941.108401009704
-4941.108401009704


In [61]:
def compute_llr(yn,xn):
    """Log-likelihood ratio between a log-linear and a linear OLS fit.

    Model 1 regresses ``log(yn)`` on a constant and ``xn``; model 2 regresses
    ``yn`` itself on the same design. Each model's per-observation Gaussian
    log-likelihood is obtained from its residuals via ``compute_loglike``.

    NOTE(review): the two likelihoods are for different dependent variables
    (log y vs. y) and no Jacobian term is added here -- confirm this is the
    intended comparison.

    Returns
    -------
    tuple
        ``(llr, omega)`` where ``llr`` is the summed log-likelihood of model 1
        minus that of model 2, and ``omega`` is the standard deviation of the
        per-observation log-likelihood differences.
    """
    design = sm.add_constant(xn)

    # Fit the log-response model first, then the level-response model.
    pointwise = []
    for response in (np.log(yn), yn):
        fitted = sm.OLS(response, design).fit(disp=False)
        pointwise.append(compute_loglike(fitted.resid))
    ll_log, ll_level = pointwise

    llr = ll_log.sum() - ll_level.sum()
    omega = np.sqrt((ll_log - ll_level).var())
    return llr, omega


# Draw a fresh sample and report (llr, omega) for it.
yn, xn, nobs = gen_data()
llr_and_omega = compute_llr(yn, xn)
print(llr_and_omega)

(3495.4756559713915, 3.0766029773185597)


In [None]:
def regular_test(yn,xn,nobs,hist=False):
    """Two-sided normal-approximation test on the scaled likelihood ratio.

    The statistic is ``llr / (omega * sqrt(nobs))`` with ``llr`` and ``omega``
    taken from ``compute_llr(yn, xn)``. When ``hist`` is true, the standard
    normal density is drawn on the current matplotlib axes for reference.

    Returns
    -------
    int
        1 if the statistic is >= 1.96, 2 if it is <= -1.96, otherwise 0.
    """
    llr, omega = compute_llr(yn, xn)
    test_stat = llr / (omega * np.sqrt(nobs))

    if hist:
        # Reference curve: standard normal density on [-2.5, 2.5].
        grid = np.linspace(-2.5, 2.5, 100)
        density = stats.norm.pdf(grid, 0, 1)
        plt.plot(grid, density, label="Normal")

    rejects_upper = test_stat >= 1.96
    rejects_lower = test_stat <= -1.96
    return 1*rejects_upper + 2*rejects_lower


def bootstrap_test(yn,xn,nobs,hist=False,trials=100,subn=None):
    """Bootstrap version of the scaled likelihood-ratio test.

    Resamples ``(yn, xn)`` with replacement ``trials`` times, recomputes the
    statistic ``llr / (omega * sqrt(subn))`` on each resample, and builds the
    reflected interval ``2 * stat - percentile`` around the full-sample
    statistic using the 2.5th and 97.5th bootstrap percentiles.

    Parameters
    ----------
    yn, xn : numpy.ndarray
        Outcome and regressor, indexable by an integer array.
    nobs : int
        Number of observations in ``yn`` / ``xn``.
    hist : bool, optional
        If true, draw a histogram of the reflected bootstrap statistics on the
        current matplotlib axes.
    trials : int, optional
        Number of bootstrap replications. Defaults to 100.
    subn : int or None, optional
        Size of each resample. Defaults to ``nobs``.

    Returns
    -------
    int
        1 if zero lies at or below the lower bound, 2 if zero lies at or above
        the upper bound, otherwise 0.
    """
    if subn is None:
        # Resample as many points as the sample holds instead of a fixed 1000.
        subn = nobs

    # The global RNG is deliberately NOT re-seeded inside the loop, so a seed
    # set by the caller keeps the whole bootstrap reproducible.
    test_stats = np.empty(trials)
    for i in range(trials):
        sample = np.random.choice(nobs, size=subn, replace=True)
        llr_b, omega_b = compute_llr(yn[sample], xn[sample])
        test_stats[i] = llr_b / (omega_b * np.sqrt(subn))

    # Full-sample statistic around which the bootstrap draws are reflected.
    llr, omega = compute_llr(yn, xn)
    test_stat = llr / (omega * np.sqrt(nobs))

    #plot
    if hist:
        plt.hist(2*test_stat - test_stats, density=True, bins=10, label="Bootstrap")

    cv_lower = 2*test_stat - np.percentile(test_stats, 97.5)
    cv_upper = 2*test_stat - np.percentile(test_stats, 2.5)
    return 2*(0 >= cv_upper) + 1*(0 <= cv_lower)
    

# One sample, both tests, both drawn on the same axes for comparison.
yn, xn, nobs = gen_data()
for run_test in (bootstrap_test, regular_test):
    print(run_test(yn, xn, nobs, hist=True))

# Label the shared axes, then save and show the figure.
ax = plt.gca()
ax.set_title("Comparison with bootstrap")
ax.set_xlabel("Test Statistic")
ax.set_ylabel("Density")
ax.legend()
plt.savefig('../figs/bootstrap_compare2')
plt.show()

In [None]:
# Monte Carlo comparison of the two tests.
# reg / boot count how often each test returns outcome 0, 1 or 2;
# llr / omega accumulate the statistics so their averages can be reported.
reg = np.zeros(3, dtype=int)
boot = np.zeros(3, dtype=int)
omega = 0
llr = 0
total = 1000
for i in range(total):
    # The global RNG is not re-seeded per iteration: doing so would discard
    # any seed set earlier and make the simulation impossible to reproduce.
    yn,xn,nobs = gen_data()
    llrn,omegan = compute_llr(yn,xn)

    #update the llr
    llr += llrn
    omega += omegan
    reg_index = regular_test(yn,xn,nobs)

    #update test results
    boot_index = bootstrap_test(yn,xn,nobs)
    reg[reg_index] += 1
    boot[boot_index] += 1

print("reg: %s, boot: %s, llr:%s, omega:%s"%(reg/total,boot/total,llr/total,omega/total))