In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import scipy.stats as stats
import sys

sys.path.append("../")
import vuong_tests8 as vuong_tests_fast

In [2]:
class OLS_loglike(GenericLikelihoodModel):
    """Linear-regression likelihood with Gaussian errors.

    Only ``loglikeobs`` is supplied here; anything else (for example the
    ``score_obs`` and ``hessian`` methods used later in this notebook) comes
    from ``GenericLikelihoodModel``.
    """

    def __init__(self, *args, ols=False, **kwargs):
        """Forward everything to the parent constructor and remember ``ols``.

        ``ols`` is only stored on the instance; nothing in this class reads it.
        """
        super().__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the per-observation normal log-density of the residuals.

        The residuals are ``endog - exog @ params``.  The error scale is not a
        parameter: it is recomputed on every call as the root mean squared
        residual at ``params``.
        """
        residuals = self.endog - np.matmul(self.exog, params)
        sigma_hat = np.sqrt(np.sum(residuals**2) / residuals.shape[0])
        return stats.norm.logpdf(residuals, loc=0, scale=sigma_hat)


def setup_shi(yn,xn,return_model=False,num_params=4):
    """Fit the two competing OLS models and collect their likelihood pieces.

    Model 1 regresses ``yn`` on a constant and ``xn[:, 0]``; model 2 regresses
    ``yn`` on a constant and ``xn[:, 1:num_params+1]``.  For each model the
    OLS coefficients are plugged into ``OLS_loglike`` to obtain the
    per-observation log-likelihood, the per-observation score and the Hessian.

    Returns
    -------
    tuple
        ``(ll1, grad1, hess1, params1, ll2, grad2, hess2, params2)``.  When
        ``return_model`` is true, the ``sm.OLS`` model object of each side is
        inserted right after its ``params`` entry.
    """
    def _likelihood_pieces(regressors):
        # One design matrix serves both the OLS fit and the likelihood model.
        design = sm.add_constant(regressors)
        ols_model = sm.OLS(yn, design)
        beta = ols_model.fit(disp=False).params

        like_model = OLS_loglike(yn, design)
        loglike = like_model.loglikeobs(beta)
        score = like_model.score_obs(beta)
        hessian = like_model.hessian(beta)
        return loglike, score, hessian, beta, ols_model

    # Column 0 belongs to model 1, the following num_params columns to model 2.
    ll1, grad1, hess1, params1, model1 = _likelihood_pieces(xn[:, 0])
    ll2, grad2, hess2, params2, model2 = _likelihood_pieces(xn[:, 1:num_params+1])

    side1 = (ll1, grad1, hess1, params1)
    side2 = (ll2, grad2, hess2, params2)
    if return_model:
        return side1 + (model1,) + side2 + (model2,)
    return side1 + side2


In [3]:
def gen_data(nobs=1000, a=0.25, num_params=4):
    """Simulate one sample in which both regressor groups use the same ``a``.

    ``x`` has ``1 + num_params`` standard-normal columns.  The outcome is
    ``1 + a*x[:,0] + a/sqrt(num_params) * (sum of columns 1..num_params) + e``
    with standard-normal noise ``e``.

    Returns ``(y, x, nobs)``.
    """
    # Keep this draw order (regressors, then noise) so a fixed seed yields the
    # same sample as before.
    regressors = np.random.normal(scale=1., size=(nobs, 1 + num_params))
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)

    first_term = a * regressors[:, 0]
    group_term = a / np.sqrt(num_params) * regressors[:, 1:num_params + 1].sum(axis=1)
    outcome = 1 + first_term + group_term + noise
    return outcome, regressors, nobs

# Quick check of the output shapes on one default sample.
yn,xn,nobs = gen_data()
# gen_data() produces 4 model-2 columns by default, so request the same number
# from setup_shi.  The earlier num_params=15 exceeded the available columns and
# was silently clamped by the slice xn[:,1:num_params+1], i.e. it already
# behaved like num_params=4.
ll1,grad1,hess1,params1,ll2,grad2,hess2,params2 = setup_shi(yn,xn,return_model=False,num_params=4)
print(grad1.shape,hess1.shape)

(1000, 2) (2, 2)


In [4]:
# Values handed to vuong_tests_fast.monte_carlo by the experiment cells below:
# num_sims is its first positional argument, trials goes to trials=.
num_sims, trials = 200, 1000

# Flags forwarded unchanged as skip_shi=, refinement_test= and adapt_c=.
skip_shi, refinement_test, adapt_c = True, True, False

# Earlier candidate definitions of calc_c, kept for reference:
#calc_c =lambda nobs: (10*nobs**(1/4)/np.sqrt(nobs), .2*nobs**(1/3))
#calc_c =lambda nobs: (.15*nobs**(1/4)/np.sqrt(nobs), .05*nobs**(1/3))
#calc_c =lambda nobs: (2/100*nobs**(1/4)/np.sqrt(nobs), 1/2000*nobs**(1/3))
def calc_c(nobs):
    """Return the pair ``(nobs**(-1/2), 2*nobs**(-4/7))``.

    The experiment cells unpack the result as ``c1, c2`` and pass both to
    ``monte_carlo``.
    """
    return nobs**(-1/2), 2*(nobs)**(-4/7)


In [5]:
# Example inputs for print_mc2 (defined right below).
# `results` is indexed as [alpha level][test row][column]: print_mc2 labels the
# two test rows "Normal" and "Bootstrap-ND", and the three columns
# "No selection", "Model 1" and "Model 2".
alpha = [0.01, 0.05, 0.10, 0.15]
results = np.array([
    # alpha = 0.01
    [[0.98, 0.02, 0.00],
     [0.92, 0.08, 0.00]],
    # alpha = 0.05
    [[0.86, 0.14, 0.00],
     [0.66, 0.34, 0.00]],
    # alpha = 0.10
    [[0.66, 0.34, 0.00],
     [0.50, 0.50, 0.00]],
    # alpha = 0.15
    [[0.54, 0.46, 0.00],
     [0.41, 0.59, 0.00]],
])

def print_mc2(alpha_levels ,test_results ):
    """Print a LaTeX ``tabular`` with two rows of three numbers per alpha level.

    ``test_results[i][0]`` fills the row labelled "Normal" and
    ``test_results[i][1]`` the row labelled "Bootstrap-ND" for
    ``alpha_levels[i]``; the first three entries of each are printed with two
    decimals under "No selection", "Model 1" and "Model 2".

    Nothing is returned; the table is written to stdout.
    """
    pieces = [
        "\\begin{tabular}{c|cccc}\n",
        "\\hline\n",
        "\\textbf{$\\alpha$} & {} & \\textbf{No selection} & \\textbf{Model 1} & \\textbf{Model 2} \\\\\n",
        "\\hline\n",
    ]

    for idx, level in enumerate(alpha_levels):
        normal_row = test_results[idx][0]
        boot_row = test_results[idx][1]
        pieces.append(
            "{$%.2f$} & \\textbf{Normal} & %.2f & %.2f & %.2f \\\\\n"
            % (level, normal_row[0], normal_row[1], normal_row[2])
        )
        pieces.append(
            "& \\textbf{Bootstrap-ND} & %.2f & %.2f & %.2f \\\\\n"
            % (boot_row[0], boot_row[1], boot_row[2])
        )
        pieces.append("\\hline\n")

    pieces.append("\\end{tabular}")

    print("".join(pieces))

#print_mc2(alpha,results)

# evidence of power

In [6]:
# Notebook-level values read later by the setup_shi_ex / gen_data_ex wrappers.
a1 = np.sqrt(1.09-1)
a2 = 0.00
num_params = 9

def gen_data2(nobs=1000, a1=np.sqrt(1.09-1), a2=0.00 , num_params=19):
    """Simulate one sample with separate coefficients for the two regressor groups.

    ``x`` has ``1 + num_params`` standard-normal columns.  The outcome is
    ``1 + a1*x[:,0] + a2/sqrt(num_params) * (sum of columns 1..num_params) + e``
    with standard-normal noise ``e``.

    Returns ``(y, x, nobs)``.
    """
    # Keep this draw order (regressors, then noise) so a fixed seed yields the
    # same sample as before.
    regressors = np.random.normal(scale=1., size=(nobs, 1 + num_params))
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)

    first_term = a1 * regressors[:, 0]
    group_term = a2 / np.sqrt(num_params) * regressors[:, 1:num_params + 1].sum(axis=1)
    outcome = 1 + first_term + group_term + noise
    return outcome, regressors, nobs

In [7]:
import vuong_tests5

def setup_shi_ex(yn, xn):
    """Call setup_shi with the notebook-level num_params (looked up at call time)."""
    return setup_shi(yn, xn, num_params=num_params)

def gen_data_ex():
    """Draw one gen_data2 sample from the notebook-level nobs, a1, a2 and num_params."""
    return gen_data2(nobs=nobs, a1=a1, a2=a2, num_params=num_params)

# Uses the plain gen_data / setup_shi, not the two wrappers defined above.
# NOTE(review): the leading 1 is presumably the number of simulations -- confirm
# against vuong_tests5.monte_carlo.
res0 = vuong_tests5.monte_carlo(1, gen_data, setup_shi, trials=500, biascorrect=False)

nobs = 250
a1 = np.sqrt(1.09-1)
a2 = 0.00
c1, c2 = calc_c(nobs)
print(c1)

cstar 256
256 cstar 36.475097597740444
256 cstar 36.475097597740444
256 cstar 36.475097597740444
0.06324555320336758


# $a_1 = \sqrt{1.09^{1/2} - 1} \approx 0.21$, $a_2 = 0$, $k = 4$, $n = 500$

In [8]:
# Experiment under the "evidence of power" heading: only x[:,0] enters the
# outcome (a2 = 0), with nobs = 500.
num_params=4
nobs=500
a1,a2 = np.sqrt(1.09**.5-1), 0.00
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []

# The wrappers do not depend on alpha, so build them once, outside the loop.
# They read num_params / nobs / a1 / a2 from the notebook namespace at call time.
setup_shi_ex  = lambda yn,xn: setup_shi(yn,xn,num_params=num_params)
gen_data_ex = lambda : gen_data2(nobs=nobs, a1=a1, a2=a2, num_params=num_params)

for alpha in alphas:
    # Pass setup_shi_ex, not the bare setup_shi: the bare function ignores this
    # cell's num_params and only matched it because its default is also 4.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # First entry is printed as the "Normal" row, second as "Bootstrap-ND".
    test_results.append([refine_test,boot3])
print_mc2(alphas,np.array(test_results))

2.5670716399216844 2.645418964843105 6.852180865710722
-6.673121654559787 2.645418964843105 -2.3880124287707485
---
4.661220604619088 0.44714340538918673 8.550356272630136
-8.243821796341262 0.44714340538918673 -4.354686128330216
---
3.3683779302864183 0.8015592884850439 7.419456939763833
-7.40117721499504 0.8015592884850439 -3.350098205517625
---
3.9722512268727006 1.6505988303156427 8.144576314023302
-7.97280255532948 1.6505988303156427 -3.800477468178877
---
2.923360407290429 1.8527069493127575 7.078803595409499
-6.956438852591941 1.8527069493127575 -2.800995664472871
---
1.727631939509313 2.6584660630554278 6.71586004729285
-6.949110701272557 2.6584660630554278 -1.9608825934890188
---
1.7617306956742376 2.2761840654256855 6.2966738237117355
-6.3340168352909005 2.2761840654256855 -1.7990737072534024
---
3.997025176896053 1.1273953366574674 8.165831465617043
-8.031774866923996 1.1273953366574674 -3.862968578203009
---
2.0331293014831475 0.347971577975038 5.44155749568474
-5.420703562

# $a_1 = \sqrt{1.09 - 1} = 0.3$, $a_2 = 0$, $k = 4$, $n = 250$

In [9]:
# Experiment under the "evidence of power" heading: only x[:,0] enters the
# outcome (a2 = 0), with nobs = 250.
num_params=4
nobs=250
a1,a2 = np.sqrt(1.09-1), 0.00
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []

# The wrappers do not depend on alpha, so build them once, outside the loop.
# They read num_params / nobs / a1 / a2 from the notebook namespace at call time.
setup_shi_ex  = lambda yn,xn: setup_shi(yn,xn,num_params=num_params)
gen_data_ex = lambda : gen_data2(nobs=nobs, a1=a1, a2=a2, num_params=num_params)

for alpha in alphas:
    # Pass setup_shi_ex, not the bare setup_shi: the bare function ignores this
    # cell's num_params and only matched it because its default is also 4.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # NOTE(review): this cell tabulates `reg` in the "Normal" row, whereas the
    # other three experiment cells tabulate `refine_test` -- confirm which one
    # is intended before comparing the tables.
    test_results.append([reg,boot3])
print_mc2(alphas,np.array(test_results))

1.2368534996022043 1.4334377449321625 5.265340472499282
-5.575469694550208 1.4334377449321625 -1.5469827216531296
---
1.2899725415598513 2.209253882285993 5.513252085616067
-5.53546424139458 2.209253882285993 -1.3121846973383653
---
1.054811972609661 2.704197379795394 5.491763975256037
-5.414579039133424 2.704197379795394 -0.9776270364870483
---
0.9728041308566209 2.4023134754517477 5.693264015967922
-5.2769821090328595 2.4023134754517477 -0.5565222239215583
---
1.267548646298835 2.7642528643615614 5.325412767706119
-5.159565206067978 2.7642528643615614 -1.1017010846606943
---
4.176904067278258 0.7966123488086642 8.0152668502033
-8.088696993478402 0.7966123488086642 -4.250334210553359
---
1.8723935287696234 1.8351830806773914 5.93208628243822
-6.0330190749354475 1.8351830806773914 -1.9733263212668508
---
1.5617346397192053 1.5673079679061643 5.50641166092048
-5.831287857091707 1.5673079679061643 -1.8866108358904319
---
2.5590579055398646 1.7702821804616629 5.78870902860541
-5.601894299

# evidence of size

# a  = .25, k= 4,n=250

In [10]:
# Experiment under the size heading: gen_data gives both regressor groups the
# same coefficient a, with nobs = 250.
num_params=4
nobs=250
a=.25 
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []

# The wrappers do not depend on alpha, so build them once, outside the loop.
# They read num_params / nobs / a from the notebook namespace at call time.
setup_shi_ex  = lambda yn,xn: setup_shi(yn,xn,num_params=num_params)
gen_data_ex = lambda : gen_data(nobs=nobs, a=a,  num_params=num_params)

for alpha in alphas:
    # Pass setup_shi_ex, not the bare setup_shi: the bare function ignores this
    # cell's num_params and only matched it because its default is also 4.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # First entry is printed as the "Normal" row, second as "Bootstrap-ND".
    test_results.append([refine_test,boot3])
print_mc2(alphas,np.array(test_results))

1.1347594036018946 0.03522229377798464 5.240561755449364
-5.126023906602756 0.03522229377798464 -1.0202215547552878
---
1.4408163203921254 0.9745984621943817 5.773015837923712
-5.79106576569537 0.9745984621943817 -1.4588662481637829
---
1.2076755994093953 0.42350717732138277 5.331756835740339
-5.316257924267393 0.42350717732138277 -1.1921766879364495
---
1.6704360360997514 0.28088322896985096 5.55259024659097
-5.583796436286013 0.28088322896985096 -1.7016422257947939
---
0.7022076282465365 -0.532072681537487 5.264134234366311
-5.81859676210403 -0.532072681537487 -1.256670155984256
---
0.36433270429394726 -0.215069436978526 5.126759815053937
-5.141743586668549 -0.215069436978526 -0.3793164759085583
---
0.4938948092942791 -0.40640153517650207 4.927634803039322
-4.710182208787748 -0.40640153517650207 -0.2764422150427035
---
0.8923216049630965 -0.004849961285678141 5.05042723321687
-5.077766031453071 -0.004849961285678141 -0.9196604031992976
---
2.1693403547750125 1.2736503835865742 5.2514

# a  = .25, k= 4,n=500

###### num_params=4
# Experiment under the size heading: gen_data gives both regressor groups the
# same coefficient a, with nobs = 500.
# num_params is assigned explicitly so this cell does not rely on a value left
# in the kernel by an earlier cell.
num_params=4
nobs=500
a=.25 
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []

# The wrappers do not depend on alpha, so build them once, outside the loop.
# They read num_params / nobs / a from the notebook namespace at call time.
setup_shi_ex  = lambda yn,xn: setup_shi(yn,xn,num_params=num_params)
gen_data_ex = lambda : gen_data(nobs=nobs, a=a,  num_params=num_params)

for alpha in alphas:
    # Pass setup_shi_ex, not the bare setup_shi: the bare function ignores this
    # cell's num_params and only matched it because its default is also 4.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # First entry is printed as the "Normal" row, second as "Bootstrap-ND".
    test_results.append([refine_test,boot3])
print_mc2(alphas,np.array(test_results))