In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import scipy.stats as stats
import sys

sys.path.append("../")
import vuong_tests9 as vuong_tests_fast

In [2]:
class OLS_loglike(GenericLikelihoodModel):
    """Per-observation Gaussian log-likelihood for a linear model.

    The error scale is not a free parameter: it is recomputed from the
    residuals implied by ``params`` each time the likelihood is evaluated.
    """

    def __init__(self, *args, ols=False, **kwargs):
        """Pass every argument through to the parent class and keep ``ols``.

        ``ols`` is only stored on the instance; nothing in this class reads it.
        """
        super().__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the normal log-density of each residual.

        Residuals are ``endog - exog @ params``; the density is centred at
        zero with scale ``sqrt(sum(residuals**2) / n)``, where ``n`` is the
        length of the residual vector's first axis.
        """
        residuals = self.endog - np.matmul(self.exog, params)
        n_obs = residuals.shape[0]
        scale_hat = np.sqrt(np.sum(residuals**2) / n_obs)
        return stats.norm.logpdf(residuals, loc=0, scale=scale_hat)


def setup_shi(yn,xn,return_model=False,num_params=4):
    """Fit the two competing linear models and collect their likelihood pieces.

    Model 1 regresses ``yn`` on a constant plus column 0 of ``xn``; model 2
    regresses it on a constant plus columns ``1 .. num_params`` of ``xn``
    (the slice stops early if ``xn`` has fewer columns than that).

    For each model the OLS coefficients are plugged into ``OLS_loglike`` to
    obtain the per-observation log-likelihood, the per-observation score and
    the Hessian.

    Returns
    -------
    tuple
        ``(ll1, grad1, hess1, params1, ll2, grad2, hess2, params2)``, or, when
        ``return_model`` is true,
        ``(ll1, grad1, hess1, params1, model1, ll2, grad2, hess2, params2, model2)``
        where ``model1`` / ``model2`` are the ``sm.OLS`` model objects.
    """

    def _pieces(regressors):
        # One side of the comparison: OLS point estimates, then the
        # likelihood, score and Hessian evaluated at those estimates.
        design = sm.add_constant(regressors)
        ols_model = sm.OLS(yn, design)
        coefs = ols_model.fit(disp=False).params

        likelihood = OLS_loglike(yn, design)
        return (
            likelihood.loglikeobs(coefs),
            likelihood.score_obs(coefs),
            likelihood.hessian(coefs),
            coefs,
            ols_model,
        )

    side1 = _pieces(xn[:, 0])
    side2 = _pieces(xn[:, 1:num_params + 1])

    if return_model:
        return side1 + side2
    # Drop the trailing model object from each side.
    return side1[:4] + side2[:4]


In [3]:
def gen_data(nobs=1000, a=0.25, num_params=4, rng=None):
    """Simulate one sample in which both regressor sets load with the same ``a``.

    ``y = 1 + a*x[:,0] + a/sqrt(num_params) * sum(x[:,1:num_params+1]) + e``
    with ``x`` and ``e`` drawn as independent standard normals.

    Parameters
    ----------
    nobs : int
        Number of observations.
    a : float
        Coefficient on column 0; the remaining columns share ``a/sqrt(num_params)``.
    num_params : int
        Number of regressors after column 0 (``x`` has ``1 + num_params`` columns).
    rng : object with a ``normal(loc=, scale=, size=)`` method, optional
        Source of randomness, e.g. ``np.random.default_rng(seed)`` or
        ``np.random.RandomState(seed)``. When omitted the global ``np.random``
        state is used, exactly as before.

    Returns
    -------
    (y, x, nobs)
    """
    # Fall back to the module-level generator so existing callers see the
    # same random stream they always did.
    source = np.random if rng is None else rng
    # Draw order (x first, then e) is kept so seeded runs are unchanged.
    x = source.normal(scale=1., size=(nobs,1+num_params))
    e = source.normal(loc=0.0, scale=1.0, size=nobs)
    y = 1 + a*x[:,0] + a/np.sqrt(num_params)*x[:,1:num_params+1].sum(axis=1) + e
    return y,x,nobs

# Smoke test: draw one default sample and check the shapes of model 1's
# per-observation score and Hessian.
yn,xn,nobs = gen_data()
# num_params is taken from the data (all columns after the first). The
# previous hard-coded 15 exceeded the available columns and was silently
# truncated by the slice inside setup_shi, so the result is unchanged.
ll1,grad1,hess1,params1,ll2,grad2,hess2,params2 = setup_shi(yn,xn,return_model=False,num_params=xn.shape[1]-1)
print(grad1.shape,hess1.shape)

(1000, 2) (2, 2)


In [4]:
# Settings shared by the Monte Carlo cells below.
num_sims = 200   # first positional argument handed to monte_carlo
trials = 1000    # forwarded as monte_carlo(trials=...)

# Flags forwarded unchanged to monte_carlo.
skip_shi = True
refinement_test = True
adapt_c = False

data_tuned_c = .9


def calc_c(nobs):
    """Return the ``(c1, c2)`` pair for a sample of size ``nobs``.

    ``c1 = nobs**(-1/2)`` and ``c2 = nobs**(-4/7) / 350``; the simulation
    cells pass them to ``monte_carlo`` as ``c1=`` and ``c2=``.
    """
    return (nobs**(-1/2), nobs**(-4/7)/350)


In [5]:
# Input arrays
# Example values for trying out print_mc2 (see the commented-out call below).
# One entry of `alpha` per first-axis entry of `results`.
alpha = [0.01, 0.05, 0.10, 0.15]
# results[i][j][k] as read by print_mc2:
#   i - position of the alpha level,
#   j - test row (0 is labelled "Normal", 1 is labelled "Bootstrap-ND"),
#   k - outcome column ("No selection", "Model 1", "Model 2").
# Each innermost triple sums to 1.
results = np.array([
    [[0.98, 0.02, 0.00], [0.92, 0.08, 0.00]],
    [[0.86, 0.14, 0.00], [0.66, 0.34, 0.00]],
    [[0.66, 0.34, 0.00], [0.50, 0.50, 0.00]],
    [[0.54, 0.46, 0.00], [0.41, 0.59, 0.00]]
])
# Column headers are hard-coded inside print_mc2.

def print_mc2(alpha_levels, test_results, test_names=("Normal", "Bootstrap-ND")):
    """Print a LaTeX ``tabular`` of selection frequencies per alpha level.

    Parameters
    ----------
    alpha_levels : sequence of float
        One significance level per block of rows.
    test_results : indexable
        ``test_results[i][j]`` holds three numbers for alpha level ``i`` and
        test ``j``, printed under "No selection", "Model 1" and "Model 2".
    test_names : sequence of str, optional
        Row label for each test ``j``. The default reproduces the two labels
        that used to be hard-coded; pass other names when the rows come from
        different tests.

    Returns
    -------
    None
        The table is written to stdout only.
    """
    lines = [
        "\\begin{tabular}{c|cccc}",
        "\\hline",
        "\\textbf{$\\alpha$} & {} & \\textbf{No selection} & \\textbf{Model 1} & \\textbf{Model 2} \\\\",
        "\\hline",
    ]

    for i, level in enumerate(alpha_levels):
        for j, label in enumerate(test_names):
            row = test_results[i][j]
            # Only the first row of each block shows the alpha level; the
            # other rows leave the first column empty.
            lead = "{$%.2f$} " % level if j == 0 else ""
            lines.append(
                "%s& \\textbf{%s} & %.2f & %.2f & %.2f \\\\"
                % (lead, label, row[0], row[1], row[2])
            )
        lines.append("\\hline")

    lines.append("\\end{tabular}")

    print("\n".join(lines))

#print_mc2(alpha,results)

# evidence of power

In [6]:
# Notebook-level design settings read (at call time) by the gen_data_ex /
# setup_shi_ex wrappers defined in later cells.
a1,a2 = np.sqrt(1.09-1), 0.00
num_params= 9

def gen_data2(nobs=1000, a1=np.sqrt(1.09-1), a2=0.00 , num_params=19, rng=None):
    """Simulate one sample with separate coefficients for the two regressor sets.

    ``y = 1 + a1*x[:,0] + a2/sqrt(num_params) * sum(x[:,1:num_params+1]) + e``
    with ``x`` and ``e`` drawn as independent standard normals.

    Parameters
    ----------
    nobs : int
        Number of observations.
    a1 : float
        Coefficient on column 0 of ``x``.
    a2 : float
        Scale shared by the remaining columns (each gets ``a2/sqrt(num_params)``).
    num_params : int
        Number of regressors after column 0 (``x`` has ``1 + num_params`` columns).
    rng : object with a ``normal(loc=, scale=, size=)`` method, optional
        Source of randomness, e.g. ``np.random.default_rng(seed)`` or
        ``np.random.RandomState(seed)``. When omitted the global ``np.random``
        state is used, exactly as before.

    Returns
    -------
    (y, x, nobs)
    """
    # Fall back to the module-level generator so existing callers see the
    # same random stream they always did.
    source = np.random if rng is None else rng
    # Draw order (x first, then e) is kept so seeded runs are unchanged.
    x = source.normal(scale=1., size=(nobs,1+num_params))
    e = source.normal(loc=0.0, scale=1.0, size=nobs)
    y = 1 + a1*x[:,0] + a2/np.sqrt(num_params)*x[:,1:num_params+1].sum(axis=1) + e
    return y,x,nobs

In [7]:
# NOTE(review): project-local import placed mid-notebook rather than in the
# first cell with the other imports.
import vuong_tests5

# Wrappers around setup_shi / gen_data2. Their bodies read num_params, nobs,
# a1 and a2 from the notebook namespace when called, not when defined.
# NOTE(review): neither wrapper is used in this cell.
setup_shi_ex  = lambda yn,xn: setup_shi(yn,xn,num_params=num_params)
gen_data_ex = lambda : gen_data2(nobs=nobs, a1=a1, a2=a2, num_params=num_params)

# One call into vuong_tests5.monte_carlo using the plain gen_data / setup_shi
# (i.e. their default arguments), not the *_ex wrappers defined above.
res0 = vuong_tests5.monte_carlo(1,gen_data,setup_shi,trials=500,biascorrect=False)

# Reset the design settings and recompute the (c1, c2) pair for nobs=250;
# only c1 is printed.
nobs=250
a1,a2 = np.sqrt(1.09-1), 0.00
c1,c2 = calc_c(nobs)
print(c1)

cstar 256
256 cstar 36.611480017606944
256 cstar 36.611480017606944
256 cstar 36.611480017606944
0.06324555320336758


# a1 = sqrt(1.09^0.5 - 1) ≈ 0.21, a2 = 0, k = 4, n = 500

In [8]:
# Power run: only the first regressor matters (a2 = 0), nobs = 500.
num_params=4
nobs=500
a1,a2 = np.sqrt(1.09**.5-1), 0.00
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []


# The wrappers do not depend on alpha, so they are built once, outside the loop.
def setup_shi_ex(yn, xn, **kwargs):
    """setup_shi with this cell's num_params; extra keywords are passed through."""
    return setup_shi(yn, xn, num_params=num_params, **kwargs)


def gen_data_ex():
    """Draw one sample from gen_data2 under this cell's nobs, a1, a2, num_params."""
    return gen_data2(nobs=nobs, a1=a1, a2=a2, num_params=num_params)


for alpha in alphas:
    # Pass the wrapper, not the bare setup_shi: previously setup_shi_ex was
    # defined but never used, so num_params only matched because setup_shi's
    # default happens to be 4 as well.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,
                                          data_tuned_c=data_tuned_c,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # Table rows per alpha: refine_test first, boot3 second.
    test_results.append([refine_test,boot3])
print_mc2(alphas,np.array(test_results))

Quantiles: 1th: 0.83, 5th: 1.14, 10th: 1.33, 30th: 1.82, 50th: 2.19, 70th: 2.58, 90th: 3.17, 95th: 3.48, 99th: 4.04
bonus term 0.07080849632517339 // variance stats 4.8060141384909265 //part1 0.4111151307892395 //part2 5.8060141384909265 4.8060141384909265
-1.7536371449161916 0.9930137548451673 2.1075170310648015
-1.8952541375665384 0.9930137548451673 1.9659000384144547
---
Quantiles: 1th: 0.75, 5th: 1.05, 10th: 1.23, 30th: 1.65, 50th: 1.98, 70th: 2.34, 90th: 2.92, 95th: 3.19, 99th: 3.70
bonus term 0.06926470586528424 // variance stats 4.048087336186329 //part1 0.3496542845232123 //part2 5.048087336186329 4.048087336186329
-2.0344815602623445 2.0041510229562474 2.2973472660736256
-2.1730109719929134 2.0041510229562474 2.1588178543430567
---
Quantiles: 1th: 0.76, 5th: 1.11, 10th: 1.29, 30th: 1.75, 50th: 2.10, 70th: 2.47, 90th: 3.05, 95th: 3.34, 99th: 3.89
bonus term 0.05660183587529651 // variance stats 5.739939162658537 //part1 0.38149293029428194 //part2 6.739939162658537 5.7399391626

Quantiles: 1th: 0.74, 5th: 1.03, 10th: 1.22, 30th: 1.68, 50th: 2.03, 70th: 2.39, 90th: 2.96, 95th: 3.24, 99th: 3.83
bonus term 0.08098576898038912 // variance stats 3.4394376276004164 //part1 0.35953127011169406 //part2 4.439437627600416 3.4394376276004164
-1.195348553369753 1.086905495957661 1.409379563389736
-1.357320091330531 1.086905495957661 1.2474080254289575
---
Quantiles: 1th: 0.79, 5th: 1.09, 10th: 1.31, 30th: 1.75, 50th: 2.12, 70th: 2.51, 90th: 3.11, 95th: 3.42, 99th: 4.03
bonus term 0.07172511714539681 // variance stats 4.536460019313777 //part1 0.3971032434560865 //part2 5.536460019313777 4.536460019313777
-1.253890400079799 2.000456463895202 1.4846392636047572
-1.3973406343705925 2.000456463895202 1.3411890293139637
---
Quantiles: 1th: 0.75, 5th: 1.04, 10th: 1.23, 30th: 1.65, 50th: 2.00, 70th: 2.38, 90th: 2.94, 95th: 3.24, 99th: 3.83
bonus term 0.06771088283938026 // variance stats 4.239780804727738 //part1 0.3547901841729535 //part2 5.239780804727738 4.239780804727738
-1.

# a1 = sqrt(1.09 - 1) = 0.3, a2 = 0, k = 4, n = 250

In [9]:
# Power run: only the first regressor matters (a2 = 0), nobs = 250.
num_params=4
nobs=250
a1,a2 = np.sqrt(1.09-1), 0.00
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []


# The wrappers do not depend on alpha, so they are built once, outside the loop.
def setup_shi_ex(yn, xn, **kwargs):
    """setup_shi with this cell's num_params; extra keywords are passed through."""
    return setup_shi(yn, xn, num_params=num_params, **kwargs)


def gen_data_ex():
    """Draw one sample from gen_data2 under this cell's nobs, a1, a2, num_params."""
    return gen_data2(nobs=nobs, a1=a1, a2=a2, num_params=num_params)


for alpha in alphas:
    # Pass the wrapper, not the bare setup_shi: previously setup_shi_ex was
    # defined but never used, so num_params only matched because setup_shi's
    # default happens to be 4 as well.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,
                                          data_tuned_c=data_tuned_c,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # NOTE(review): this cell tabulates `reg`, while the other simulation
    # cells tabulate `refine_test` in the first row - confirm this is intended.
    test_results.append([reg,boot3])
print_mc2(alphas,np.array(test_results))

Quantiles: 1th: 0.76, 5th: 1.05, 10th: 1.21, 30th: 1.63, 50th: 1.97, 70th: 2.32, 90th: 2.91, 95th: 3.21, 99th: 3.85
bonus term 0.04639169732508386 // variance stats 4.559074530965113 //part1 0.25789490304811596 //part2 5.559074530965113 4.559074530965113
-2.0693749525896603 1.179540842233469 2.3057664772840245
-2.162158347239828 1.179540842233469 2.212983082633857
---
Quantiles: 1th: 0.79, 5th: 1.16, 10th: 1.38, 30th: 1.85, 50th: 2.24, 70th: 2.64, 90th: 3.32, 95th: 3.62, 99th: 4.22
bonus term 0.06676441164765665 // variance stats 4.016180782683631 //part1 0.33490235867415447 //part2 5.016180782683631 4.016180782683631
-2.0942658970459034 1.3022944593779644 2.0616919513933656
-2.2277947203412163 1.3022944593779644 1.9281631280980525
---
Quantiles: 1th: 0.83, 5th: 1.11, 10th: 1.29, 30th: 1.74, 50th: 2.09, 70th: 2.45, 90th: 3.03, 95th: 3.29, 99th: 3.89
bonus term 0.040554389924297045 // variance stats 5.884827671464814 //part1 0.27920998595017416 //part2 6.884827671464814 5.88482767146481

Quantiles: 1th: 0.79, 5th: 1.09, 10th: 1.27, 30th: 1.70, 50th: 2.06, 70th: 2.46, 90th: 3.06, 95th: 3.40, 99th: 4.05
bonus term 0.05364284611941753 // variance stats 4.326665525809664 //part1 0.285737499130614 //part2 5.326665525809663 4.326665525809664
-1.186688300613637 2.031090421249665 1.4104539688542013
-1.2939739928524723 2.031090421249665 1.303168276615366
---
Quantiles: 1th: 0.82, 5th: 1.11, 10th: 1.28, 30th: 1.70, 50th: 2.02, 70th: 2.38, 90th: 2.96, 95th: 3.27, 99th: 3.82
bonus term 0.039900036650731045 // variance stats 5.672153538025387 //part1 0.2662191707065177 //part2 6.672153538025386 5.672153538025387
-1.399636588749991 2.8440543272974432 1.4718814998569745
-1.479436662051453 2.8440543272974432 1.3920814265555126
---
Quantiles: 1th: 0.81, 5th: 1.07, 10th: 1.24, 30th: 1.67, 50th: 1.99, 70th: 2.36, 90th: 2.93, 95th: 3.21, 99th: 3.78
bonus term 0.041440195083729515 // variance stats 5.303233087648812 //part1 0.2612072088103855 //part2 6.303233087648811 5.303233087648812
-1.

# Size experiments

# a  = .25, k= 4,n=250

In [10]:
# Size run: both regressor sets share the same coefficient scale a, nobs = 250.
num_params=4
nobs=250
a=.25 
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []


# The wrappers do not depend on alpha, so they are built once, outside the loop.
def setup_shi_ex(yn, xn, **kwargs):
    """setup_shi with this cell's num_params; extra keywords are passed through."""
    return setup_shi(yn, xn, num_params=num_params, **kwargs)


def gen_data_ex():
    """Draw one sample from gen_data under this cell's nobs, a, num_params."""
    return gen_data(nobs=nobs, a=a,  num_params=num_params)


for alpha in alphas:
    # Pass the wrapper, not the bare setup_shi: previously setup_shi_ex was
    # defined but never used, so num_params only matched because setup_shi's
    # default happens to be 4 as well.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,
                                          data_tuned_c=data_tuned_c,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # Table rows per alpha: refine_test first, boot3 second.
    test_results.append([refine_test,boot3])
print_mc2(alphas,np.array(test_results))

Quantiles: 1th: 0.80, 5th: 1.09, 10th: 1.30, 30th: 1.75, 50th: 2.11, 70th: 2.47, 90th: 3.09, 95th: 3.38, 99th: 3.95
bonus term 0.04897852465059117 // variance stats 4.926585697328423 //part1 0.29027542367044123 //part2 5.926585697328423 4.926585697328423
-1.8642808063420901 -1.0771417931970326 1.9997643650322898
-1.9622378556432725 -1.0771417931970326 1.9018073157311075
---
Quantiles: 1th: 0.86, 5th: 1.18, 10th: 1.39, 30th: 1.87, 50th: 2.26, 70th: 2.67, 90th: 3.37, 95th: 3.75, 99th: 4.39
bonus term 0.05667955274866036 // variance stats 5.096297600088429 //part1 0.3455354213957436 //part2 6.096297600088429 5.096297600088429
-2.007167902802278 -0.3225020020518389 2.338727430942747
-2.1205270082995993 -0.3225020020518389 2.2253683254454257
---
Quantiles: 1th: 0.89, 5th: 1.16, 10th: 1.34, 30th: 1.78, 50th: 2.11, 70th: 2.48, 90th: 3.04, 95th: 3.32, 99th: 3.92
bonus term 0.039749913148412426 // variance stats 6.063119944087782 //part1 0.28075840433430893 //part2 7.063119944087781 6.063119944

Quantiles: 1th: 0.87, 5th: 1.22, 10th: 1.41, 30th: 1.86, 50th: 2.20, 70th: 2.56, 90th: 3.15, 95th: 3.47, 99th: 4.11
bonus term 0.04074023415068433 // variance stats 6.418961643215289 //part1 0.3022502344995367 //part2 7.418961643215289 6.418961643215289
-1.3791828630583018 0.8020190421312985 1.5107260975533707
-1.4606633313596706 0.8020190421312985 1.429245629252002
---
Quantiles: 1th: 0.85, 5th: 1.14, 10th: 1.32, 30th: 1.79, 50th: 2.13, 70th: 2.53, 90th: 3.15, 95th: 3.48, 99th: 4.13
bonus term 0.04810832233376754 // variance stats 5.298619319903812 //part1 0.3030160084996283 //part2 6.298619319903811 5.298619319903812
-1.2588629345044988 -1.2101357546836415 1.3914039218530891
-1.3550795791720338 -1.2101357546836415 1.2951872771855542
---
Quantiles: 1th: 0.88, 5th: 1.19, 10th: 1.39, 30th: 1.85, 50th: 2.23, 70th: 2.65, 90th: 3.28, 95th: 3.61, 99th: 4.28
bonus term 0.04829590505880224 // variance stats 5.77784788734027 //part1 0.32734229806998905 //part2 6.77784788734027 5.77784788734027

# a  = .25, k= 4,n=500

###### num_params=4
# Size run: both regressor sets share the same coefficient scale a, nobs = 500.
# num_params is set here explicitly so the cell does not depend on whatever
# value an earlier cell left behind.
num_params=4
nobs=500
a=.25 
c1,c2 = calc_c(nobs)
alphas = [.01,.05,.1,.15]
test_results = []


# The wrappers do not depend on alpha, so they are built once, outside the loop.
def setup_shi_ex(yn, xn, **kwargs):
    """setup_shi with this cell's num_params; extra keywords are passed through."""
    return setup_shi(yn, xn, num_params=num_params, **kwargs)


def gen_data_ex():
    """Draw one sample from gen_data under this cell's nobs, a, num_params."""
    return gen_data(nobs=nobs, a=a,  num_params=num_params)


for alpha in alphas:
    # Pass the wrapper, not the bare setup_shi (setup_shi_ex used to be
    # defined but never used). data_tuned_c is forwarded as in the other
    # simulation cells, which all pass it explicitly.
    mc_out = vuong_tests_fast.monte_carlo(num_sims,gen_data_ex,setup_shi_ex,trials=trials,c1=c1,c2=c2,
                                          data_tuned_c=data_tuned_c,adapt_c=adapt_c,
                                          skip_shi=skip_shi, refinement_test=refinement_test,alpha=alpha)
    reg,twostep, refine_test, boot1,boot2,boot3,shi, llr,std, omega = mc_out
    # Table rows per alpha: refine_test first, boot3 second.
    test_results.append([refine_test,boot3])
print_mc2(alphas,np.array(test_results))