In [1]:
%load_ext autoreload
%autoreload 2

from statsmodels.base.model import GenericLikelihoodModel

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import sys
import re

from scipy.interpolate import make_interp_spline, BSpline
from scipy.stats import multivariate_normal

sys.path.append("../")
import vuong_tests11 as vuong_tests_fast

In [2]:
class JointNormal1(GenericLikelihoodModel):
    """Bivariate normal candidate whose only free parameter is the first mean.

    Each observation is the pair (endog, exog). The density has mean
    ``(params[0], 0)`` and identity covariance.
    """

    def loglikeobs(self, params):
        """Return the log-density of every (endog, exog) observation.

        Only ``params[0]`` is read; it is the mean of the endog coordinate.
        Assumes ``self.exog`` holds a single column so that each stacked
        observation is two-dimensional -- TODO confirm against callers.
        """
        # Put endog on top of the transposed exog, then flip so that every
        # row is one observation.
        stacked = np.concatenate([[self.endog], self.exog.T], axis=0)
        observations = stacked.T
        mean = [params[0], 0.0]
        identity_cov = [[1, 0], [0, 1]]
        return stats.multivariate_normal(mean, identity_cov).logpdf(observations)
    
    
class JointNormal2(GenericLikelihoodModel):
    """Bivariate normal candidate whose only free parameter is the second mean.

    Each observation is the pair (endog, exog). The density has mean
    ``(0, params[0])`` and identity covariance.
    """

    def loglikeobs(self, params):
        """Return the log-density of every (endog, exog) observation.

        Only ``params[0]`` is read; it is the mean of the exog coordinate.
        Assumes ``self.exog`` holds a single column so that each stacked
        observation is two-dimensional -- TODO confirm against callers.
        """
        # Put endog on top of the transposed exog, then flip so that every
        # row is one observation.
        stacked = np.concatenate([[self.endog], self.exog.T], axis=0)
        observations = stacked.T
        mean = [0.0, params[0]]
        identity_cov = [[1, 0], [0, 1]]
        return stats.multivariate_normal(mean, identity_cov).logpdf(observations)


def setup_shi(yn,xn):
    """Evaluate both candidate models at their sample-mean parameter values.

    Model 1 (``JointNormal1``) is evaluated at ``yn.mean()`` and model 2
    (``JointNormal2``) at ``xn.mean()``.

    Returns
    -------
    tuple
        ``(ll1, grad1, hess1, model1_param, ll2, grad2, hess2, model2_param)``
        where, for each model, ``ll`` is the output of ``loglikeobs``,
        ``grad`` is ``score_obs`` reshaped to ``(nobs, 1)``, ``hess`` is the
        output of ``hessian`` and the parameter is a length-1 array.
        ``score_obs`` and ``hessian`` are not overridden by the model classes,
        so they come from ``GenericLikelihoodModel``.
    """
    nobs = yn.shape[0]

    def evaluate(model_cls, estimate):
        # Same sequence for either model: build it on (yn, xn), then read the
        # per-observation log-likelihood, scores and Hessian at the estimate.
        param = np.array([estimate])
        model = model_cls(yn, xn)
        loglik = model.loglikeobs(param)
        scores = model.score_obs(param).reshape((nobs, 1))
        hessian = model.hessian(param)
        return loglik, scores, hessian, param

    # Tuple concatenation keeps the model-1 block ahead of the model-2 block.
    return evaluate(JointNormal1, yn.mean()) + evaluate(JointNormal2, xn.mean())

def gen_data(beta= 1.5, nobs=1000, rng=None):
    """Draw ``nobs`` independent pairs from a bivariate normal.

    Both coordinates have mean ``beta``; the first has variance 25, the second
    variance 1, and they are uncorrelated.

    Parameters
    ----------
    beta : float
        Common mean of both coordinates.
    nobs : int
        Number of observations to draw.
    rng : None, int, or numpy.random.Generator, optional
        ``None`` (default) draws from NumPy's legacy global random state, so
        existing callers and ``np.random.seed`` keep working unchanged. Any
        other value is handed to ``np.random.default_rng`` to obtain a
        reproducible, self-contained stream.

    Returns
    -------
    tuple
        ``(first_column, second_column, nobs)``.
    """
    mean = [beta, beta]
    cov = [[25, 0], [0, 1]]
    if rng is None:
        data = np.random.multivariate_normal(mean, cov, nobs)
    else:
        data = np.random.default_rng(rng).multivariate_normal(mean, cov, nobs)
    return data[:,0],data[:,1],nobs

# Smoke test: draw one sample with gen_data's defaults and build the inputs
# returned by setup_shi for both candidate models.
yn,xn,nobs = gen_data()
ll1,grad1,hess1,params1,ll2,grad2,hess2,params2 = setup_shi(yn,xn)
# Sanity check on shapes: scores are reshaped to (nobs, 1) inside setup_shi.
print(grad1.shape,hess1.shape)
# NOTE: Shi's test shows odd size distortions when theta = .5.
# (`theta` is not defined in this notebook -- TODO confirm which setting is meant.)

(1000, 1) (1, 1)


In [3]:
# Shared Monte Carlo settings read by the size-table cells below.
num_sims = 1000  # forwarded to vuong_tests_fast.monte_carlo as its first argument
trials =1000  # forwarded as monte_carlo's `trials` keyword; presumably bootstrap draws -- TODO confirm
adapt_c = True  # forwarded as monte_carlo's `adapt_c` keyword
data_tuned_epsilon = True  # forwarded as monte_carlo's `data_tuned_epsilon` keyword
epsilon = .5  # epsilon setting for the tests; semantics live in vuong_tests11 -- TODO confirm

In [4]:
def get_size_vector(mc_out):
    """Convert Monte Carlo output into one rejection rate per reported method.

    The first eight entries of ``mc_out`` are read. Element 0 of each entry is
    treated as the rate at which no model was selected, and the returned value
    is one minus that rate. Entries 5 and 6 (1-based) are skipped.

    Returned order matches the table columns:
    Normal, Two-Step, SW Test, Naive Bootstrap, Pairwise Bootstrap, Shi (2015).

    Raises ``ValueError`` (from tuple unpacking) when ``mc_out`` has fewer than
    eight entries.
    """
    # The two middle slots are produced by the simulation but not reported.
    reg, twostep, sw, naive_boot, _boot2, _sw_test_opt, pairwise_boot, shi = mc_out[:8]
    reported = (reg, twostep, sw, naive_boot, pairwise_boot, shi)
    return [1 - outcome[0] for outcome in reported]

def run_null_size_table(sample_sizes, num_sims, trials, epsilon, data_tuned_epsilon, adapt_c,alpha=.05):
    """Simulate rejection rates at ``beta=0`` for several sample sizes.

    For every ``nobs`` in ``sample_sizes`` this runs
    ``vuong_tests_fast.monte_carlo`` on data from ``gen_data(nobs=nobs, beta=0)``
    with ``setup_shi`` as the setup callback, converts the result with
    ``get_size_vector`` and formats each rate to three decimals. The rows are
    then printed as a LaTeX ``tabular``.

    Parameters other than ``sample_sizes`` are forwarded to ``monte_carlo``
    (``num_sims`` positionally, the rest by keyword).

    Returns
    -------
    list of list
        One row per sample size: ``[nobs, 'x.xxx', ...]`` with the rates as
        already-formatted strings.
    """
    def simulate(n):
        # Callbacks handed to monte_carlo: a zero-argument sampler fixed at
        # this sample size and a pass-through to setup_shi.
        def draw():
            return gen_data(nobs=n, beta=0)

        def prepare(yn, xn):
            return setup_shi(yn, xn)

        return vuong_tests_fast.monte_carlo(
            num_sims,
            draw,
            prepare,
            trials=trials,
            epsilon=epsilon,
            data_tuned_epsilon=data_tuned_epsilon,
            adapt_c=adapt_c,
            print_stuff=False,
            alpha=alpha,
        )

    table = []
    for n in sample_sizes:
        sizes = get_size_vector(simulate(n))
        table.append([n] + ["{:.3f}".format(rate) for rate in sizes])

    # NOTE(review): the first header cell reads "Model" although each row
    # starts with the sample size -- confirm the intended label.
    latex_lines = [
        r'\begin{tabular}{|c|c|c|c|c|c|c|}',
        r'\hline',
        r'Model &  Normal & Two-Step & SW Test & Naive Bootstrap & Pairwise Bootstrap & Shi (2015) \\ \hline \hline',
    ]
    latex_lines += [' & '.join(map(str, row)) + r' \\' for row in table]
    latex_lines += [r'\hline', r'\end{tabular}']
    for line in latex_lines:
        print(line)
    return table

# original example from the paper

In [5]:
# Size table at run_null_size_table's default nominal level (alpha=.05).
# sample_sizes is defined here and reused by the later size-table cells.
sample_sizes = [100, 200, 500]
table = run_null_size_table(
    sample_sizes=sample_sizes,
    num_sims=num_sims,                  # from the settings cell
    trials=trials,                      # from the settings cell
    epsilon=epsilon,                    # configured value instead of a repeated 0.5 literal
    data_tuned_epsilon=data_tuned_epsilon,
    adapt_c=adapt_c
)

print(table)

\begin{tabular}{|c|c|c|c|c|c|c|}
\hline
Model &  Normal & Two-Step & SW Test & Naive Bootstrap & Pairwise Bootstrap & Shi (2015) \\ \hline \hline
100 & 0.000 & 0.000 & 0.061 & 0.272 & 0.045 & 0.000 \\
200 & 0.000 & 0.000 & 0.046 & 0.258 & 0.030 & 0.000 \\
500 & 0.000 & 0.000 & 0.050 & 0.272 & 0.028 & 0.000 \\
\hline
\end{tabular}
[[100, '0.000', '0.000', '0.061', '0.272', '0.045', '0.000'], [200, '0.000', '0.000', '0.046', '0.258', '0.030', '0.000'], [500, '0.000', '0.000', '0.050', '0.272', '0.028', '0.000']]


In [6]:
# Size table at nominal level alpha=.025 (reuses sample_sizes from the earlier cell).
table = run_null_size_table(
    sample_sizes=sample_sizes,
    num_sims=num_sims,                  # from the settings cell
    trials=trials,                      # from the settings cell
    epsilon=epsilon,                    # configured value instead of a repeated 0.5 literal
    data_tuned_epsilon=data_tuned_epsilon,
    adapt_c=adapt_c,alpha=.025
)

print(table)

\begin{tabular}{|c|c|c|c|c|c|c|}
\hline
Model &  Normal & Two-Step & SW Test & Naive Bootstrap & Pairwise Bootstrap & Shi (2015) \\ \hline \hline
100 & 0.000 & 0.000 & 0.030 & 0.210 & 0.025 & 0.000 \\
200 & 0.000 & 0.000 & 0.021 & 0.220 & 0.013 & 0.000 \\
500 & 0.000 & 0.000 & 0.033 & 0.234 & 0.017 & 0.000 \\
\hline
\end{tabular}
[[100, '0.000', '0.000', '0.030', '0.210', '0.025', '0.000'], [200, '0.000', '0.000', '0.021', '0.220', '0.013', '0.000'], [500, '0.000', '0.000', '0.033', '0.234', '0.017', '0.000']]


In [7]:
# Size table at nominal level alpha=.01 (reuses sample_sizes from the earlier cell).
table = run_null_size_table(
    sample_sizes=sample_sizes,
    num_sims=num_sims,                  # from the settings cell
    trials=trials,                      # from the settings cell
    epsilon=epsilon,                    # configured value instead of a repeated 0.5 literal
    data_tuned_epsilon=data_tuned_epsilon,
    adapt_c=adapt_c,alpha=.01
)

print(table)

\begin{tabular}{|c|c|c|c|c|c|c|}
\hline
Model &  Normal & Two-Step & SW Test & Naive Bootstrap & Pairwise Bootstrap & Shi (2015) \\ \hline \hline
100 & 0.000 & 0.000 & 0.008 & 0.149 & 0.011 & 0.000 \\
200 & 0.000 & 0.000 & 0.006 & 0.157 & 0.008 & 0.000 \\
500 & 0.000 & 0.000 & 0.009 & 0.174 & 0.008 & 0.000 \\
\hline
\end{tabular}
[[100, '0.000', '0.000', '0.008', '0.149', '0.011', '0.000'], [200, '0.000', '0.000', '0.006', '0.157', '0.008', '0.000'], [500, '0.000', '0.000', '0.009', '0.174', '0.008', '0.000']]
