In [40]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import scipy.stats as stats
import sys

sys.path.append("../")
import vuong_tests5

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
class OLS_loglike(GenericLikelihoodModel):
    """Gaussian linear-model likelihood with the error scale profiled out.

    The per-observation log-likelihood is a normal log-density of the
    residuals ``endog - exog @ params``; the standard deviation is not a
    parameter but is recomputed from those residuals on every call.
    """

    def __init__(self, *args, ols=False, **kwargs):
        """Forward all standard arguments to the base class and keep ``ols``.

        ``ols`` is only stored on the instance; nothing in this class reads it.
        """
        super().__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the normal log-density of each residual at ``params``."""
        fitted = np.matmul(self.exog, params)
        residuals = self.endog - fitted
        # Root mean squared residual (divides by the number of rows, no
        # degrees-of-freedom correction).
        n_rows = residuals.shape[0]
        scale_hat = np.sqrt(np.sum(residuals**2) / n_rows)
        return stats.norm.logpdf(residuals, loc=0, scale=scale_hat)

In [42]:
def gen_data(nobs=1000, a=0.25, num_params=4):
    """Simulate a linear model in which every regressor has the same slope.

    Draws an ``(nobs, num_params)`` matrix of standard-normal regressors and
    ``nobs`` standard-normal errors from the global NumPy random state, then
    builds ``y = 1 + a * (row sum of x) + e``.

    Returns
    -------
    tuple
        ``(y, x, nobs)``.
    """
    # Draw order (regressors first, then errors) is kept so that a given
    # global seed reproduces the same sample.
    regressors = np.random.normal(0.0, 1., (nobs, num_params))
    noise = np.random.normal(0.0, 1.0, nobs)
    signal = a * regressors.sum(axis=1)
    outcome = 1 + signal + noise
    return outcome, regressors, nobs

In [43]:
def setup_model(yn,xn):
    """Fit OLS of ``yn`` on a constant plus ``xn`` and evaluate the likelihood.

    The coefficients come from ``sm.OLS``; the per-observation
    log-likelihood, per-observation score and Hessian are then evaluated at
    those coefficients through ``OLS_loglike``.

    Returns
    -------
    tuple
        ``(loglike_obs, score_obs, hessian, params)``.
    """
    design = sm.add_constant(xn)

    # Point estimates from the closed-form OLS fit.
    beta_hat = sm.OLS(yn, design).fit(disp=False).params

    # Likelihood object used only to evaluate derivatives at beta_hat.
    likelihood = OLS_loglike(yn, design)
    per_obs_ll = likelihood.loglikeobs(beta_hat)
    per_obs_score = likelihood.score_obs(beta_hat)
    hessian = likelihood.hessian(beta_hat)
    return per_obs_ll, per_obs_score, hessian, beta_hat


def setup_test(yn,xn):
    """Fit one single-regressor model per column of ``xn`` and pick the top two.

    Each column ``xn[:, i]`` is passed to ``setup_model`` on its own. The
    candidates are ranked by total log-likelihood (sum of the per-observation
    values).

    Returns
    -------
    tuple
        Eight items: ``(ll, grad, hess, params)`` of the candidate with the
        SECOND-highest total log-likelihood, followed by the same four items
        for the candidate with the HIGHEST total log-likelihood.

    Raises
    ------
    ValueError
        If ``xn`` has fewer than two columns.
    """
    num_candidates = xn.shape[1]
    if num_candidates < 2:
        raise ValueError("setup_test needs at least two candidate regressors")

    fits = [setup_model(yn, xn[:, i]) for i in range(num_candidates)]

    #determine 1 and 2
    lls_sum = np.array([fit[0] for fit in fits]).sum(axis=1)
    # Rank by index instead of matching sorted values back with ``==``:
    # with an exact tie the value lookup resolved both slots to the same
    # candidate, so a model would have been compared against itself.
    order = np.argsort(lls_sum, kind="stable")
    ind1, ind2 = order[-2], order[-1]
    return (*fits[ind1], *fits[ind2])

# One draw with gen_data's default settings; the unpacked results stay in the
# notebook namespace as module-level names.
yn,xn,nobs = gen_data()
ll1,grad1,hess1,params1,ll2,grad2,hess2,params2 = setup_test(yn,xn)

In [47]:
def monte_carlo(total,gen_data,setup_test,trials=100):
    """Repeat the two-step selection test on ``total`` simulated samples.

    Parameters
    ----------
    total : int
        Number of replications; must be positive.
    gen_data : callable
        Zero-argument callable returning ``(yn, xn, nobs)``.
    setup_test : callable
        Called as ``setup_test(yn, xn)``; must return
        ``(ll1, grad1, hess1, params1, ll2, grad2, hess2, params2)``.
    trials : int, optional
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(freq, llr_mean, llr_sd, omega_scaled)`` where ``freq`` is the
        share of replications in which ``two_step_test`` returned index
        0, 1 or 2, ``llr_mean`` / ``llr_sd`` are the mean and standard
        deviation of the summed log-likelihood differences, and
        ``omega_scaled`` is the mean per-observation standard deviation of
        the difference multiplied by ``sqrt(nobs)``, with ``nobs`` taken
        from the last replication.

    Raises
    ------
    ValueError
        If ``total`` is not positive.
    """
    if total <= 0:
        raise ValueError("total must be a positive number of replications")

    reg = np.array([0, 0 ,0])
    omega = 0
    llr = 0
    var = 0

    for _ in range(total):

        #setup data
        # Re-seed from fresh entropy on every replication.
        # NOTE(review): presumably guards against two_step_test fixing the
        # global seed internally - confirm. It also makes runs
        # non-reproducible.
        np.random.seed()
        yn,xn,nobs = gen_data()

        #update llr and summary stats
        ll1,grad1,hess1,params1,ll2,grad2,hess2,params2 = setup_test(yn,xn)
        diff = ll1 - ll2
        llrn = diff.sum()
        omegan = np.sqrt(diff.var())
        llr = llr + llrn
        var = llrn**2 + var
        omega = omega + omegan

        #shi/twosteptest
        reg_index = vuong_tests5.two_step_test(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2,biascorrect=True)
        reg[reg_index] = reg[reg_index] + 1

    llr_mean = llr/total
    # E[x^2] - E[x]^2 can dip just below zero through rounding; clamp it so
    # the square root does not return NaN.
    llr_var = max(var/total - llr_mean**2, 0.0)
    return  reg/total,llr_mean,np.sqrt(llr_var),omega*np.sqrt(nobs)/total



(array([0.97, 0.  , 0.03]), -5.552828560970343, 5.201526707776683, 10.897773505553026)


# K = 4

In [48]:
# 4 candidate regressors, a=0.0: y in gen_data does not depend on any of them.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.0, num_params=4)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -0.6982014874948568, 0.6116663561069424, 1.744702044256684)


In [49]:
# 4 candidate regressors, each entering y with the same slope a=0.25.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.25, num_params=4)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -5.8202029903092525, 4.878721812795434, 11.135646672858215)


In [50]:
# 4 candidate regressors, each entering y with the same slope a=0.5.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.5, num_params=4)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([0.99, 0.  , 0.01]), -9.197461104627488, 6.792073128648582, 17.057531072237044)


# K = 9

In [51]:
# 9 candidate regressors, a=0.0: y in gen_data does not depend on any of them.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.0, num_params=9)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -0.8179191137507064, 0.8590012951745671, 2.3718762326036202)


In [52]:
# 9 candidate regressors, each entering y with the same slope a=0.25.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.25, num_params=9)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -4.095790169084625, 3.608453249462736, 10.542482068078154)


In [53]:
# 9 candidate regressors, each entering y with the same slope a=0.5.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.5, num_params=9)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -6.193037281131777, 5.123248769387499, 14.170242031351457)


# K = 19

In [54]:
# 19 candidate regressors, a=0.0: y in gen_data does not depend on any of them.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.0, num_params=19)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -0.9811162243925549, 1.1420726481200485, 2.7888781547456434)


In [55]:
# 19 candidate regressors, each entering y with the same slope a=0.25.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.25, num_params=19)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -3.2010632171332385, 2.8290825732471903, 9.853952891887156)


In [56]:
# 19 candidate regressors, each entering y with the same slope a=0.5.
# Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data(nobs=1000, a=0.5, num_params=19)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -4.025449596320765, 3.2298077901506557, 11.52022145110401)


# Power

In [65]:
def gen_data2(nobs=1000, a=0.25, scaler = .1, num_params=4):
    """Simulate a linear model in which the first regressor has a larger slope.

    Same design as a common-slope model, except that column 0 of ``x``
    receives an additional ``a * scaler`` on top of the shared slope ``a``:
    ``y = 1 + a*scaler*x[:, 0] + a*(row sum of x) + e``. Regressors and
    errors are standard-normal draws from the global NumPy random state.

    Returns
    -------
    tuple
        ``(y, x, nobs)``.
    """
    # Draw order (regressors first, then errors) is kept so that a given
    # global seed reproduces the same sample.
    regressors = np.random.normal(0.0, 1., (nobs, num_params))
    noise = np.random.normal(0.0, 1.0, nobs)
    first_col_bonus = a * scaler * regressors[:, 0]
    shared_signal = a * regressors.sum(axis=1)
    outcome = 1 + first_col_bonus + shared_signal + noise
    return outcome, regressors, nobs

# K = 19

It seems the main issue is power: the second alternative is going to look better and better... How should that be modeled?

In [81]:
# 19 candidates; in gen_data2 the first regressor's slope is a*(1+scaler) = 0.25*1.1,
# all others 0.25. Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data2(nobs=1000, a=0.25, scaler= .1, num_params=19)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -3.7942825726109586, 3.0324626137231245, 9.879441275336694)


In [82]:
# 19 candidates; in gen_data2 the first regressor's slope is a*(1+scaler) = 0.25*1.5,
# all others 0.25. Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data2(nobs=1000, a=0.25,  scaler= .5,  num_params=19)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([0.96, 0.  , 0.04]), -7.620765915397299, 6.4619635346160385, 10.582802552106102)


In [83]:
# 19 candidates; in gen_data2 the first regressor's slope is a*(1+scaler) = 0.25*2,
# all others 0.25. Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data2(nobs=1000, a=0.25, scaler= 1,  num_params=19)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([0.19, 0.  , 0.81]), -32.220760295281984, 9.03036645757417, 12.476992130000836)


# K = 4

In [84]:
# 4 candidates; in gen_data2 the first regressor's slope is a*(1+scaler) = 0.25*1.1,
# all others 0.25. Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data2(nobs=1000, a=0.25, scaler= .1, num_params=4)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([1., 0., 0.]), -6.6345167136836825, 4.761114518061067, 11.300711192113827)


In [85]:
# 4 candidates; in gen_data2 the first regressor's slope is a*(1+scaler) = 0.25*1.5,
# all others 0.25. Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data2(nobs=1000, a=0.25,  scaler= .5,  num_params=4)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([0.56, 0.  , 0.44]), -24.62915753520873, 11.805464909345865, 13.03695233195193)


In [86]:
# 4 candidates; in gen_data2 the first regressor's slope is a*(1+scaler) = 0.25*2,
# all others 0.25. Prints monte_carlo's summary tuple over 100 replications.
gen_data_ex = lambda : gen_data2(nobs=1000, a=0.25, scaler= 1,  num_params=4)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([0., 0., 1.]), -66.06376583143705, 14.578705095782349, 15.239359475178127)
