In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import scipy.stats as stats
import sys

sys.path.append("../")
import vuong_tests5

In [2]:
class OLS_loglike(GenericLikelihoodModel):
    """Gaussian linear-regression likelihood whose error scale is derived from the residuals.

    Only the regression coefficients are passed in as parameters; the error
    standard deviation is recomputed from the residuals on every evaluation.
    """

    def __init__(self, *args, ols=False, **kwargs):
        """Forward all model arguments to the parent class and record ``ols``.

        ``ols`` is stored on the instance; nothing in this class reads it.
        """
        super().__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the normal log-density of every residual at ``params``.

        ``params`` is the coefficient vector multiplied against ``self.exog``.
        The scale is the root mean squared residual, i.e. the sum of squared
        residuals divided by the number of rows (not by degrees of freedom).
        """
        fitted = self.exog @ params
        errors = self.endog - fitted
        # Divides by n, so this is the maximum-likelihood sigma for these params.
        scale = np.sqrt(np.sum(errors**2) / errors.shape[0])
        return stats.norm.logpdf(errors, loc=0, scale=scale)

In [3]:
def setup_model(yn,xn):
    """Fit OLS with an intercept and collect the likelihood pieces at the estimate.

    Parameters
    ----------
    yn : response values.
    xn : regressor(s); a constant column is added with ``sm.add_constant``.

    Returns
    -------
    tuple
        ``(loglik_obs, score_obs, hessian, params, resid)``.  The first three
        come from ``OLS_loglike`` evaluated at the fitted OLS coefficients;
        ``params`` and ``resid`` come from the OLS fit itself.
    """
    ols_fit = sm.OLS(yn, sm.add_constant(xn)).fit(disp=False)
    beta_hat = ols_fit.params

    # Same data wrapped in the likelihood model to get per-observation
    # log-likelihoods and their derivatives at the OLS solution.
    likelihood = OLS_loglike(yn, sm.add_constant(xn))
    return (
        likelihood.loglikeobs(beta_hat),
        likelihood.score_obs(beta_hat),
        likelihood.hessian(beta_hat),
        beta_hat,
        ols_fit.resid,
    )


def setup_test(yn,xn):
    """Fit one single-regressor model per column of ``xn`` and pair the first two.

    Returns a flat 10-tuple: ``(ll, grad, hess, params, resid)`` for the model
    built on column 0, followed by the same five items for column 1.
    """
    # Every column is fitted, although only two of the fits are returned.
    fits = [setup_model(yn, xn[:, col]) for col in range(xn.shape[1])]

    #determine 1 and 2
    first, second = 0, 1
    return tuple(fits[first]) + tuple(fits[second])

def gen_data(nobs=1000, a=0.25):
    """Simulate a linear model in which two regressors share one coefficient.

    Draws ``x`` with shape ``(nobs, 2)`` and a noise vector of length ``nobs``
    from standard normals, then sets ``y = 1 + a * (x[:, 0] + x[:, 1]) + noise``.

    Returns ``(y, x, nobs)``.
    """
    n_regressors = 2
    # Regressors are drawn before the noise; the order fixes the random stream.
    regressors = np.random.normal(scale=1., size=(nobs, n_regressors))
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    outcome = 1 + a * regressors.sum(axis=1) + noise
    return outcome, regressors, nobs


# Draw one dataset with the defaults (nobs=1000, a=0.25) into module-level names.
yn,xn,nobs = gen_data()

In [4]:
def test_mse(resid1,resid2):
    nobs = resid1.shape[0]
    llr = -1*(resid1**2 - resid2**2).sum()
    omega = np.clip(np.sqrt((resid1**2 - resid2**2).var()),.1,10000)
    test_stat = llr/(omega*np.sqrt(nobs))
    return 1*(test_stat >= 1.96) + 2*( test_stat <= -1.96)


In [5]:
def monte_carlo(total,gen_data,setup_test,trials=100,seed=None):
    """Run ``total`` simulated datasets through both model-selection tests.

    Parameters
    ----------
    total : int
        Number of simulated datasets; must be positive.
    gen_data : callable
        Zero-argument function returning ``(y, x, nobs)``.
    setup_test : callable
        Maps ``(y, x)`` to the ten-item tuple ``(ll1, grad1, hess1, params1,
        resid1, ll2, grad2, hess2, params2, resid2)``.
    trials : int, optional
        Accepted for backward compatibility; not used by this function.
    seed : int, optional
        If given, NumPy's global generator is seeded once with this value
        before the first draw, so the whole run is reproducible.  If omitted,
        the generator is re-seeded from fresh entropy before every draw
        (the historical behaviour).

    Returns
    -------
    tuple
        ``(reg_freq, mse_freq, llr_mean, llr_std, omega_mean)`` where

        * ``reg_freq`` / ``mse_freq`` are length-3 arrays with the share of
          draws landing on each outcome index of
          ``vuong_tests5.two_step_test`` and ``test_mse`` respectively
          (``test_mse`` uses 0 = no rejection, 1 = model 1, 2 = model 2;
          presumably ``two_step_test`` uses the same coding - confirm there),
        * ``llr_mean`` / ``llr_std`` are the mean and standard deviation across
          draws of the summed log-likelihood difference,
        * ``omega_mean`` is the average across draws of
          ``std(ll1 - ll2) * sqrt(nobs)``.

    Raises
    ------
    ValueError
        If ``total`` is not positive.
    """
    if total <= 0:
        raise ValueError("total must be a positive number of simulated datasets")

    reg = np.array([0, 0, 0])
    mse = np.array([0, 0, 0])
    omega = 0
    llr = 0
    var = 0

    if seed is not None:
        # One seed for the whole run makes every draw reproducible.
        np.random.seed(seed)

    for _ in range(total):

        #setup data
        if seed is None:
            # Historical behaviour: fresh entropy before each draw.
            np.random.seed()
        yn, xn, nobs = gen_data()

        #update llr and summary stats
        (ll1, grad1, hess1, params1, resid1,
         ll2, grad2, hess2, params2, resid2) = setup_test(yn, xn)
        ll_diff = ll1 - ll2
        llrn = ll_diff.sum()
        llr = llr + llrn
        var = var + llrn**2
        # Scale with this draw's own sample size instead of whatever nobs
        # happens to be left over after the loop.
        omega = omega + np.sqrt(ll_diff.var()) * np.sqrt(nobs)

        reg_index = vuong_tests5.two_step_test(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2,biascorrect=True)
        mse_index = test_mse(resid1,resid2)

        reg[reg_index] = reg[reg_index] + 1
        mse[mse_index] = mse[mse_index] + 1

    llr_mean = llr / total
    # Rounding can push E[x^2] - E[x]^2 marginally below zero; floor it at 0
    # so the standard deviation is never NaN for that reason.
    llr_std = np.sqrt(np.maximum(var / total - llr_mean**2, 0.0))
    return reg / total, mse / total, llr_mean, llr_std, omega / total



In [6]:
# Null scenario: a=0, so y = 1 + noise and neither regressor enters the outcome.
def gen_data_ex():
    return gen_data(nobs=1000, a=0.0)

print(monte_carlo(total=100, gen_data=gen_data_ex, setup_test=setup_test))

(array([1., 0., 0.]), array([1., 0., 0.]), -0.18010240005192807, 1.0720879936299312, 1.2931165769639787)


In [7]:
# Both regressors enter the outcome with the same coefficient, a=0.25.
def gen_data_ex():
    return gen_data(nobs=1000, a=0.25)

print(monte_carlo(total=100, gen_data=gen_data_ex, setup_test=setup_test))

(array([0.97, 0.  , 0.03]), array([0.97, 0.  , 0.03]), -2.273409002547001, 10.029879771688, 10.623423689904703)


In [8]:
# Both regressors enter the outcome with the same, larger coefficient, a=0.5.
def gen_data_ex():
    return gen_data(nobs=1000, a=0.5)

print(monte_carlo(total=100, gen_data=gen_data_ex, setup_test=setup_test))

(array([0.95, 0.02, 0.03]), array([0.95, 0.02, 0.03]), -0.09650743916905537, 19.000952106919705, 19.132890949414087)


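# Power

In the cells below the first regressor's coefficient is `a*(1 + scaler)` while the second's stays at `a`, so model 1 fits better and a powerful test should report outcome `1` more often as `scaler` grows.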

In [9]:
def gen_data2(nobs=1000, a=0.25, scaler = .1):
    """Simulate a linear model where the first regressor carries extra weight.

    Draws ``x`` with shape ``(nobs, 2)`` and a noise vector of length ``nobs``
    from standard normals, then sets
    ``y = 1 + a*scaler*x[:, 0] + a*(x[:, 0] + x[:, 1]) + noise``,
    so column 0 has coefficient ``a*(1 + scaler)`` and column 1 has ``a``.

    Returns ``(y, x, nobs)``.
    """
    n_regressors = 2
    # Regressors are drawn before the noise; the order fixes the random stream.
    regressors = np.random.normal(scale=1., size=(nobs, n_regressors))
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    extra_weight = a * scaler * regressors[:, 0]
    shared_part = a * regressors.sum(axis=1)
    outcome = 1 + extra_weight + shared_part + noise
    return outcome, regressors, nobs

In [10]:
# Slight advantage for model 1: column 0 has coefficient 0.25*(1 + 0.1), column 1 has 0.25.
def gen_data_ex():
    return gen_data2(nobs=1000, a=0.25, scaler= .1)

print(monte_carlo(total=100, gen_data=gen_data_ex, setup_test=setup_test))

(array([0.94, 0.05, 0.01]), array([0.94, 0.05, 0.01]), 3.966567449097104, 10.178513652528922, 11.26103310107324)


In [11]:
# Moderate advantage for model 1: column 0 has coefficient 0.25*(1 + 0.5), column 1 has 0.25.
def gen_data_ex():
    return gen_data2(nobs=1000, a=0.25, scaler= .5)

print(monte_carlo(total=100, gen_data=gen_data_ex, setup_test=setup_test))

(array([0.19, 0.81, 0.  ]), array([0.19, 0.81, 0.  ]), 35.967258133218756, 12.003773928180033, 13.284615604173032)


In [12]:
# Large advantage for model 1: column 0 has coefficient 0.25*(1 + 1), column 1 has 0.25.
def gen_data_ex():
    return gen_data2(nobs=1000, a=0.25, scaler= 1)

print(monte_carlo(total=100, gen_data=gen_data_ex, setup_test=setup_test))

(array([0., 1., 0.]), array([0., 1., 0.]), 82.57533241987794, 15.258774089270368, 15.693458179366692)
