In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import scipy.stats as stats
import sys

sys.path.append("../")
import vuong_tests5

In [2]:
class OLS_loglike(GenericLikelihoodModel):
    """Gaussian linear-regression likelihood built on GenericLikelihoodModel.

    The error scale is not an estimated parameter here: for whatever
    coefficient vector is passed in, the scale is taken to be the
    root-mean-square of the implied residuals. ``params`` therefore holds
    regression coefficients only.
    """

    def __init__(self, *args, ols=False, **kwargs):
        """Pass every model argument to the parent class and keep the ``ols`` flag."""
        super().__init__(*args, **kwargs)
        # Stored only; no method of this class reads it.
        self.ols = ols

    def loglikeobs(self, params):
        """Return the normal log-density of each residual at ``params``.

        ``params`` is multiplied into ``self.exog`` to form the fitted mean;
        the residuals ``self.endog - mean`` are then evaluated under a
        zero-mean normal whose scale is their own root-mean-square.
        """
        fitted = np.matmul(self.exog, params)
        errors = self.endog - fitted
        n_obs = errors.shape[0]
        # Scale is recomputed from the residuals on every call.
        scale = np.sqrt(np.sum(errors**2)/n_obs)
        return stats.norm.logpdf(errors, loc=0, scale=scale)

In [3]:
def setup_model(yn,xn):
    """Fit OLS with an added constant and gather likelihood pieces at the fit.

    Returns a 5-tuple, in this order:
    the ``loglikeobs`` values, the ``score_obs`` values and the ``hessian``
    of ``OLS_loglike`` evaluated at the OLS coefficients, then the OLS
    coefficients themselves and the OLS residuals.
    """
    design = sm.add_constant(xn)
    ols_fit = sm.OLS(yn, design).fit(disp=False)
    beta_hat = ols_fit.params

    # Same data wrapped in the likelihood model so derivatives can be
    # evaluated at the OLS estimate.
    like_model = OLS_loglike(yn, design)
    return (
        like_model.loglikeobs(beta_hat),
        like_model.score_obs(beta_hat),
        like_model.hessian(beta_hat),
        beta_hat,
        ols_fit.resid,
    )


def setup_test(yn,xn):
    """Run ``setup_model`` on a cubic and on a quartic polynomial in ``xn``.

    Model 1 uses the regressors (x, x^2, x^3); model 2 adds x^4.
    The return value is a flat 10-tuple: the five ``setup_model`` outputs
    for model 1 followed by the same five outputs for model 2.
    """
    cubic_design = np.array([xn, xn**2, xn**3]).T
    quartic_design = np.array([xn, xn**2, xn**3, xn**4]).T

    first = setup_model(yn, cubic_design)
    second = setup_model(yn, quartic_design)

    # Model 1 results first, model 2 results second.
    return (*first, *second)

def gen_data(nobs=1000, a=0.25):
    """Simulate ``y = 1 + a*(x + x^2) + e`` with standard-normal ``x`` and ``e``.

    Returns ``(y, x, nobs)``. Draws come from the global NumPy generator,
    regressor first and noise second.
    """
    regressor = np.random.normal(scale=1., size=nobs)
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    # Conditional mean is quadratic in the regressor.
    signal = a*(regressor + regressor**2)
    return 1 + signal + noise, regressor, nobs

# Draw one example dataset with gen_data's defaults (nobs=1000, a=0.25).
yn,xn,nobs = gen_data()

In [4]:
def test_mse(resid1,resid2):
    nobs = resid1.shape[0]
    llr = -1*(resid1**2 - resid2**2).sum()
    omega = np.clip(np.sqrt((resid1**2 - resid2**2).var()),.1,10000)
    test_stat = llr/(omega*np.sqrt(nobs))
    return 1*(test_stat >= 1.96) + 2*( test_stat <= -1.96)


In [5]:
def monte_carlo(total,gen_data,setup_test,trials=100,seed=None):
    """Repeat the two-model comparison on simulated data and summarise it.

    Parameters
    ----------
    total : int
        Number of simulated datasets; must be at least 1.
    gen_data : callable
        Zero-argument function returning ``(yn, xn, nobs)``.
    setup_test : callable
        Maps ``(yn, xn)`` to the ten values ``ll1, grad1, hess1, params1,
        resid1, ll2, grad2, hess2, params2, resid2``.
    trials : int, optional
        Accepted for backward compatibility; this function does not use it.
    seed : int, optional
        When given, the global NumPy generator is seeded once with this
        value before the first draw, so the whole run is repeatable.
        When omitted, the generator is re-seeded from fresh entropy before
        every dataset, which is the historical behaviour.

    Returns
    -------
    tuple
        ``(reg_freq, mse_freq, llr_mean, llr_sd, omega_scaled)`` where

        * ``reg_freq`` is the share of runs landing on each outcome code
          (0, 1, 2) returned by ``vuong_tests5.two_step_test``;
        * ``mse_freq`` is the same for ``test_mse``;
        * ``llr_mean`` / ``llr_sd`` are the mean and standard deviation,
          across runs, of ``(ll1 - ll2).sum()``;
        * ``omega_scaled`` is the average across runs of the standard
          deviation of ``ll1 - ll2``, multiplied by ``sqrt(nobs)`` of the
          last generated dataset.

    Raises
    ------
    ValueError
        If ``total`` is smaller than 1.
    """
    if total < 1:
        raise ValueError("total must be a positive number of simulations")

    reg = np.array([0, 0 ,0])
    mse = np.array([0, 0 ,0])
    omega = 0
    llr = 0
    var = 0

    if seed is not None:
        # One seed for the whole experiment makes every dataset repeatable.
        np.random.seed(seed)

    for _ in range(total):

        #setup data
        if seed is None:
            # Historical behaviour: fresh entropy for every dataset.
            np.random.seed()
        yn,xn,nobs = gen_data()

        #update llr and summary stats
        ll1,grad1,hess1,params1,resid1,ll2,grad2,hess2,params2,resid2 = setup_test(yn,xn)
        llrn = (ll1 - ll2).sum()
        omegan = np.sqrt( (ll1 -ll2).var())
        llr = llr +llrn
        var = llrn**2 + var
        omega = omega +omegan

        reg_index = vuong_tests5.two_step_test(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2,biascorrect=True)
        mse_index = test_mse(resid1,resid2)

        reg[reg_index] = reg[reg_index] + 1
        mse[mse_index] = mse[mse_index] + 1

    llr_mean = llr/total
    # E[X^2] - E[X]^2 can dip just below zero through round-off; clamp it so
    # the square root does not turn into NaN.
    llr_var = max(var/total - llr_mean**2, 0.0)
    return  reg/total,mse/total,llr_mean,np.sqrt(llr_var),omega*np.sqrt(nobs)/total



In [6]:
# gen_data with a=0.0 and 1000 observations, 100 simulated datasets.
def gen_data_ex():
    return gen_data(nobs=1000, a=0.0)

print(monte_carlo(100, gen_data_ex, setup_test))

(array([1., 0., 0.]), array([1., 0., 0.]), -0.5472102921766036, 0.8060465561677376, 0.7665167938874334)


In [7]:
# gen_data with a=0.25 and 1000 observations, 100 simulated datasets.
def gen_data_ex():
    return gen_data(nobs=1000, a=0.25)

print(monte_carlo(100, gen_data_ex, setup_test))

(array([1., 0., 0.]), array([1., 0., 0.]), -0.4947951150154822, 0.6379632887406606, 0.7628863929462543)


In [8]:
# gen_data with a=0.5 and 1000 observations, 100 simulated datasets.
def gen_data_ex():
    return gen_data(nobs=1000, a=0.5)

print(monte_carlo(100, gen_data_ex, setup_test))

(array([1., 0., 0.]), array([1., 0., 0.]), -0.4686012770567757, 0.7132806283275269, 0.7315424800267702)


# Power

In [9]:
def gen_data2(nobs=1000, a=0.25):
    """Simulate ``y = 1 + a*(x + x^2 + x^3 + x^4) + e`` with standard-normal ``x`` and ``e``.

    Returns ``(y, x, nobs)``. Draws come from the global NumPy generator,
    regressor first and noise second.
    """
    regressor = np.random.normal(scale=1., size=nobs)
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    # Conditional mean is a quartic polynomial in the regressor.
    signal = a*(regressor + regressor**2 + regressor**3 + regressor**4)
    return 1 + signal + noise, regressor, nobs

# Draw one example dataset with gen_data2's defaults (nobs=1000, a=0.25)
# and print the shapes of the response and regressor arrays.
yn,xn,nobs = gen_data2()
print(yn.shape,xn.shape)

(1000,) (1000,)


In [10]:
# gen_data2 with a=0.0 and 1000 observations, 100 simulated datasets.
def gen_data_ex():
    return gen_data2(nobs=1000, a=0.0)

print(monte_carlo(100, gen_data_ex, setup_test))

(array([1., 0., 0.]), array([1., 0., 0.]), -0.46061447204918815, 0.5772820451191526, 0.7252030303859448)


In [11]:
# gen_data2 with a=0.25 and 1000 observations, 100 simulated datasets.
def gen_data_ex():
    return gen_data2(nobs=1000, a=0.25)

print(monte_carlo(100, gen_data_ex, setup_test))

(array([0.39, 0.  , 0.61]), array([0., 0., 1.]), -401.4838321372878, 136.2072291567747, 75.65445033414868)


In [12]:
# gen_data2 with a=0.5 and 1000 observations, 100 simulated datasets.
def gen_data_ex():
    return gen_data2(nobs=1000, a=0.5)

print(monte_carlo(100, gen_data_ex, setup_test))

(array([0.31, 0.  , 0.69]), array([0., 0., 1.]), -835.5054292955984, 185.7054042521751, 110.01629369376282)


In [13]:
# gen_data2 with a=0.5 and 2000 observations, 100 simulated datasets.
def gen_data_ex():
    return gen_data2(nobs=2000, a=.5)

print(monte_carlo(100, gen_data_ex, setup_test))

(array([0.08, 0.  , 0.92]), array([0., 0., 1.]), -1814.7955000758604, 310.6597633637663, 209.0461413849191)
