In [33]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
import scipy.stats as stats
import sys

sys.path.append("../")
import vuong_tests5

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
class OLS_loglike(GenericLikelihoodModel):
    
    def __init__(self, *args,ols=False, **kwargs):
        super(OLS_loglike,self).__init__(*args,**kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        y = self.endog
        x = self.exog
        mu_y = np.matmul(x,params)  
        resid = y - mu_y
        sigma = np.sqrt(np.sum(resid**2)/resid.shape[0])
        pr_y = stats.norm.logpdf( resid, loc=0,scale=sigma )
        return pr_y

In [35]:
def setup_model(yn,xn):
    """setup models for ease"""
    model1 = sm.OLS(yn,sm.add_constant(xn))
    model1_fit = model1.fit(disp=False)
    params1 = (model1_fit.params)
    model1_deriv = OLS_loglike(yn,sm.add_constant(xn))
    ll1 = model1_deriv.loglikeobs(model1_fit.params)
    grad1 =  model1_deriv.score_obs(model1_fit.params)    
    hess1 = model1_deriv.hessian(model1_fit.params)
    return ll1,grad1,hess1,params1,model1_fit.resid


def setup_test(yn,xn):
    lls = []
    grads = []
    hesss = []
    params = []
    resids = []
    for i in range(xn.shape[1]):
        ll,grad,hess,param,resid = setup_model(yn,xn[:,i])
        lls.append(ll)
        grads.append(grad)
        hesss.append(hess)
        params.append(param)
        resids.append(resid)
    
    #determine 1 and 2
    ind1 = 0
    ind2 = 1
    return (lls[ind1],grads[ind1],hesss[ind1],params[ind1],resids[ind1],
            lls[ind2],grads[ind2],hesss[ind2],params[ind2],resids[ind2])

In [36]:
def test_mse(resid1,resid2):
    nobs = resid1.shape[0]
    llr = -1*(resid1**2 - resid2**2).sum()
    omega = np.clip(np.sqrt((resid1**2 - resid2**2).var()),.1,10000)
    test_stat = llr/(omega*np.sqrt(nobs))
    return 1*(test_stat >= 1.96) + 2*( test_stat <= -1.96)

def regular_test(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2):
    nobs = ll1.shape[0]
    omega = np.sqrt((ll1 -ll2).var())
    llr = (ll1 - ll2).sum()
    test_stat = llr/(omega*np.sqrt(nobs))
    return 1*(test_stat >= 1.96) + 2*( test_stat <= -1.96)


In [37]:
def monte_carlo(total,gen_data,setup_test,trials=100):
    reg = np.array([0, 0 ,0])
    mse = np.array([0, 0 ,0])
    omega = 0
    llr = 0
    var = 0

    for i in range(total):
        
        #setup data
        np.random.seed()
        yn,xn,nobs = gen_data()
        
        #update llr and summary stats
        ll1,grad1,hess1,params1,resid1,ll2,grad2,hess2,params2,resid2 = setup_test(yn,xn)
        llrn = (ll1 - ll2).sum()
        omegan = np.sqrt( (ll1 -ll2).var())
        llr = llr +llrn
        var = llrn**2 + var
        omega = omega +omegan
    
        reg_index = regular_test(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2)
        mse_index = test_mse(resid1,resid2)
        
        reg[reg_index] = reg[reg_index] + 1
        mse[mse_index] = mse[mse_index] + 1

    return  reg/total,mse/total,llr/total,np.sqrt( (var/total-(llr/total)**2) ),omega*np.sqrt(nobs)/total



In [38]:
def gen_data(nobs=1000, a=0.25):
    num_params=2
    x = np.random.normal(scale=1., size=(nobs,num_params))
    e = np.random.normal(loc=0.0, scale=1.0, size=nobs)
    y = 1 + a*x.sum(axis=1)
    return y,x,nobs


yn,xn,nobs = gen_data()

In [44]:
gen_data_ex = lambda : gen_data(nobs=1000, a=0.0)
print(monte_carlo(100,gen_data_ex,setup_test))

  test_stat = llr/(omega*np.sqrt(nobs))


(array([0.09, 0.42, 0.49]), array([1., 0., 0.]), nan, nan, nan)


In [47]:
gen_data_ex = lambda : gen_data(nobs=1000, a=0.1)
print(monte_carlo(1000,gen_data_ex,setup_test))

(array([0.957, 0.026, 0.017]), array([1., 0., 0.]), 0.15940412160627052, 30.633867637823766, 31.598848052500486)


In [48]:
gen_data_ex = lambda : gen_data(nobs=1000, a=1.0)
print(monte_carlo(1000,gen_data_ex,setup_test))

(array([0.948, 0.027, 0.025]), array([0.95 , 0.026, 0.024]), 0.4387830602936964, 31.425148271382803, 31.52626350233343)


In [52]:
def gen_data(nobs=1000, a=0.25):
    num_params=2
    x = np.random.normal(scale=1., size=(nobs,num_params))
    e = np.random.pareto(1.5, size=nobs)
    y = 1 + a*x.sum(axis=1) + e
    return y,x,nobs


gen_data_ex = lambda : gen_data(nobs=100, a=0.0)
print(monte_carlo(100,gen_data_ex,setup_test))

(array([0.99, 0.01, 0.  ]), array([1., 0., 0.]), 0.024584422381902372, 0.8364774270758268, 0.9527563134697778)


In [53]:
gen_data_ex = lambda : gen_data(nobs=100, a=0.1)
print(monte_carlo(1000,gen_data_ex,setup_test))

(array([0.993, 0.003, 0.004]), array([1., 0., 0.]), 0.04855386456241457, 1.1308121614527296, 1.1226146947872764)


In [54]:
gen_data_ex = lambda : gen_data(nobs=100, a=1.0)
print(monte_carlo(1000,gen_data_ex,setup_test))

(array([0.862, 0.08 , 0.058]), array([0.977, 0.015, 0.008]), 0.22264581471945702, 4.181187665651652, 3.128958085265526)
