In [9]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import scipy.stats as stats
import scipy.linalg as linalg
import matplotlib.pyplot as plt

from scipy.optimize import minimize
from scipy.stats import norm

# stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

In [10]:
class OLS_loglike(GenericLikelihoodModel):
    """Gaussian linear-regression likelihood with the error scale profiled out.

    Only ``loglikeobs`` is defined here; everything else (scores, Hessian,
    fitting) comes from ``GenericLikelihoodModel``.
    """

    def __init__(self, *args, ols=False, **kwargs):
        """Forward all model arguments to the base class and remember ``ols``.

        ``ols`` is only stored on the instance; nothing in this class reads it.
        """
        super().__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the per-observation normal log-density of the residuals.

        Parameters
        ----------
        params : array-like
            Regression coefficients, one per column of ``self.exog``.

        Returns
        -------
        ndarray
            ``log N(resid_i; 0, sigma)`` for every observation, where
            ``sigma`` is the root mean squared residual implied by ``params``.
        """
        fitted = self.exog @ params
        residuals = self.endog - fitted
        # Scale is not a free parameter: it is re-estimated from the residuals
        # at whatever coefficients are passed in.
        scale_hat = np.sqrt((residuals ** 2).sum() / residuals.shape[0])
        return stats.norm.logpdf(residuals, loc=0, scale=scale_hat)

In [11]:
def setup_model(yn,xn):
    """Fit OLS of ``yn`` on a constant plus ``xn`` and evaluate the likelihood pieces.

    Returns a 5-tuple evaluated at the OLS coefficients:
    ``(loglikeobs, score_obs, hessian, params, ols_residuals)``.
    The first three come from ``OLS_loglike``; ``score_obs`` and ``hessian``
    are whatever its base class provides (presumably numerical derivatives
    of ``loglikeobs`` -- confirm against statsmodels).
    """
    design = sm.add_constant(xn)

    ols_fit = sm.OLS(yn, design).fit(disp=False)
    beta_hat = ols_fit.params

    likelihood = OLS_loglike(yn, design)
    per_obs_ll = likelihood.loglikeobs(beta_hat)
    per_obs_score = likelihood.score_obs(beta_hat)
    hessian = likelihood.hessian(beta_hat)

    return per_obs_ll, per_obs_score, hessian, beta_hat, ols_fit.resid


def setup_test(yn,xn):
    """Fit one single-regressor model per column of ``xn`` and pair up the first two.

    Every column is fitted via ``setup_model``, but only the results for
    column 0 (model 1) and column 1 (model 2) are returned, as the flat tuple
    ``(ll1, grad1, hess1, params1, ll2, grad2, hess2, params2)``.
    The residuals returned by ``setup_model`` are discarded.
    """
    per_column = [setup_model(yn, xn[:, col]) for col in range(xn.shape[1])]

    # The competing models are fixed: first column vs. second column.
    model_one, model_two = per_column[0], per_column[1]

    # Each entry is (ll, grad, hess, params, resid); keep the first four.
    return model_one[:4] + model_two[:4]

In [12]:
def compute_eigen2(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2):
    """Eigenvalues used for the bias adjustment of the test statistic.

    Builds the block matrix ``A = diag(hess1/n, -hess2/n)`` and the sample
    covariance ``B`` of the stacked per-observation scores ``[grad1, -grad2]``,
    then returns the eigenvalues of ``B^{1/2} A^{-1} B^{1/2}``.

    ``ll1`` is used only for the number of observations; ``ll2`` is unused.
    The result is whatever ``np.linalg.eig`` yields (it may be complex).
    """
    nobs = ll1.shape[0]
    k1, k2 = params1.shape[0], params2.shape[0]

    # A_hat: averaged Hessians on the diagonal, model 2 entering with a minus sign.
    avg_hess1 = hess1 / nobs
    avg_hess2 = hess2 / nobs
    a_hat = np.block([
        [avg_hess1, np.zeros((k1, k2))],
        [np.zeros((k2, k1)), -1 * avg_hess2],
    ])

    # B_hat: covariance of the stacked scores (sign convention flagged as
    # possibly mistaken by the original author).
    stacked_scores = np.concatenate([grad1, -grad2], axis=1)
    b_hat = np.cov(stacked_scores.transpose())

    # Eigenvalues that weight the chi-square terms.
    root_b = linalg.sqrtm(b_hat)
    w_hat = root_b @ linalg.inv(a_hat) @ root_b
    eigenvalues, _ = np.linalg.eig(w_hat)

    return eigenvalues

def bootstrap_distr(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2,c=0,trials=500):
    """Bootstrap the studentised, bias-adjusted log-likelihood-ratio statistic.

    For each of ``trials`` resamples (with replacement, same size as the
    data) of the per-observation log-likelihood differences ``ll1 - ll2``,
    compute

        (sum(llr*) + sum(V)/2) / sqrt(var(llr*) * nobs + c * sum(V**2))

    where ``V`` are the eigenvalues returned by ``compute_eigen2``.

    Parameters
    ----------
    ll1, ll2 : ndarray
        Per-observation log-likelihoods of the two models.
    grad1, hess1, params1, grad2, hess2, params2
        Passed through unchanged to ``compute_eigen2``.
    c : float, default 0
        Weight on ``sum(V**2)`` inside the variance term.
    trials : int, default 500
        Number of bootstrap resamples.

    Returns
    -------
    ndarray
        One statistic per bootstrap resample (length ``trials``).

    Notes
    -----
    Draws come from NumPy's global random state. The generator is *not*
    reseeded here, so calling ``np.random.seed(...)`` beforehand makes the
    bootstrap reproducible.
    """
    nobs = ll1.shape[0]
    llr_obs = ll1 - ll2

    boot_sums = np.empty(trials)
    boot_vars = np.empty(trials)
    for t in range(trials):
        # Previously np.random.seed() was called here on every iteration,
        # which threw away any seed set by the caller.
        draw = np.random.choice(nobs, size=nobs, replace=True)
        resampled = llr_obs[draw]
        boot_sums[t] = resampled.sum()
        boot_vars[t] = resampled.var()

    # Eigenvalue-based adjustments are the same for every resample.
    V = compute_eigen2(ll1,grad1,hess1,params1,ll2,grad2,hess2,params2)
    test_stats = boot_sums + V.sum()/2
    variance_stats = np.sqrt(boot_vars*nobs + c*(V*V).sum())

    return test_stats/variance_stats

In [66]:
def regular_test_pvalue(yn, xn, setup_test):
    """Classical normal-approximation test on the log-likelihood ratio.

    Parameters
    ----------
    yn, xn
        Data, forwarded as-is to ``setup_test``.
    setup_test : callable
        ``setup_test(yn, xn)`` must return the 8-tuple
        ``(ll1, grad1, hess1, params1, ll2, grad2, hess2, params2)``;
        only ``ll1`` and ``ll2`` are used here.

    Returns
    -------
    (result, pvalue)
        ``result`` is 1 when the statistic is positive, 2 when it is negative
        (an exact zero yields 3, since both indicator terms fire).
        ``pvalue`` is the two-sided standard-normal p-value of
        ``sum(ll1 - ll2) / (std(ll1 - ll2) * sqrt(nobs))``.
    """
    ll1, grad1, hess1, params1, ll2, grad2, hess2, params2 = setup_test(yn, xn)
    nobs = ll1.shape[0]
    llr = (ll1 - ll2).sum()
    omega = np.sqrt((ll1 - ll2).var())
    test_stat = llr/(omega*np.sqrt(nobs))
    # Survival function instead of 1 - cdf: the latter rounds to exactly 0
    # for large |test_stat|, losing all tail information.
    pvalue = 2*stats.norm.sf(np.abs(test_stat))
    return 1*(test_stat >= 0) + 2*(test_stat <= 0), pvalue


def bootstrap_test_pvalue(test_stats):
    """Turn an array of bootstrap statistics into a ``(result, pvalue)`` pair.

    The input is copied and sorted ascending (the caller's array is left
    untouched). Two positions are located:

    * ``first_pos``     -- index of the first strictly positive value
                           (0 when there is none);
    * ``rev_first_neg`` -- index, *in the descending view*, of the first
                           negative value (0 when there is none).

    ``result`` starts at 0; the first rule may set it to 1 and the second
    rule, evaluated afterwards, may overwrite it with 2.

    NOTE(review): ``rev_first_neg`` is computed on the reversed array but is
    then used to index the ascending array -- this looks unintended; confirm
    the intended p-value definition before relying on these numbers.
    """
    ordered = test_stats.copy()
    ordered.sort()
    n_draws = ordered.shape[0]

    first_pos = (ordered > 0).argmax()
    rev_first_neg = (ordered[::-1] >= 0).argmin()
    gap = first_pos - rev_first_neg

    outcome, level = 0, 0

    if (ordered[rev_first_neg] >= 0) or (gap > 0):
        outcome = 1
        # The level is zeroed unless the probed value is negative.
        level = (1 - gap / n_draws) * (ordered[rev_first_neg] < 0)

    if (ordered[first_pos] <= 0) or (gap < 0):
        outcome = 2
        # The level is zeroed unless the probed value is positive.
        level = (1 - (rev_first_neg - first_pos) / n_draws) * (ordered[first_pos] > 0)

    return outcome, level

    
def test_table2(yn,xn,setup_test, slow=False, trials=1000):
    """Run the classical and the bootstrap test on one data set and print both.

    Prints two lines, each ``result pvalue``: first the classical test from
    ``regular_test_pvalue``, then the bootstrap test from
    ``bootstrap_test_pvalue``. Nothing is returned.

    ``setup_test`` is called on ``(yn, xn)`` to obtain the likelihood pieces
    (and is called again inside ``regular_test_pvalue``). ``trials`` is the
    number of bootstrap resamples. ``slow`` is accepted but not used.
    """
    # Likelihood pieces for the two competing models.
    (ll_a, score_a, hess_a, beta_a,
     ll_b, score_b, hess_b, beta_b) = setup_test(yn, xn)

    # Bootstrap distribution of the adjusted statistic.
    boot_stats = bootstrap_distr(
        ll_a, score_a, hess_a, beta_a,
        ll_b, score_b, hess_b, beta_b,
        trials=trials,
    )

    classical_result, classical_level = regular_test_pvalue(yn, xn, setup_test)
    print(classical_result, classical_level)

    boot_result, boot_level = bootstrap_test_pvalue(boot_stats)
    print(boot_result, boot_level)

    #print('\\begin{center}\n\\begin{tabular}{ccc}\n\\toprule')
    #print('\\textbf{Version} & \\textbf{Result} & \\textbf{Signficance Level}\\\\ \\midrule' )
    #print('\\bottomrule\n\\end{tabular}\n\\end{center}')

def gen_data(nobs=1000, a=0.25):
    """Simulate ``y = 1 + a*(x0 + x1) + e`` with standard-normal ``x`` and ``e``.

    Uses NumPy's global random state. Returns ``(y, x, nobs)`` where ``x``
    has shape ``(nobs, 2)`` and ``y`` has shape ``(nobs,)``.
    """
    n_regressors = 2
    regressors = np.random.normal(scale=1.0, size=(nobs, n_regressors))
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)

    # Both regressors enter with the same coefficient.
    signal = a * regressors.sum(axis=1)
    outcome = 1 + signal + noise
    return outcome, regressors, nobs

def gen_data2(nobs=1000, a=0.25, scaler = .5):
    """Simulate ``y = 1 + a*scaler*x0 + a*(x0 + x1) + e``.

    Same design as a two-regressor standard-normal simulation, except that
    the first regressor gets an extra ``a*scaler`` on its coefficient.
    Uses NumPy's global random state. Returns ``(y, x, nobs)`` where ``x``
    has shape ``(nobs, 2)``.
    """
    n_regressors = 2
    regressors = np.random.normal(scale=1.0, size=(nobs, n_regressors))
    noise = np.random.normal(loc=0.0, scale=1.0, size=nobs)

    # Extra weight on the first column only.
    first_col_boost = a * scaler * regressors[:, 0]
    shared_signal = a * regressors.sum(axis=1)
    outcome = 1 + first_col_boost + shared_signal + noise
    return outcome, regressors, nobs
            

# Run both tests on the two simulated designs: first the one where the first
# regressor carries extra weight (gen_data2), then the symmetric one (gen_data).
for _simulate in (gen_data2, gen_data):
    yn, xn, nobs = _simulate()
    test_table2(yn, xn, setup_test)

1 0.0014630876631336331
1 0.0
1 0.38833711052860775
2 0.368
