In [23]:
%load_ext autoreload
%autoreload 2

import numpy as np
import scipy.stats as stats
import scipy.special
#graphing
import matplotlib.pyplot as plt
#stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

#import testing
import sys
sys.path.append("../")
import vuong_tests4

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
class Tobit(GenericLikelihoodModel):
    """Linear model with outcomes censored at zero and unit error variance.

    Observations whose ``endog`` equals exactly 0 are treated as censored.
    With ``mu = exog @ params`` the per-observation log-likelihood is

    * uncensored: ``log phi(y - mu)``  (standard normal density)
    * censored:   ``log Phi(y - mu)``  (standard normal cdf)

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``GenericLikelihoodModel.__init__``.
    ols : bool, default False
        If True the censoring is ignored and every observation (including
        the zeros) contributes the normal density term, i.e. a unit-variance
        Gaussian regression.
    """

    def __init__(self, *args, ols=False, **kwargs):
        super(Tobit, self).__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the vector of per-observation log-likelihood contributions.

        Parameters
        ----------
        params : array_like
            Regression coefficients, one per column of ``exog``.

        Returns
        -------
        ndarray
            One log-likelihood value per observation.
        """
        y = self.endog
        x = self.exog

        mu_y = np.matmul(x, params)
        pr_y = stats.norm.logpdf(y, loc=mu_y)

        # we're done if ols: every observation uses the density term
        if self.ols:
            return pr_y

        censored = (y == 0)
        pr_m = stats.norm.logcdf(y, loc=mu_y)
        # np.where selects the relevant term directly, so an infinite value in
        # the unused branch can never produce 0*inf = nan.
        return np.where(censored, pr_m, pr_y)

    def score(self, params):
        """Return the analytic gradient of the total log-likelihood.

        For an uncensored observation the derivative with respect to the
        coefficients is ``(y - mu) * x``; for a censored observation it is
        ``-phi(y - mu) / Phi(y - mu) * x``.

        Parameters
        ----------
        params : array_like
            Regression coefficients, one per column of ``exog``.

        Returns
        -------
        ndarray
            Gradient vector with one entry per coefficient.
        """
        y = self.endog
        x = self.exog
        resid = y - np.dot(x, params)

        # if OLS use all the data: every row contributes the density score.
        # (The previous version referenced an undefined bare name `ols` and
        # masked every row out, which would have produced a zero gradient.)
        if self.ols:
            return np.dot(resid, x)

        censored = (y == 0)
        # phi/Phi evaluated through logs for numerical stability in the tail
        mills = np.exp(stats.norm.logpdf(resid) - stats.norm.logcdf(resid))
        per_obs = np.where(censored, -mills, resid)
        return np.dot(per_obs, x)

In [25]:
def setup_shi(yn,xn,return_model=False):
    """Fit the censored and the uncensored (``ols=True``) Tobit variants.

    Both models are fitted on ``yn`` against ``xn`` with a constant column
    added. For each fit the per-observation log-likelihood, the per-observation
    score, the Hessian and the estimated parameters are evaluated at the
    optimum.

    Returns
    -------
    tuple
        ``(ll1, grad1, hess1, params1, ll2, grad2, hess2, params2)``; when
        ``return_model`` is true the model object is inserted after each
        ``params`` entry.
    """

    def _fit_variant(**tobit_options):
        # fit one Tobit variant and evaluate its summaries at the estimate
        model = Tobit(yn, sm.add_constant(xn), **tobit_options)
        fitted = model.fit(disp=False, xtol=1e-12, ftol=1e-12)
        theta = fitted.params
        loglik_obs = model.loglikeobs(theta)
        score_obs = model.score_obs(theta)
        hessian = model.hessian(theta)
        return loglik_obs, score_obs, hessian, theta, model

    # model 1: censoring-aware likelihood
    ll1, grad1, hess1, params1, model1 = _fit_variant()
    # model 2: same regression, zeros treated as ordinary observations
    ll2, grad2, hess2, params2, model2 = _fit_variant(ols=True)

    if not return_model:
        return ll1,grad1,hess1,params1,ll2,grad2,hess2,params2
    return ll1,grad1,hess1,params1,model1,ll2,grad2,hess2,params2,model2

In [27]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0,beta1=beta1,nobs=250,sigma=1):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ Uniform(-5, 5)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope. The defaults are the module-level values at the
        time this definition is executed.
    nobs : int, default 250
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.uniform.rvs(loc=-5,scale=10,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs



# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (5) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(5,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

-328.54628682650673
-321.4888882452892
-------
llr:-7.05739858121751, eic:-7.471541709603878, test_stat-mean:-6.179691587953655
eic-med:-7.283478064923315, test_stat-med:-6.649536928575646
-316.9691971920738
-316.6620971280325
-------
llr:-0.3071000640413646, eic:-0.5814645196511117, test_stat-mean:-0.3044385289518711
eic-med:-0.4592153908517673, test_stat-med:-0.3669980319882983
-328.2075523731653
-324.27603656883446
-------
llr:-3.9315158043308513, eic:-4.1159587983716825, test_stat-mean:-3.967684128422925
eic-med:-4.004864549694872, test_stat-med:-4.080880551357129
-334.7498936797398
-329.6519527329916
-------
llr:-5.097940946748124, eic:-5.541393304635221, test_stat-mean:-4.394301245977185
eic-med:-5.27236447144216, test_stat-med:-4.45396471897007
-337.7594475280242
-324.04772449142797
-------
llr:-13.71172303659629, eic:-14.06189459479337, test_stat-mean:-13.830740175807886
eic-med:-13.861851357530384, test_stat-med:-13.665348942813148
\begin{tabular}{|c|c|c|c|c|}
\hline
Model &  

In [24]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0,beta1=beta1,nobs=1000,sigma=1):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope. The defaults are the module-level values at the
        time this definition is executed.
    nobs : int, default 1000
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs

# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0.75 0.   0.25], boot1: [0.8 0.  0.2], boot2: [0.75 0.   0.25], llr:-5.012767885040398, std: 5.754050044131698, omega:0.167933448964015
[1. 0. 0.]


In [25]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0,beta1=beta1,nobs=250,sigma=1):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope. The defaults are the module-level values at the
        time this definition is executed.
    nobs : int, default 250
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs


# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0.89 0.   0.11], boot1: [0.87 0.01 0.12], boot2: [0.87 0.01 0.12], llr:-0.7866424771225516, std: 2.8242281472528368, omega:0.17472776029090753
[1. 0. 0.]


# Main examples

In [26]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0,beta1=beta1,nobs=500,sigma=1):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope. The defaults are the module-level values at the
        time this definition is executed.
    nobs : int, default 500
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs


# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0.87 0.01 0.12], boot1: [0.88 0.01 0.11], boot2: [0.86 0.01 0.13], llr:-2.4470144033068792, std: 4.078352409772033, omega:0.1682114449843301
[1. 0. 0.]


In [27]:
beta0 = 1.
beta1 = .5

def gen_data(beta0=beta0,beta1=beta1,nobs=500,sigma=1):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope. The defaults are the module-level values at the
        time this definition is executed.
    nobs : int, default 500
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs


# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0. 1. 0.], boot1: [0. 1. 0.], boot2: [0. 1. 0.], llr:44.72371216030277, std: 11.083246736507375, omega:0.5280182162493349
[0.13 0.87 0.  ]


In [28]:
beta0 = 1.
beta1 = 1.

def gen_data(beta0=beta0,beta1=beta1,nobs=500,sigma=1):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope. The defaults are the module-level values at the
        time this definition is executed.
    nobs : int, default 500
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs


# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0. 1. 0.], boot1: [0. 1. 0.], boot2: [0. 1. 0.], llr:220.75407502695867, std: 46.187186203191075, omega:2.026734139817817
[0.26 0.74 0.  ]


# Misc examples

In [29]:
def gen_data(beta0=beta0,beta1=1,nobs=1000,sigma=1):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0 : float
        Intercept. The default is whatever the module-level ``beta0`` holds
        when this definition is executed, so ``beta0`` must already be defined.
    beta1 : float, default 1
        Slope.
    nobs : int, default 1000
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs

# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0. 1. 0.], boot1: [0. 1. 0.], boot2: [0. 1. 0.], llr:446.7049754938491, std: 62.12020961706232, omega:2.026514472977945
[0.05 0.95 0.  ]


In [30]:
def gen_data(beta0=beta0,beta1=1,nobs=1000,sigma=1):
    """Simulate a linear outcome zeroed by a rule that ignores ``x``.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)`` and builds
    ``y = beta0 + beta1*x + epsilon``. An observation is set to 0 when
    ``beta0 + epsilon <= 0``; the remaining ``y`` values are left untouched
    and may therefore be negative.

    Parameters
    ----------
    beta0 : float
        Intercept. The default is whatever the module-level ``beta0`` holds
        when this definition is executed, so ``beta0`` must already be defined.
    beta1 : float, default 1
        Slope.
    nobs : int, default 1000
        Number of observations to draw.
    sigma : float, default 1
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon
    # selection index: depends on the intercept and the noise only, not on x
    m = beta0 + epsilon
    #censor
    y[m<=0] = 0
    return y,x,nobs
    
# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0. 0. 1.], boot1: [0. 0. 1.], boot2: [0. 0. 1.], llr:-172.8930473174962, std: 19.30239474075414, omega:1.005766944669856
[0.15 0.   0.85]


In [31]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0,beta1=beta1,nobs=1000,sigma=2):
    """Simulate a linear outcome that is censored at zero.

    Draws ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``, builds
    ``y = beta0 + beta1*x + epsilon`` and sets every ``y <= 0`` to 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope. The defaults are the module-level values at the
        time this definition is executed.
    nobs : int, default 1000
        Number of observations to draw.
    sigma : float, default 2
        Standard deviation of the noise term.

    Returns
    -------
    (y, x, nobs) : tuple of (ndarray, ndarray, int)
    """
    # noise is drawn before x; keep this order so seeded runs are reproducible
    epsilon = stats.norm.rvs(loc=0,scale=sigma,size=nobs)
    x = stats.norm.rvs(loc=5,scale=5,size=nobs)
    y = beta0 + beta1*x + epsilon

    # censor: the cut is on the outcome y, not on x
    y[y<=0] = 0
    return y,x,nobs


# Pass this cell's data generator and the model-fitting routine to the
# project's Monte Carlo driver, then hand the result to print_mc.
# NOTE(review): the first argument (1) is presumably the number of simulated
# data sets — confirm against vuong_tests4.monte_carlo.
mc_out = vuong_tests4.monte_carlo(1,gen_data,setup_shi)
vuong_tests4.print_mc(mc_out)

reg: [0. 0. 1.], boot1: [0. 0. 1.], boot2: [0. 0. 1.], llr:-106.7650665605856, std: 8.647186532290823, omega:0.28901176399794165
[0. 0. 1.]
