In [6]:
%load_ext autoreload
%autoreload 2

import numpy as np
import scipy.stats as stats
import scipy.special
#graphing
import matplotlib.pyplot as plt
#stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

#import testing
import sys
sys.path.append("../")
import vuong_tests3

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=1000, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y : ndarray, shape (nobs,)
        Censored outcome.
    x : ndarray, shape (nobs,)
        Regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Draw a single sample and sanity-check it before fitting anything.
yn,xn,nobs = gen_data()
print(xn.shape)
# add_constant adds an intercept column, so the design matrix has one more column than xn.
print(sm.add_constant(xn).shape)
# In the recorded output the mode of y is 0, so its count is the number of censored observations.
print(scipy.stats.mode(yn))

(1000,)
(1000, 2)
ModeResult(mode=array([0.]), count=array([75]))


In [8]:
class Tobit(GenericLikelihoodModel):
    """Normal regression with outcomes left-censored at 0 and unit error scale.

    Observations whose outcome equals exactly 0 are treated as censored.
    The error standard deviation is not estimated: every density below is
    evaluated with scipy's default ``scale=1``.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``GenericLikelihoodModel`` (endog, exog, ...).
    ols : bool, keyword-only, optional
        If True, censoring is ignored and every observation (zeros included)
        contributes a plain normal log-density.
    """

    def __init__(self, *args,ols=False, **kwargs):
        super(Tobit,self).__init__(*args,**kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the per-observation log-likelihood at ``params``.

        ``params`` are the regression coefficients, one per column of exog.
        """
        y = self.endog
        mu_y = np.matmul(self.exog, params)

        # Log-density of an uncensored observation.
        pr_y = stats.norm.logpdf(y, loc=mu_y)

        # We're done if ols: censoring is ignored entirely.
        if self.ols:
            return pr_y

        # Log-probability that the latent outcome falls at or below the
        # recorded value; only used for censored rows, where y == 0.
        pr_m = stats.norm.logcdf(y, loc=mu_y)

        # Select per row instead of mixing with 0/1 weights, so the branch that
        # is not used for a row can never contaminate it (0 * -inf -> nan).
        censored = (y == 0)
        return np.where(censored, pr_m, pr_y)

    def score(self, params):
        """Return the gradient of the total log-likelihood w.r.t. ``params``.

        Uncensored rows contribute ``(y - x@b) * x``; censored rows contribute
        ``-pdf(y - x@b) / cdf(y - x@b) * x``. In OLS mode every row is treated
        as uncensored.
        """
        y = self.endog
        x = self.exog
        resid = y - np.dot(x, params)

        # The mode flag lives on the instance; in OLS mode all rows are
        # uncensored, so the gradient is the plain least-squares one.
        if self.ols:
            return np.dot(resid, x)

        censored = (y == 0)

        # Ratio pdf/cdf computed in logs for numerical stability.
        l_pdf = stats.norm.logpdf(resid)
        l_cdf = stats.norm.logcdf(resid)
        left_frac = np.exp(l_pdf - l_cdf)

        # Per-row derivative of the log-likelihood with respect to x@b.
        row_grad = np.where(censored, -left_frac, resid)
        return np.dot(row_grad, x)



# Model 1: default ols=False, so rows with y == 0 use the censored (log-cdf) likelihood term.
model1 =  Tobit(yn,sm.add_constant(xn))
model1_fit = model1.fit(disp=False)
print(model1_fit.summary())

# Model 2: same data, but ols=True makes loglikeobs return the plain normal log-density for every row.
model2 =  Tobit(yn,sm.add_constant(xn),ols=True)
model2_fit = model2.fit(disp=False)
print(model2_fit.summary())

                                Tobit Results                                 
Dep. Variable:                      y   Log-Likelihood:                -1373.3
Model:                          Tobit   AIC:                             2751.
Method:            Maximum Likelihood   BIC:                             2760.
Date:                Mon, 05 Apr 2021                                         
Time:                        18:24:46                                         
No. Observations:                1000                                         
Df Residuals:                     998                                         
Df Model:                           1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9668      0.048     20.225      0.000       0.873       1.061
x1             0.2542      0.007     37.813      0.0

In [9]:
def setup_shi(yn,xn):
    """Fit both Tobit variants on (yn, xn) and gather their likelihood pieces.

    Returns an 8-tuple: per-observation log-likelihoods, per-observation
    scores, Hessian and fitted parameters for the censoring-aware model,
    followed by the same four items for the ``ols=True`` model.
    """
    def _pieces(ignore_censoring):
        # Build and fit one variant with tight optimizer tolerances.
        design = sm.add_constant(xn)
        model = Tobit(yn, design, ols=ignore_censoring)
        fitted = model.fit(disp=False, xtol=1e-12, ftol=1e-12)
        theta = fitted.params
        # Everything is evaluated at the fitted parameters.
        return (
            model.loglikeobs(theta),
            model.score_obs(theta),
            model.hessian(theta),
            theta,
        )

    # Model 1 accounts for censoring; model 2 ignores it.
    return _pieces(False) + _pieces(True)

In [72]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=1000, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ Uniform[-3, 3]``; every latent
    value that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    # scipy's uniform is supported on [loc, loc + scale] = [-3, 3].
    x = stats.uniform.rvs(loc=-3, scale=6, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Smoke test on this cell's gen_data; the leading 1 is presumably the number of
# Monte Carlo replications (TODO confirm against vuong_tests3.monte_carlo).
mc_out = vuong_tests3.monte_carlo(1,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

-0.47970371908796927 0.46867042410578097 16.532251023994103
-0.45861065158231407 0.44806250196508474 17.292625615527015
-0.3900134723413326 0.3810430734969152 20.334123981658664
-0.24401703514572134 0.2384045876655132 32.50011744618526
-0.0977104797659026 0.09546311643889314 81.16409130429165
-0.028752747017843795 0.02809142727656839 275.81998673671717
-0.007521106428331753 0.00734811926455911 1054.4435684664193
-0.0019023028167161123 0.0018585494179261656 4168.937895385227
0
\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 1 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 2 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00   \\
\hline
\end{tabular}


In [59]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=1000, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ Uniform[-5, 5]``; every latent
    value that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    # scipy's uniform is supported on [loc, loc + scale] = [-5, 5].
    x = stats.uniform.rvs(loc=-5, scale=10, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs

# Draw one sample and report: number of censored outcomes (y == 0),
# number of non-positive x, number of non-negative x.
yn,xn,nobs = gen_data()
print( (yn==0 ).sum(), (xn<=0).sum(),(xn>=0).sum() )
# Smoke test; the leading 1 is presumably the number of Monte Carlo
# replications (TODO confirm against vuong_tests3.monte_carlo).
mc_out = vuong_tests3.monte_carlo(1,gen_data,setup_shi)
vuong_tests3.print_mc(mc_out)
# Also dump the raw return value next to the formatted table.
print(mc_out)

208 527 473
[    0     3    15    63   255  1023  4095 16383]
-3.9076580766425217
37.65614220813493 -0.9550777776390584 -0.3461587230027072
38.964597236839786 -0.9230056811828796 -0.3345345011715443
44.198417351659224 -0.8137066159735634 -0.2949201098370775
65.13369781093697 -0.5521649441583961 -0.20012685503919833
148.87481964804792 -0.24157573926624354 -0.08755679523771802
483.83930699649176 -0.07433158921680298 -0.02694076713380967
1823.697256390267 -0.019720677041425303 -0.007147569068441298
7183.129053965368 -0.0050068075269721094 -0.0018146690672055377
----
0 pivot
0 pt
0 bc
\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 1 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 2 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00   \\
\hline
\end{tabular}
(array([0., 0., 1.]), array([0., 0., 1.]), array([0., 0., 1.]), array([0., 0., 1.]), array([0., 0., 1.]), -23.505117

# Actual test cases

In [63]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=250, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ Uniform[-5, 5]``; every latent
    value that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    # scipy's uniform is supported on [loc, loc + scale] = [-5, 5].
    x = stats.uniform.rvs(loc=-5, scale=10, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs

# Draw one sample and report: number of censored outcomes (y == 0),
# number of non-positive x, number of non-negative x.
yn,xn,nobs = gen_data()
print( (yn==0 ).sum(), (xn<=0).sum(),(xn>=0).sum() )
# Monte Carlo comparison on this cell's gen_data (nobs=250, x ~ U[-5, 5]);
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
vuong_tests3.print_mc(mc_out)
# Also dump the raw return value next to the formatted table.
print(mc_out)

52 129 121
\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.42 & 0.33 & 0.36 & 0.36 & 0.62   \\
Model 1 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 2 & 0.58 & 0.67 & 0.64 & 0.64 & 0.38   \\
\hline
\end{tabular}
(array([0.42, 0.  , 0.58]), array([0.33, 0.  , 0.67]), array([0.36, 0.  , 0.64]), array([0.36, 0.  , 0.64]), array([0.62, 0.  , 0.38]), -6.771424223049401, 3.243320718215194, 3.1360735399744977)


In [64]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=1000, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ Uniform[-3, 3]``; every latent
    value that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    # scipy's uniform is supported on [loc, loc + scale] = [-3, 3].
    x = stats.uniform.rvs(loc=-3, scale=6, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Draw one sample and report: number of censored outcomes (y == 0),
# number of non-positive x, number of non-negative x.
yn,xn,nobs = gen_data()
print( (yn==0 ).sum(), (xn<=0).sum(),(xn>=0).sum() )
# Monte Carlo comparison on this cell's gen_data (nobs=1000, x ~ U[-3, 3]);
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
vuong_tests3.print_mc(mc_out)
# Also dump the raw return value next to the formatted table.
print(mc_out)

185 523 477
\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.00 & 0.00 & 0.00 & 0.06   \\
Model 1 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 2 & 1.00 & 1.00 & 1.00 & 1.00 & 0.94   \\
\hline
\end{tabular}
(array([0., 0., 1.]), array([0., 0., 1.]), array([0., 0., 1.]), array([0., 0., 1.]), array([0.06, 0.  , 0.94]), -56.65696981424574, 4.237090144038357, 4.403523594370941)


In [65]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=1000, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Monte Carlo comparison on this cell's gen_data (nobs=1000, x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.89 & 0.92 & 0.88 & 0.86 & 0.93   \\
Model 1 & 0.00 & 0.01 & 0.01 & 0.00 & 0.00   \\
Model 2 & 0.11 & 0.07 & 0.11 & 0.14 & 0.07   \\
\hline
\end{tabular}


In [66]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=250, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs

# Monte Carlo comparison on this cell's gen_data (nobs=250, x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.91 & 0.89 & 0.89 & 0.91 & 0.94   \\
Model 1 & 0.01 & 0.01 & 0.01 & 0.01 & 0.00   \\
Model 2 & 0.08 & 0.10 & 0.10 & 0.08 & 0.06   \\
\hline
\end{tabular}


# Main examples

In [67]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=500, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Monte Carlo comparison on this cell's gen_data (nobs=500, beta1=.25, x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.86 & 0.91 & 0.86 & 0.91 & 0.92   \\
Model 1 & 0.01 & 0.02 & 0.02 & 0.02 & 0.01   \\
Model 2 & 0.13 & 0.07 & 0.12 & 0.07 & 0.07   \\
\hline
\end{tabular}


In [19]:
beta0 = 1.
beta1 = .5

def gen_data(beta0=beta0, beta1=beta1, nobs=500, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Monte Carlo comparison on this cell's gen_data (nobs=500, beta1=.5, x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.02 & 0.01 & 0.02 & 0.34   \\
Model 1 & 1.00 & 0.98 & 0.99 & 0.98 & 0.66   \\
Model 2 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
\hline
\end{tabular}


# Misc examples

In [62]:
beta0 = 1.
beta1 = 1.

def gen_data(beta0=beta0, beta1=beta1, nobs=500, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Monte Carlo comparison on this cell's gen_data (nobs=500, beta1=1., x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.00 & 0.00 & 0.00 & 0.22   \\
Model 1 & 1.00 & 1.00 & 1.00 & 1.00 & 0.78   \\
Model 2 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
\hline
\end{tabular}


In [63]:
beta0 = 1.
beta1 = 2.

def gen_data(beta0=beta0, beta1=beta1, nobs=1000, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Monte Carlo comparison on this cell's gen_data (nobs=1000, beta1=2., x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.00 & 0.00 & 0.00 & 0.09   \\
Model 1 & 1.00 & 1.00 & 1.00 & 1.00 & 0.91   \\
Model 2 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
\hline
\end{tabular}


In [64]:
def gen_data(beta0=beta0, beta1=1, nobs=1000, sigma=1):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0.

    Note: this cell does not assign ``beta0``; the default is whatever the
    module-level ``beta0`` holds when the cell is executed.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs

# Monte Carlo comparison on this cell's gen_data (nobs=1000, beta1=1, x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.00 & 0.00 & 0.00 & 0.04   \\
Model 1 & 1.00 & 1.00 & 1.00 & 1.00 & 0.96   \\
Model 2 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
\hline
\end{tabular}


In [65]:
def gen_data(beta0=beta0, beta1=1, nobs=1000, sigma=1):
    """Simulate a sample whose zeroing rule does not depend on the regressor.

    The outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``. Unlike the other
    designs in this notebook, an observation is set to 0 when
    ``beta0 + epsilon <= 0``, regardless of ``x``; outcomes that are not
    zeroed may therefore still be negative.

    Note: this cell does not assign ``beta0``; the default is whatever the
    module-level ``beta0`` holds when the cell is executed.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Partially zeroed outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Selection index: intercept plus noise only, so x plays no role in
    # deciding which rows are zeroed.
    m = beta0 + epsilon
    y[m <= 0] = 0
    return y, x, nobs
    
# Monte Carlo comparison on this cell's gen_data, where rows are zeroed based on
# beta0 + epsilon rather than on y; 100 is presumably the number of
# replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.06 & 0.05 & 0.05 & 0.17 & 0.21   \\
Model 1 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 2 & 0.94 & 0.95 & 0.95 & 0.83 & 0.79   \\
\hline
\end{tabular}


In [66]:
beta0 = 1.
beta1 = .25

def gen_data(beta0=beta0, beta1=beta1, nobs=1000, sigma=2):
    """Simulate one left-censored linear-regression sample.

    The latent outcome is ``beta0 + beta1*x + epsilon`` with
    ``epsilon ~ N(0, sigma**2)`` and ``x ~ N(5, 5**2)``; every latent value
    that is <= 0 is recorded as exactly 0. The default error scale here is 2.

    Parameters
    ----------
    beta0, beta1 : float
        Intercept and slope of the latent regression.
    nobs : int, optional
        Number of observations to draw.
    sigma : float, optional
        Standard deviation of the error term.

    Returns
    -------
    y, x : ndarray, shape (nobs,)
        Censored outcome and regressor (without a constant column).
    nobs : int
        The sample size, echoed back to the caller.
    """
    # Errors are drawn before the regressor; keep this order so seeded runs
    # consume the random stream the same way.
    epsilon = stats.norm.rvs(loc=0, scale=sigma, size=nobs)
    x = stats.norm.rvs(loc=5, scale=5, size=nobs)
    y = beta0 + beta1*x + epsilon

    # Censoring acts on the outcome y (not on x): non-positive values become 0.
    y[y <= 0] = 0
    return y, x, nobs


# Monte Carlo comparison on this cell's gen_data (nobs=1000, sigma=2, x ~ N(5, 5^2));
# 100 is presumably the number of replications (TODO confirm in vuong_tests3).
mc_out = vuong_tests3.monte_carlo(100,gen_data,setup_shi)
# print_mc emits the selection table as a LaTeX tabular (see the recorded output).
vuong_tests3.print_mc(mc_out)

\begin{tabular}{|c|c|c|c|c|c|}
\hline
Model &  Normal & Bootstrap & Bootstrap-bc & Bootstrap-bc & Shi (2015) \\ \hline \hline
No selection & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 1 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00   \\
Model 2 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00   \\
\hline
\end{tabular}
