In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import datetime
import scipy.stats as stats

#graphing
import matplotlib.pyplot as plt
#stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

#import testing
import sys
sys.path.append("../")
import selection_tests

In [2]:
# Open question left by the author: does this need to replicate table 3?
lotto_raw = pd.read_stata('FinalAnon.dta')
print(lotto_raw.columns)

# Work on a copy so the raw frame stays available for inspection.
lotto_data = lotto_raw.copy()

# Winner indicator (0/1): rows with buyer >= 1 and win_code >= 1.
is_buyer = lotto_data['buyer'] >= 1
has_win_code = lotto_data['win_code'] >= 1
lotto_data['winner'] = 1 * (is_buyer & has_win_code)

# Zero out prize amounts for everyone who is not flagged as a winner.
for prize_col in ('winnings', 'sumwinnings'):
    lotto_data[prize_col] = lotto_data[prize_col] * lotto_data['winner']

# For the regressions, winnings are measured in euros/10000:
lotto_data['winnings'] = lotto_data['winnings'] / 10000
print( lotto_data.shape, lotto_data[['winner','winnings','sumwinnings']].mean() )

# Since it's not clear what is true for winners who report no winnings,
# drop these obs. The count printed is the number of rows that survive the
# first filter.
winner_zero_winnings = (lotto_data['winner'] == 1) & (lotto_data['winnings'] == 0)
print((~winner_zero_winnings).sum())
lotto_data = lotto_data[~winner_zero_winnings]

# Same rule for sumwinnings, evaluated on the already-filtered frame.
winner_zero_sum = (lotto_data['winner'] == 1) & (lotto_data['sumwinnings'] == 0)
lotto_data = lotto_data[~winner_zero_sum]

winners = lotto_data[lotto_data['winner'] == 1]
print(winners['winnings'].mean())
print( winners.shape)

# NON-LOTTERY INCOME VARIABLE:
# (note we use income last year, to avoid the problem that some hh included
# lottery winnings in current income)
# Rescale own income to be in the same units as lottery winnings (euro/10000):
for income_col in ('inc_now', 'inc_then'):
    lotto_data[income_col] = lotto_data[income_col] / 10000

Index(['NperPstk', 'v01', 'v02a1', 'v02b1a', 'v02b1b', 'v02b1c', 'v02c1',
       'v02a2', 'v02b2a', 'v02b2b',
       ...
       'rememberStreetPrize', 'know_winnersA', 'know_winnersB',
       'num_neighbors_bot', 'num_neighbor_tixbot', 'neighbor_bot',
       'num_neighbors_botA', 'neighbor_botA', 'pstk', 'codegroup'],
      dtype='object', length=342)
(1879, 342) winner         0.118680
winnings       0.220703
sumwinnings    0.253965
dtype: float64
1850
2.1376288659793814
(194, 342)


In [3]:
class OLS_loglike(GenericLikelihoodModel):
    """Per-observation Gaussian log-likelihood for a linear model.

    Subclasses ``GenericLikelihoodModel`` and overrides ``loglikeobs`` so the
    base class's score/Hessian helpers can be evaluated at a given
    coefficient vector.
    """

    def __init__(self, *args, ols=False, **kwargs):
        """Forward all arguments to the base class and store the ``ols`` flag.

        ``ols`` is only stored on the instance; nothing in this class reads it.
        """
        super().__init__(*args, **kwargs)
        self.ols = ols

    def loglikeobs(self, params):
        """Return the normal log-density of each residual at ``params``.

        The error scale is not a parameter: it is recomputed on every call
        as sqrt(sum of squared residuals / number of observations).
        """
        residuals = self.endog - np.matmul(self.exog, params)
        n_obs = residuals.shape[0]
        sigma_hat = np.sqrt(np.sum(residuals**2) / n_obs)
        return stats.norm.logpdf(residuals, loc=0, scale=sigma_hat)


In [4]:
def setup_helper(data,depvar,nw):
    """Build the group-demeaned outcome and regressor matrix for one model.

    Parameters
    ----------
    data : DataFrame holding the outcome, the covariates and 'codegroup'.
    depvar : name of the outcome column.
    nw : name of the extra regressor appended to 'sumwinnings' and 'inc_then'.

    Returns
    -------
    (y, X) : the outcome Series and the regressor DataFrame (including
        'const'), each with its 'codegroup' mean removed and its overall
        mean added back.
    """
    absorb = 'codegroup'
    important_cov = ['sumwinnings', 'inc_then', nw]
    # Covariates shared by every specification.
    global_cov = [
        'buyer', 'numtix', 'numtixsq',
        'partner_now', 'persons_now', 'kids', 'kids_sq', 'age_fam', 'age_famsq',
    ] + [f'ed{i}' for i in range(2, 9)]

    def _demean_within_groups(frame):
        # Subtract each group's mean, then restore the overall mean.
        group_means = frame.groupby(frame[absorb]).transform('mean')
        return frame - group_means + frame.mean()

    sample = data.copy()
    if depvar == 'newcar':
        # Rows with wonbmw == 1 are excluded for the new-car outcome.
        sample = sample[sample['wonbmw'] != 1]

    model_cols = [depvar] + important_cov + global_cov + [absorb]
    sample = sm.add_constant(sample[model_cols]).dropna()
    # Author's note: should be 1389 observations?

    regressors = important_cov + ['const'] + global_cov
    y = _demean_within_groups(sample[[depvar, absorb]])[depvar]
    X = _demean_within_groups(sample[regressors + [absorb]])[regressors]

    return y,X


def _fit_ols_with_derivs(y, X):
    """Fit OLS of ``y`` on ``X`` and evaluate the likelihood pieces at the estimate.

    Returns the tuple (per-observation log-likelihood, per-observation score,
    Hessian, fitted coefficients), the last three computed by ``OLS_loglike``
    at the OLS coefficients.
    """
    # OLS.fit has no `disp` option, so none is passed.
    params = sm.OLS(y, X).fit().params
    likelihood = OLS_loglike(y, X)
    return (
        likelihood.loglikeobs(params),
        likelihood.score_obs(params),
        likelihood.hessian(params),
        params,
    )


def setup_test(yn,xn,depvar='happy_now' ):
    """Prepare the two competing regressions for the selection test.

    Model 1 uses 'win_code' and model 2 uses 'neighbor_won' as the extra
    regressor; everything else is built by ``setup_helper``.

    Parameters
    ----------
    yn : ignored. The outcome is rebuilt from ``xn``; the argument is kept so
        the function still matches the (yn, xn) call signature used by callers.
    xn : DataFrame with the raw outcome and covariate columns.
    depvar : name of the outcome column.

    Returns
    -------
    ll1, grad1, hess1, params1, ll2, grad2, hess2, params2
    """
    # Both models must be estimated on the same observations. Dropping rows
    # missing either competing regressor up front guarantees that; previously
    # model 2 was fit with the outcome vector built for model 1's sample.
    rival_vars = ['win_code', 'neighbor_won']
    sample = xn.dropna(subset=rival_vars)

    y1n, x1n = setup_helper(sample, depvar, 'win_code')
    y2n, x2n = setup_helper(sample, depvar, 'neighbor_won')

    return _fit_ols_with_derivs(y1n, x1n) + _fit_ols_with_derivs(y2n, x2n)


# Regressor / outcome combinations noted by the author:
#   win_code      with outcome 'total'
#   neighbor_won  with outcome 'total'

#   win_code      with outcome 'happy_now'
#   neighbor_won  with outcome 'happy_now'

# One-off run of setup_test on the 'newcar' outcome. The first argument is
# overwritten inside setup_test, so passing lotto_data twice is harmless.
ll1,grad1,hess1,params1,ll2,grad2,hess2,params2 = setup_test(lotto_data,lotto_data,depvar='newcar')

  x = pd.concat(x[::order], 1)


In [5]:
def setup_test3(yn, xn):
    """Selection-test setup with 'ext_reno' as the outcome."""
    return setup_test(yn, xn, depvar='ext_reno')


# test_table prints its own report and returned None in the recorded run, so
# wrapping it in print() only appended a stray "None" to the cell output.
selection_tests.test_table(lotto_data, lotto_data, setup_test3)

regular: test, llr, omega ----
0.15499602439235502 0.2727632130792981 0.041711445238237284
---- 


  x = pd.concat(x[::order], 1)


\begin{center}
\begin{tabular}{ccccc}
\toprule
\textbf{Version} & \textbf{Result} & \textbf{90 \% CI} & \textbf{95 \% CI} & \textbf{99 \% CI} \\ \midrule
Shi (2015) & H0 & [-1.723, 2.274] & [-2.876, 3.427] & [-4.186, 4.736] \\
Classical & H0 & [-1.490, 1.800] & [-1.804, 2.114] & [-2.421, 2.731] \\
Bootstrap & H0 & [-1.403, 1.762] & [-1.767, 2.007] & [-2.637, 2.547] \\
\bottomrule
\end{tabular}
\end{center}
None


In [6]:
def setup_test1(yn, xn):
    """Selection-test setup with 'total' as the outcome."""
    return setup_test(yn, xn, depvar='total')


def setup_test2(yn, xn):
    """Selection-test setup with 'newcar' as the outcome."""
    return setup_test(yn, xn, depvar='newcar')


# Run order: default outcome ('happy_now'), then 'total', then 'newcar'.
# test_table prints its own report and returned None in the recorded run, so
# it is called directly instead of through print(), which only added "None".
for spec in (setup_test, setup_test1, setup_test2):
    selection_tests.test_table(lotto_data, lotto_data, spec)


  x = pd.concat(x[::order], 1)


regular: test, llr, omega ----
-0.6980169536900935 -1.2731589621031527 0.04376397398773363
---- 


  x = pd.concat(x[::order], 1)
  x = pd.concat(x[::order], 1)


\begin{center}
\begin{tabular}{ccccc}
\toprule
\textbf{Version} & \textbf{Result} & \textbf{90 \% CI} & \textbf{95 \% CI} & \textbf{99 \% CI} \\ \midrule
Shi (2015) & H0 & [-2.582, 1.107] & [-3.194, 1.719] & [-4.521, 3.046] \\
Classical & H0 & [-2.343, 0.947] & [-2.657, 1.261] & [-3.274, 1.878] \\
Bootstrap & H0 & [-2.398, 0.918] & [-2.687, 1.231] & [-3.206, 1.901] \\
\bottomrule
\end{tabular}
\end{center}
None


  x = pd.concat(x[::order], 1)


regular: test, llr, omega ----
0.031864184961234415 0.007378562636340291 0.005756780828434276
---- 


  x = pd.concat(x[::order], 1)
  x = pd.concat(x[::order], 1)


\begin{center}
\begin{tabular}{ccccc}
\toprule
\textbf{Version} & \textbf{Result} & \textbf{90 \% CI} & \textbf{95 \% CI} & \textbf{99 \% CI} \\ \midrule
Shi (2015) & H0 & [-1.888, 2.051] & [-2.282, 2.445] & [-5.626, 5.789] \\
Classical & H0 & [-1.613, 1.677] & [-1.927, 1.991] & [-2.544, 2.608] \\
Bootstrap & H0 & [-1.503, 1.749] & [-1.800, 2.041] & [-2.419, 2.974] \\
\bottomrule
\end{tabular}
\end{center}
None


  x = pd.concat(x[::order], 1)


regular: test, llr, omega ----
-0.5270234080196616 -1.1001998981381456 0.05601322426309195
---- 


  x = pd.concat(x[::order], 1)


\begin{center}
\begin{tabular}{ccccc}
\toprule
\textbf{Version} & \textbf{Result} & \textbf{90 \% CI} & \textbf{95 \% CI} & \textbf{99 \% CI} \\ \midrule
Shi (2015) & H0 & [-2.498, 1.501] & [-3.219, 2.222] & [-4.753, 3.756] \\
Classical & H0 & [-2.172, 1.118] & [-2.486, 1.432] & [-3.103, 2.049] \\
Bootstrap & H0 & [-2.232, 1.041] & [-2.491, 1.320] & [-2.889, 1.841] \\
\bottomrule
\end{tabular}
\end{center}
None
