In [9]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import datetime
import scipy.stats as stats

#graphing
import matplotlib.pyplot as plt
#stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

#import testing
import sys
sys.path.append("../")
import selection_tests

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
#need to replicate table 3?
lotto_raw = pd.read_stata('FinalAnon.dta')
print(lotto_raw.columns)

Index(['NperPstk', 'v01', 'v02a1', 'v02b1a', 'v02b1b', 'v02b1c', 'v02c1',
       'v02a2', 'v02b2a', 'v02b2b',
       ...
       'rememberStreetPrize', 'know_winnersA', 'know_winnersB',
       'num_neighbors_bot', 'num_neighbor_tixbot', 'neighbor_bot',
       'num_neighbors_botA', 'neighbor_botA', 'pstk', 'codegroup'],
      dtype='object', length=342)


In [11]:
lotto_data = lotto_raw.copy()
lotto_data['winner']=0
lotto_data['winner']= lotto_data['winner'] + (lotto_data['buyer']==1) + (lotto_data['win_code']==1)
lotto_data['winnings']= lotto_data['winner']*lotto_data['winnings']
lotto_data['sumwinnings']= lotto_data['sumwinnings']*lotto_data['winner']

#For the regressions, winnings are measured in euros/10000: 
lotto_data['winnings']=lotto_data['winnings']/10000

#Since it's not clear what is true for winners who report no winnings, drop these obs:

#NON-LOTTERY INCOME VARIABLE: 
#(note we use income last year, to avoid the problem that some hh included lottery winnings in current income)
#rescale own income to be in same units as lottery winnings (euro/10000):
lotto_data['inc_now']=lotto_data['inc_now']/10000
lotto_data['inc_then']=lotto_data['inc_then']/10000

In [12]:
#global covariates
global_cov = ['buyer','numtix','numtixsq',
              'partner_now', 'persons_now', 'kids', 'kids_sq', 'age_fam', 'age_famsq'] 
global_cov = global_cov + ['ed'+str(i) for i in range(2,9)]

In [41]:
depvar = 'newcar'
absorb = 'codegroup' 
imporant_cov = ['sumwinnings','inc_then','win_code']

lotto_data_clean = lotto_data.copy()
lotto_data_clean = sm.add_constant(lotto_data_clean[[depvar]+imporant_cov+global_cov +[absorb]+['wonbmw']])
lotto_data_clean = lotto_data_clean.dropna()
#lotto_data_clean = lotto_data_clean[lotto_data_clean['wonbmw']!=1]

y=lotto_data_clean[[depvar,absorb]]
ybar = y.mean()
y = y -  y.groupby(y[absorb]).transform('mean') + ybar
y = y[depvar]

X=lotto_data_clean[imporant_cov+['const']+global_cov+[absorb]]
Xbar = X.mean()
X = X - X.groupby(X[absorb]).transform('mean') + Xbar
X = X[imporant_cov+['const']+global_cov]

model = sm.OLS(y,X)
model.fit().params
model.fit().params[0:3]

sumwinnings    0.016740
inc_then       0.000633
win_code       0.037133
dtype: float64

foreach dv in newcar numcars ageofmaincar totalcarunits {
runregs `dv' `NW'

capture program drop runregs
program define runregs
args depvar NW
display " "
display " "
display "Sample Restriction: " "$condition1" 
*reg `depvar' sumwinnings inc_then `NW' $covariates buyer numtix numtixsq $condition1 , cluster (pstk)
areg `depvar' sumwinnings inc_then `NW' $covariates buyer numtix numtixsq $condition1 , absorb(codegroup) cluster (pstk)
test sumwinnings = inc_then
display " "
display " "
end