In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import datetime
import scipy.stats as stats

#graphing
import matplotlib.pyplot as plt
#stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

#import testing
import sys
sys.path.append("../")
import selection_tests

In [2]:
# Open question: do we need to replicate table 3?
# Read the Stata file into a DataFrame and list the columns it provides.
lotto_raw = pd.read_stata(filepath_or_buffer='FinalAnon.dta')
print(lotto_raw.columns)

Index(['NperPstk', 'v01', 'v02a1', 'v02b1a', 'v02b1b', 'v02b1c', 'v02c1',
       'v02a2', 'v02b2a', 'v02b2b',
       ...
       'rememberStreetPrize', 'know_winnersA', 'know_winnersB',
       'num_neighbors_bot', 'num_neighbor_tixbot', 'neighbor_bot',
       'num_neighbors_botA', 'neighbor_botA', 'pstk', 'codegroup'],
      dtype='object', length=342)


In [3]:
# Work on a copy so that lotto_raw itself is never modified.
lotto_data = lotto_raw.copy()

# 0/1 winner flag: set to 1 only where both buyer >= 1 and win_code >= 1.
lotto_data['winner'] = ((lotto_data['buyer'] >= 1) & (lotto_data['win_code'] >= 1)).astype(int)

# Multiply the prize columns by the flag so rows with winner == 0 get 0
# (missing amounts stay missing).
lotto_data['winnings'] = lotto_data['winnings'] * lotto_data['winner']
lotto_data['sumwinnings'] = lotto_data['sumwinnings'] * lotto_data['winner']

# For the regressions, winnings are measured in euros/10000.
# Only 'winnings' is rescaled here; 'sumwinnings' is left as it is.
lotto_data['winnings'] = lotto_data['winnings'] / 10000

# Frame dimensions followed by the column means of the three derived variables.
print(lotto_data.shape, lotto_data[['winner', 'winnings', 'sumwinnings']].mean())

(1879, 342) winner         0.118680
winnings       0.220703
sumwinnings    0.253965
dtype: float64


In [4]:
# Build each row mask once instead of repeating the same expression three times.
is_winner = lotto_data['winner'] == 1
zero_winnings = is_winner & (lotto_data['winnings'] == 0)
zero_sumwinnings = is_winner & (lotto_data['sumwinnings'] == 0)

# Number of rows that pass the 'winnings' check alone.
print((~zero_winnings).sum())

# Since it's not clear what is true for winners who report no winnings, drop these obs.
# Both checks are applied in one step; .copy() makes the result an independent
# frame, so the column assignments below never write to a slice of the old
# frame (the pattern that can raise pandas' SettingWithCopyWarning).
lotto_data = lotto_data.loc[~(zero_winnings | zero_sumwinnings)].copy()

# Mean winnings among the remaining winners, and how many of them are left.
winners = lotto_data.loc[lotto_data['winner'] == 1]
print(winners['winnings'].mean())
print(winners.shape)

# NON-LOTTERY INCOME VARIABLE:
# (note we use income last year, to avoid the problem that some hh included lottery winnings in current income)
# Rescale own income to be in the same units as lottery winnings (euro/10000).
# NOTE: this cell overwrites lotto_data in place, so re-running it without
# re-running the previous cell divides the income columns by 10000 again.
lotto_data['inc_now'] = lotto_data['inc_now'] / 10000
lotto_data['inc_then'] = lotto_data['inc_then'] / 10000

1850
2.1376288659793814
(194, 342)


In [5]:
# ---------------- global covariates ----------------
global_cov = [
    'buyer', 'numtix', 'numtixsq',
    'partner_now', 'persons_now', 'kids', 'kids_sq', 'age_fam', 'age_famsq',
]
# Append the columns ed2 ... ed8.
global_cov += [f'ed{level}' for level in range(2, 9)]
# ---------------------------------------------------
depvar = 'newcar'
absorb = 'codegroup'
# The name keeps its original spelling because the regression cell reads it.
imporant_cov = ['sumwinnings', 'inc_then', 'win_code']

# Start from a copy so lotto_data is left unchanged by this cell.
lotto_data_clean = lotto_data.copy()
print(lotto_data_clean.shape)

# Keep only the rows where wonbmw is not equal to 1.
lotto_data_clean = lotto_data_clean.loc[lotto_data_clean['wonbmw'] != 1]
# The shape is printed twice, exactly as in the recorded output.
print(lotto_data_clean.shape)
print(lotto_data_clean.shape)

# Mean of the dependent variable and the number of non-missing values.
depvar_observed = lotto_data_clean[depvar].dropna()
print(depvar_observed.mean(), depvar_observed.shape)

# Restrict to the model columns, add the 'const' column, then drop every row
# that has a missing value in any of them.
model_columns = [depvar] + imporant_cov + global_cov + [absorb]
lotto_data_clean = sm.add_constant(lotto_data_clean[model_columns]).dropna()

# Open question: should this leave 1389 observations?

(1850, 342)
(1825, 342)
(1825, 342)
0.1873259052924791 (1436,)


In [6]:
def _within_transform(frame, group_col):
    """Demean every column of ``frame`` within the groups of ``group_col``.

    Each value has its group mean subtracted and the overall column mean
    added back.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to transform; must contain ``group_col``.
    group_col : str
        Name of the column whose values define the groups.

    Returns
    -------
    tuple
        ``(transformed_frame, overall_means)``, where ``overall_means`` is
        ``frame.mean()``.
    """
    overall_means = frame.mean()
    # Group by the Series (not the column label) so that group_col itself stays
    # in the transformed frame and the subtraction below aligns on all columns.
    group_means = frame.groupby(frame[group_col]).transform('mean')
    return frame - group_means + overall_means, overall_means


# Dependent variable, demeaned within the groups of `absorb`.
y, ybar = _within_transform(lotto_data_clean[[depvar, absorb]], absorb)
y = y[depvar]

# Regressors get the same transformation; the grouping column is dropped afterwards.
regressors = imporant_cov + ['const'] + global_cov
X, Xbar = _within_transform(lotto_data_clean[regressors + [absorb]], absorb)
X = X[regressors]

model = sm.OLS(y, X)
# Report the coefficients of interest, selected by label so the result does
# not depend on the order of the columns in X.
print(model.fit().params.loc[imporant_cov])

sumwinnings    0.020825
inc_then      -0.000222
win_code       0.045983
dtype: float64
