In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm 
import statsmodels.formula.api as smf 
import os

In [None]:
# Work from the directory that holds the replication CSVs so the relative
# read_csv paths in the next cell resolve. Set the REPLICATION_DIR environment
# variable to run this notebook outside the original JupyterHub layout; when it
# is unset, the original hard-coded location is used unchanged.
os.chdir(os.environ.get('REPLICATION_DIR', '/home/jovyan/PPPA8022/Replication/Final'))

In [None]:
# Load the four input tables from the current working directory: the "full"
# and "growth" files for 2004-2014 and for 2010-2014 (per their file names).
# Files are read in the same order as the names on the left-hand side.
a0414, a1014, g0414, g1014 = (
    pd.read_csv(csv_name)
    for csv_name in (
        'full_20042014.csv',
        'full_20102014.csv',
        'growth_20042014.csv',
        'growth_20102014.csv',
    )
)

### Trends Test 
"If long-run trends differ between Kansas and the other states that serve as a control group, then we risk interpreting preexisting conditions in employment trends as a treatment effect. To check for this, we conduct a trend test using the quarterly data from 2004 to 2014 and regressing QWI quarterly county establishment employment on eleven year dummies, state fixed effects, and interactions of the Kansas state indicator and year dummies. If the coefficient estimates on the interaction terms are statistically significant, then Kansas employment deviates from the control group employment trend. The results are available upon request and show that the coefficient estimates on the interaction terms are statistically insignificant. Thus, we are more confident that any policy effect we detect from the Kansas tax base change is not due to differing long-term employment trends."

In [None]:
# List the column names of the frame loaded from full_20042014.csv.
a0414.columns

In [None]:
# Keep only the three columns the trend-test regression refers to.
trend = a0414.filter(items=['Emp', 'year', 'stateFIPS'])

In [None]:
# Pre-trend check: regress Emp on year dummies, state dummies, and their full
# set of interactions (the `*` operator expands to both main effects plus the
# interaction terms).
# NOTE(review): the methodology quoted above interacts only the Kansas
# indicator with the year dummies; this formula interacts every state with
# year — confirm that is intended.
trend_test = smf.ols(
    'Emp ~ C(year)*C(stateFIPS)',
    trend,
).fit()
print(trend_test.summary())

In [None]:
from statsmodels.iolib.summary2 import Summary, summary_col

# Render the fitted trend-test model as a coefficient table with significance
# stars. The table is stored back under `trend_test` because the next cell
# displays that name. Guard the call so the cell can be re-run safely: after
# the first run `trend_test` is already a Summary, which is not a fitted
# result and cannot be passed to summary_col again.
if not isinstance(trend_test, Summary):
    trend_test = summary_col([trend_test], stars=True)

In [None]:
# Display the trend-test coefficient table built in the previous cell.
trend_test

So... we pass the trend test! Nearly all of the parameters, including the interaction terms, are statistically insignificant.

### Linear Regressions

#### 2004-2014

In [None]:
# Log-linear specification on the 2004-2014 frame: logy regressed on KPost2012,
# pop10, str, imtr and cmtr, with categorical qtryr and countyFIPS terms and
# the five tt_* state terms (presumably state-specific time trends — confirm).
# Standard errors are clustered on stateFIPS.
sp1 = """logy ~ KPost2012 + pop10 + str + imtr + cmtr + C(qtryr) +C(countyFIPS) + tt_CO +
tt_KS + tt_MO + tt_NE + tt_OK"""
spec1 = smf.ols(sp1, a0414).fit(
    cov_type='cluster',
    cov_kwds={'groups': a0414['stateFIPS']},
)
spec1.summary()

In [None]:
# Per-capita specification on the 2004-2014 frame: percap_emp regressed on
# KPost2012, str, imtr and cmtr, with categorical qtryr and countyFIPS terms
# and the five tt_* state terms (presumably state-specific time trends —
# confirm). pop10 is not a regressor here. Standard errors are clustered on
# stateFIPS.
sp2 = """percap_emp ~ KPost2012 + str + imtr + cmtr + C(qtryr) +C(countyFIPS) + tt_CO +
tt_KS + tt_MO + tt_NE + tt_OK"""
spec2 = smf.ols(sp2, a0414).fit(
    cov_type='cluster',
    cov_kwds={'groups': a0414['stateFIPS']},
)
spec2.summary()

In [None]:
# Growth-rate specification on the 2004-2014 growth frame: emp_growth regressed
# on KPost2012, str, cmtr, pop10 and imtr, with categorical qtryr and
# countyFIPS terms and the five tt_* state terms (presumably state-specific
# time trends — confirm). Standard errors are clustered on stateFIPS.
sp3 = """emp_growth ~ KPost2012 + str + cmtr + pop10 + imtr + C(qtryr) +C(countyFIPS) + tt_CO +
tt_KS + tt_MO + tt_NE + tt_OK"""
spec3 = smf.ols(sp3, g0414).fit(
    cov_type='cluster',
    cov_kwds={'groups': g0414['stateFIPS']},
)
spec3.summary()

#### 2010-2014

In [None]:
# Log-linear specification on the 2010-2014 frame: logy regressed on KPost2012,
# pop10, str and imtr, with categorical qtryr and countyFIPS terms and the five
# tt_* state terms. Standard errors are clustered on stateFIPS.
# NOTE(review): unlike the 2004-2014 log-linear formula, cmtr is not included
# here — confirm the omission is intentional.
sp4 = """logy ~ KPost2012 + pop10 + str + imtr + C(qtryr) +C(countyFIPS) + tt_CO +
tt_KS + tt_MO + tt_NE + tt_OK"""
spec4 = smf.ols(sp4, a1014).fit(
    cov_type='cluster',
    cov_kwds={'groups': a1014['stateFIPS']},
)
spec4.summary()

In [None]:
# Per-capita specification on the 2010-2014 frame: percap_emp regressed on
# KPost2012, str and imtr, with categorical qtryr and countyFIPS terms and the
# five tt_* state terms. Standard errors are clustered on stateFIPS.
# NOTE(review): unlike the 2004-2014 per-capita formula, cmtr is not included
# here — confirm the omission is intentional.
sp5 = """percap_emp ~ KPost2012 + str + imtr + C(qtryr) +C(countyFIPS) + tt_CO +
tt_KS + tt_MO + tt_NE + tt_OK"""
spec5 = smf.ols(sp5, a1014).fit(
    cov_type='cluster',
    cov_kwds={'groups': a1014['stateFIPS']},
)
spec5.summary()

In [None]:
# Drop the rows whose qtryr equals 2010.1 from the 2010-2014 growth frame
# before fitting the growth-rate model.
# NOTE(review): presumably the first period has no usable growth rate — confirm.
g1014 = g1014.loc[g1014['qtryr'].ne(2010.1)]

In [None]:
# Row count of the filtered 2010-2014 growth frame.
g1014.shape[0]

In [None]:
# Row count of the 2010-2014 full frame, for comparison with the growth frame.
a1014.shape[0]

In [None]:
# Distinct year values still present in the filtered growth frame.
g1014['year'].unique()

In [None]:
# Growth-rate specification on the filtered 2010-2014 growth frame: emp_growth
# regressed on KPost2012, str, imtr and pop10, with categorical qtryr and
# countyFIPS terms and the five tt_* state terms. Standard errors are clustered
# on stateFIPS.
# NOTE(review): unlike the 2004-2014 growth-rate formula, cmtr is not included
# here — confirm the omission is intentional.
sp6 = """emp_growth ~ KPost2012 + str + imtr + pop10 + C(qtryr) +C(countyFIPS) + tt_CO +
tt_KS + tt_MO + tt_NE + tt_OK"""
spec6 = smf.ols(sp6, g1014).fit(
    cov_type='cluster',
    cov_kwds={'groups': g1014['stateFIPS']},
)
spec6.summary()

In [None]:
from statsmodels.iolib.summary2 import summary_col

# Combine the six fitted specifications into one side-by-side table with
# significance stars. Columns follow the order spec1..spec6: the three
# 2004-2014 models first, then the three 2010-2014 models.
output = summary_col(
    [spec1, spec2, spec3, spec4, spec5, spec6],
    stars=True,
)

#### My Results

In [None]:
# Display the combined table of the six specifications.
output

#### Author's Results
![alt text](http://journals.sagepub.com/na101/home/literatum/publisher/sage/journals/content/pfrb/0/pfrb.ahead-of-print/1091142117699274/20170324/images/large/10.1177_1091142117699274-table3.jpeg)

Author note: 'We estimate model 1 allowing for different function forms, and since the policy variable is at the state level and the analysis covers all counties in five states over time, we cluster the standard errors at the state level.' So...