In [1]:
import pandas as pd
import numpy as np

## Load State Demographic Data

In [2]:
#Look at Columns
demo_data = pd.read_csv('../00_source_data/sc-est2019-alldata5.csv')
demo_data.columns

Index(['SUMLEV', 'REGION', 'DIVISION', 'STATE', 'NAME', 'SEX', 'ORIGIN',
       'RACE', 'AGE', 'CENSUS2010POP', 'ESTIMATESBASE2010', 'POPESTIMATE2010',
       'POPESTIMATE2011', 'POPESTIMATE2012', 'POPESTIMATE2013',
       'POPESTIMATE2014', 'POPESTIMATE2015', 'POPESTIMATE2016',
       'POPESTIMATE2017', 'POPESTIMATE2018', 'POPESTIMATE2019'],
      dtype='object')

In [3]:
#We only want the census data for 2010
demo_data = demo_data.drop(columns=['SUMLEV','ESTIMATESBASE2010', 'POPESTIMATE2010'])
#We re-name the columns with their years
demo_data = demo_data.rename(columns={"CENSUS2010POP": "2010", "POPESTIMATE2011": "2011","POPESTIMATE2012": "2012",
                          "POPESTIMATE2013": "2013","POPESTIMATE2014": "2014","POPESTIMATE2015": "2015",
                         "POPESTIMATE2016": "2016", "POPESTIMATE2017": "2017", "POPESTIMATE2018": "2018",
                         "POPESTIMATE2019": "2019"})
demo_data.head()

Unnamed: 0,REGION,DIVISION,STATE,NAME,SEX,ORIGIN,RACE,AGE,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,3,6,1,Alabama,0,0,1,0,40144,41121,40263,39189,40105,40331,40231,40165,38838,37673
1,3,6,1,Alabama,0,0,1,1,40184,39919,41037,40229,39290,40364,40316,40427,40254,39151
2,3,6,1,Alabama,0,0,1,2,41688,40232,39835,40983,40116,39406,40486,40529,40670,40494
3,3,6,1,Alabama,0,0,1,3,41722,41702,40168,39912,41059,40251,39527,40637,40779,41028
4,3,6,1,Alabama,0,0,1,4,41084,41722,41608,40125,39890,40929,40241,39522,40721,40974


In [4]:
#Create Year and Population Columns
demo_data = demo_data.melt(id_vars=['REGION','DIVISION','STATE','NAME','SEX','ORIGIN','RACE','AGE'], var_name='Year',value_vars = ['2010','2011','2012','2013','2014','2015','2016','2017','2018','2019'],value_name='Population')
demo_data.head()

Unnamed: 0,REGION,DIVISION,STATE,NAME,SEX,ORIGIN,RACE,AGE,Year,Population
0,3,6,1,Alabama,0,0,1,0,2010,40144
1,3,6,1,Alabama,0,0,1,1,2010,40184
2,3,6,1,Alabama,0,0,1,2,2010,41688
3,3,6,1,Alabama,0,0,1,3,2010,41722
4,3,6,1,Alabama,0,0,1,4,2010,41084


## Creat Gender Percentages

In [5]:
#Create Female % and Male %
sex_data = demo_data[['NAME','SEX','Year','Population']]
sex_data = sex_data.groupby(['NAME','SEX','Year']).sum()
sex_data = sex_data.reset_index()
sex_data = sex_data.pivot(index=['NAME','Year'], columns='SEX',values='Population')
sex_data = sex_data.reset_index()
#Total = 0, Male = 1, Female = 2
sex_data['Female_pct'] = sex_data[2]/sex_data[0]
sex_data['Male_pct'] = sex_data[1]/sex_data[0]
sex_data.head()

SEX,NAME,Year,0,1,2,Female_pct,Male_pct
0,Alabama,2010,9696196,4707146,4989050,0.514537,0.485463
1,Alabama,2011,9741692,4726808,5014884,0.514786,0.485214
2,Alabama,2012,9780674,4744834,5035840,0.514877,0.485123
3,Alabama,2013,9814862,4760590,5054272,0.514961,0.485039
4,Alabama,2014,9843016,4772148,5070868,0.515174,0.484826


## Merge and Create Race Percentages

In [6]:
#Create Race %
race_data = demo_data[['NAME','RACE','Year','Population']]
race_data = race_data.groupby(['NAME','RACE','Year']).sum()
race_data = race_data.reset_index()
race_data = race_data.pivot(index=['NAME','Year'], columns='RACE',values='Population')
race_data = race_data.reset_index()


In [7]:

race_sex_data = pd.merge(race_data,sex_data,how='inner',left_on=['NAME','Year'],right_on=['NAME','Year'])
#White=1, Black=2, Native=3, Asian=4, Pacific Islander=5
race_sex_data['White_pct'] = race_sex_data['1_y']/race_sex_data[0]
race_sex_data['Black_pct'] = race_sex_data['2_y']/race_sex_data[0]
race_sex_data['Native_pct'] = race_sex_data[3]/race_sex_data[0]
race_sex_data['Asian_pct'] = race_sex_data[4]/race_sex_data[0]
race_sex_data['PI_pct'] = race_sex_data[5]/race_sex_data[0]

In [8]:
race_sex_data.head()

Unnamed: 0,NAME,Year,1_x,2_x,3,4,5,0,1_y,2_y,Female_pct,Male_pct,White_pct,Black_pct,Native_pct,Asian_pct,PI_pct
0,Alabama,2010,13681172,5154140,251716,273428,31936,9696196,4707146,4989050,0.514537,0.485463,0.485463,0.514537,0.02596,0.0282,0.003294
1,Alabama,2011,13718404,5193768,253844,285780,31588,9741692,4726808,5014884,0.514786,0.485214,0.485214,0.514786,0.026057,0.029336,0.003243
2,Alabama,2012,13735824,5235360,256576,301756,31832,9780674,4744834,5035840,0.514877,0.485123,0.485123,0.514877,0.026233,0.030852,0.003255
3,Alabama,2013,13749424,5276176,258816,313296,32012,9814862,4760590,5054272,0.514961,0.485039,0.485039,0.514961,0.02637,0.031921,0.003262
4,Alabama,2014,13762252,5306836,260960,323652,32332,9843016,4772148,5070868,0.515174,0.484826,0.484826,0.515174,0.026512,0.032881,0.003285


## Create Age Groups and Percentages

In [9]:
#Create Age Groups
age_data = demo_data[['NAME','AGE','Year','Population']]
data0_18 = age_data[age_data['AGE'] < 18].groupby(['NAME','Year']).sum()
data0_18 = data0_18.drop(columns=['AGE'])
data0_18 = data0_18.reset_index()
data0_18 = data0_18.rename(columns={'Population':'Pop_0_18'})

data18_25 = age_data[(age_data['AGE'] >= 18) & (age_data['AGE'] < 25)].groupby(['NAME','Year']).sum()
data18_25 = data18_25.drop(columns=['AGE'])
data18_25 = data18_25.reset_index()
data18_25 = data18_25.rename(columns={'Population':'Pop_18_25'})

data25_40 = age_data[(age_data['AGE'] >= 25) & (age_data['AGE'] < 40)].groupby(['NAME','Year']).sum()
data25_40 = data25_40.drop(columns=['AGE'])
data25_40 = data25_40.reset_index()
data25_40 = data25_40.rename(columns={'Population':'Pop_25_40'})

data40_65 = age_data[(age_data['AGE'] >= 40) & (age_data['AGE'] < 65)].groupby(['NAME','Year']).sum()
data40_65 = data40_65.drop(columns=['AGE'])
data40_65 = data40_65.reset_index()
data40_65 = data40_65.rename(columns={'Population':'Pop_40_65'})

data65 = age_data[age_data['AGE'] >= 65].groupby(['NAME','Year']).sum()
data65 = data65.drop(columns=['AGE'])
data65 = data65.reset_index()
data65 = data65.rename(columns={'Population':'Pop_65'})

In [10]:
age_d1 = pd.merge(data0_18,data18_25,how='inner',left_on=['NAME','Year'],right_on=['NAME','Year'])
age_d2 = pd.merge(age_d1,data25_40,how='inner',left_on=['NAME','Year'],right_on=['NAME','Year'])
age_d3 = pd.merge(age_d2,data40_65,how='inner',left_on=['NAME','Year'],right_on=['NAME','Year'])
age_groups = pd.merge(age_d3,data65,how='inner',left_on=['NAME','Year'],right_on=['NAME','Year'])

In [11]:
age_groups.head()

Unnamed: 0,NAME,Year,Pop_0_18,Pop_18_25,Pop_25_40,Pop_40_65,Pop_65
0,Alabama,2010,4654852,1946012,3714172,6429224,2648132
1,Alabama,2011,4628256,1958084,3688764,6502168,2706112
2,Alabama,2012,4606316,1974800,3683088,6485880,2811264
3,Alabama,2013,4584672,1981568,3693168,6471616,2898700
4,Alabama,2014,4573076,1958668,3717468,6447860,2988960


## Merge Race, Sex, Age Together

In [12]:
#Merge AGE groups with Sex and Race Groups
final_demo = pd.merge(race_sex_data,age_groups,how='inner',left_on=['NAME','Year'],right_on=['NAME','Year'])

#Create Age Percentages
final_demo['Pct_0_18'] = final_demo['Pop_0_18']/final_demo[0]
final_demo['Pct_18_25'] = final_demo['Pop_18_25']/final_demo[0]
final_demo['Pct_25_40'] = final_demo['Pop_25_40']/final_demo[0]
final_demo['Pct_40_65'] = final_demo['Pop_40_65']/final_demo[0]
final_demo['Pct_65'] = final_demo['Pop_65']/final_demo[0]

final_demo = final_demo[['NAME','Year','Female_pct','Male_pct','White_pct','Black_pct','Native_pct','Asian_pct',
                         'PI_pct','Pct_0_18','Pct_18_25','Pct_25_40','Pct_40_65','Pct_65']]
final_demo.head()

Unnamed: 0,NAME,Year,Female_pct,Male_pct,White_pct,Black_pct,Native_pct,Asian_pct,PI_pct,Pct_0_18,Pct_18_25,Pct_25_40,Pct_40_65,Pct_65
0,Alabama,2010,0.514537,0.485463,0.485463,0.514537,0.02596,0.0282,0.003294,0.48007,0.200699,0.383055,0.663067,0.27311
1,Alabama,2011,0.514786,0.485214,0.485214,0.514786,0.026057,0.029336,0.003243,0.475098,0.201,0.378657,0.667458,0.277787
2,Alabama,2012,0.514877,0.485123,0.485123,0.514877,0.026233,0.030852,0.003255,0.470961,0.201908,0.376568,0.663132,0.28743
3,Alabama,2013,0.514961,0.485039,0.485039,0.514961,0.02637,0.031921,0.003262,0.467115,0.201895,0.376283,0.659369,0.295338
4,Alabama,2014,0.515174,0.484826,0.484826,0.515174,0.026512,0.032881,0.003285,0.464601,0.198991,0.377676,0.65507,0.303663


## Output as CSV

In [13]:
#Output as CSV
final_demo.to_csv('../20_intermediate_files/full_demographic_dataset.csv')

states = ['South Dakota', 'West Virginia', 'Nebraska', 'Iowa', 'Idaho', 'Kansas']
final_demo = final_demo[final_demo['NAME'].isin(states)]
final_demo.to_csv('../20_intermediate_files/final_demographic_dataset.csv')

# Detailed Regression Analysis

In [14]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import statsmodels.api as sm
import statsmodels.formula.api as smf

### Load Dataset

In [15]:
df = pd.read_csv('../20_intermediate_files/final_dataset.csv')
df = df.drop('Unnamed: 0', axis=1)
df['treatment'] = pd.Categorical(df['treatment'])

In [16]:
df.head()

Unnamed: 0,State,Year,FIPS_Code,Civilian_Pop,Civilian_Labor_Force,Labor_Force_Pct,Employed_Total,Employed_Pct,Unemployed_Total,Unemployed_Rate,Population,gdp,treatment
0,Idaho,2010,16.0,1165430.0,762158.666667,65.408333,695375.583333,59.683333,66783.083333,8.758333,1570746.0,57952.9,0
1,Idaho,2011,16.0,1179092.0,764066.916667,64.8,701595.75,59.508333,62471.166667,8.166667,1583910.0,57825.3,0
2,Idaho,2012,16.0,1193211.0,772073.333333,64.708333,715011.583333,59.908333,57061.75,7.4,1595324.0,57780.1,0
3,Idaho,2013,16.0,1208621.0,777682.75,64.35,724710.75,59.966667,52972.0,6.816667,1611206.0,59966.8,0
4,Idaho,2014,16.0,1226459.0,777061.5,63.358333,742988.166667,60.575,34073.333333,4.383333,1631112.0,61663.2,0


### Regress Unemployment_Rate on treatment

In [17]:
result1 = smf.ols('Unemployed_Rate ~ C(treatment)', data=df).fit()
result1.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.003
Model:,OLS,Adj. R-squared:,-0.015
Method:,Least Squares,F-statistic:,0.1494
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,0.701
Time:,19:45:52,Log-Likelihood:,-115.39
No. Observations:,60,AIC:,234.8
Df Residuals:,58,BIC:,239.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.6781,0.307,15.215,0.000,4.063,5.294
C(treatment)[T.1],-0.1681,0.435,-0.386,0.701,-1.038,0.702

0,1,2,3
Omnibus:,7.165,Durbin-Watson:,0.504
Prob(Omnibus):,0.028,Jarque-Bera (JB):,7.468
Skew:,0.851,Prob(JB):,0.0239
Kurtosis:,2.702,Cond. No.,2.62


### Regress Unemployment_Rate on treatment (Add Control variables)

In [18]:
result2 = smf.ols('Unemployed_Rate ~ C(treatment) + gdp + Population', data=df).fit()
result2.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.593
Model:,OLS,Adj. R-squared:,0.571
Method:,Least Squares,F-statistic:,27.21
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,5.48e-11
Time:,19:45:52,Log-Likelihood:,-88.492
No. Observations:,60,AIC:,185.0
Df Residuals:,56,BIC:,193.4
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.8739,0.743,2.524,0.014,0.386,3.361
C(treatment)[T.1],0.9405,0.403,2.333,0.023,0.133,1.748
gdp,-9.873e-05,1.1e-05,-9.014,0.000,-0.000,-7.68e-05
Population,5.93e-06,7.08e-07,8.382,0.000,4.51e-06,7.35e-06

0,1,2,3
Omnibus:,4.089,Durbin-Watson:,0.502
Prob(Omnibus):,0.129,Jarque-Bera (JB):,3.552
Skew:,0.315,Prob(JB):,0.169
Kurtosis:,4.012,Cond. No.,12600000.0


## Load Demographic Data

In [19]:
demo_df = pd.read_csv('../20_intermediate_files/final_demographic_dataset.csv')
demo_df = demo_df.drop('Unnamed: 0', axis=1)

In [20]:
final_df = pd.merge(demo_df,df,how='inner',left_on=['NAME','Year'],right_on=['State','Year'])

## Add demographic columns

In [21]:
#Create Male - Female Column (Male% - Female %)
#Create White% - Black%, White% - Native%, White% - Asian%, White% - PI%
final_df['M_minus_F'] = final_df['Male_pct'] - final_df['Female_pct']
final_df['W_minus_Black'] = final_df['White_pct'] - final_df['Black_pct']
final_df['W_minus_Native'] = final_df['White_pct'] - final_df['Native_pct']
final_df['W_minus_Asian'] = final_df['White_pct'] - final_df['Asian_pct']
final_df['W_minus_PI'] = final_df['White_pct'] - final_df['PI_pct']

#Add pre/post
pre = final_df[np.logical_and(final_df['Year'] >= 2010,final_df['Year'] <= 2014)].copy()
pre['Post'] = 0
post = final_df[np.logical_and(final_df['Year'] >= 2016,final_df['Year'] <= 2019)].copy()
post['Post'] = 1 

selected_df = pd.concat([pre, post])

selected_df.head()

Unnamed: 0,NAME,Year,Female_pct,Male_pct,White_pct,Black_pct,Native_pct,Asian_pct,PI_pct,Pct_0_18,Pct_18_25,Pct_25_40,Pct_40_65,Pct_65,State,FIPS_Code,Civilian_Pop,Civilian_Labor_Force,Labor_Force_Pct,Employed_Total,Employed_Pct,Unemployed_Total,Unemployed_Rate,Population,gdp,treatment,M_minus_F,W_minus_Black,W_minus_Native,W_minus_Asian,W_minus_PI,Post
0,Idaho,2010,0.499106,0.500894,0.500894,0.499106,0.052036,0.038244,0.006878,0.556127,0.198024,0.389419,0.611566,0.244863,Idaho,16.0,1165430.0,762158.666667,65.408333,695375.583333,59.683333,66783.083333,8.758333,1570746.0,57952.9,0,0.001787,0.001787,0.448857,0.462649,0.494015,0
1,Idaho,2011,0.499316,0.500684,0.500684,0.499316,0.053021,0.0396,0.007038,0.5502,0.197314,0.386402,0.614967,0.251117,Idaho,16.0,1179092.0,764066.916667,64.8,701595.75,59.508333,62471.166667,8.166667,1583910.0,57825.3,0,0.001367,0.001367,0.447663,0.461084,0.493646,0
2,Idaho,2012,0.499362,0.500638,0.500638,0.499362,0.053508,0.040696,0.007199,0.545348,0.197034,0.384998,0.610708,0.261912,Idaho,16.0,1193211.0,772073.333333,64.708333,715011.583333,59.908333,57061.75,7.4,1595324.0,57780.1,0,0.001277,0.001277,0.447131,0.459942,0.49344,0
3,Idaho,2013,0.499199,0.500801,0.500801,0.499199,0.054108,0.041306,0.007446,0.541095,0.196193,0.385675,0.605855,0.271182,Idaho,16.0,1208621.0,777682.75,64.35,724710.75,59.966667,52972.0,6.816667,1611206.0,59966.8,0,0.001603,0.001603,0.446693,0.459495,0.493356,0
4,Idaho,2014,0.498997,0.501003,0.501003,0.498997,0.054473,0.042238,0.007643,0.537426,0.194669,0.387019,0.601235,0.279651,Idaho,16.0,1226459.0,777061.5,63.358333,742988.166667,60.575,34073.333333,4.383333,1631112.0,61663.2,0,0.002005,0.002005,0.44653,0.458765,0.49336,0


## Model 1 - Treatment, Population, Sex, Race, Age

In [22]:
final_df["Unemployed_Rate"] = final_df["Unemployed_Rate"] / 100

In [23]:
model1 = smf.ols('Unemployed_Rate ~ C(treatment) + Population + Female_pct + Black_pct  + Pct_0_18 + PI_pct + Pct_18_25 + Pct_25_40 + Pct_40_65 + Pct_65', data=final_df).fit()
model1.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.847
Model:,OLS,Adj. R-squared:,0.823
Method:,Least Squares,F-statistic:,35.27
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,3.53e-18
Time:,19:45:52,Log-Likelihood:,217.14
No. Observations:,60,AIC:,-416.3
Df Residuals:,51,BIC:,-397.4
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.8380,0.484,-1.731,0.090,-1.810,0.134
C(treatment)[T.1],-0.0270,0.011,-2.450,0.018,-0.049,-0.005
Population,-9.84e-09,1.06e-08,-0.930,0.357,-3.11e-08,1.14e-08
Female_pct,1.2795,0.968,1.322,0.192,-0.664,3.223
Black_pct,1.2795,0.968,1.322,0.192,-0.664,3.223
Pct_0_18,0.0475,0.295,0.161,0.873,-0.545,0.640
PI_pct,-5.2078,1.777,-2.931,0.005,-8.775,-1.640
Pct_18_25,-1.2857,0.223,-5.773,0.000,-1.733,-0.839
Pct_25_40,-0.1458,0.278,-0.524,0.603,-0.705,0.413

0,1,2,3
Omnibus:,12.616,Durbin-Watson:,0.512
Prob(Omnibus):,0.002,Jarque-Bera (JB):,13.839
Skew:,0.931,Prob(JB):,0.000988
Kurtosis:,4.438,Cond. No.,1.9e+24


# Model 1A - Treatment, Pre/Post, Population, Sex, Race, Age

In [24]:
model1a = smf.ols('Unemployed_Rate ~ C(treatment) + C(Post) + C(treatment)*C(Post) + Population + M_minus_F + W_minus_Black + W_minus_Asian + W_minus_Native + W_minus_PI + Pct_0_18 + Pct_18_25 + Pct_25_40 + Pct_40_65 + Pct_65', data=selected_df).fit()
model1a.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.941
Model:,OLS,Adj. R-squared:,0.924
Method:,Least Squares,F-statistic:,54.64
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,2.67e-21
Time:,19:45:52,Log-Likelihood:,-28.476
No. Observations:,54,AIC:,82.95
Df Residuals:,41,BIC:,108.8
Df Model:,12,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-450.8534,77.107,-5.847,0.000,-606.575,-295.132
C(treatment)[T.1],-6.4052,1.102,-5.812,0.000,-8.631,-4.180
C(Post)[T.1],-0.0937,0.432,-0.217,0.829,-0.967,0.779
C(treatment)[T.1]:C(Post)[T.1],0.3223,0.539,0.598,0.553,-0.766,1.411
Population,-4.71e-06,8.69e-07,-5.417,0.000,-6.47e-06,-2.95e-06
M_minus_F,-344.5664,82.088,-4.198,0.000,-510.347,-178.786
W_minus_Black,-344.5664,82.088,-4.198,0.000,-510.347,-178.786
W_minus_Asian,-35.2314,40.641,-0.867,0.391,-117.308,46.845
W_minus_Native,41.0410,7.208,5.694,0.000,26.484,55.598

0,1,2,3
Omnibus:,0.947,Durbin-Watson:,1.673
Prob(Omnibus):,0.623,Jarque-Bera (JB):,0.463
Skew:,0.21,Prob(JB):,0.793
Kurtosis:,3.171,Cond. No.,1.14e+24


# Model 1A - Treatment, Pre/Post, Population, Sex, Race, Age

In [25]:
model1a = smf.ols('Unemployed_Rate ~ C(treatment) + C(Post) + C(treatment)*C(Post) + Population + M_minus_F + W_minus_Black + W_minus_Asian + W_minus_Native + W_minus_PI + Pct_0_18 + Pct_18_25 + Pct_25_40 + Pct_40_65 + Pct_65', data=selected_df).fit()
model1a.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.941
Model:,OLS,Adj. R-squared:,0.924
Method:,Least Squares,F-statistic:,54.64
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,2.67e-21
Time:,19:45:52,Log-Likelihood:,-28.476
No. Observations:,54,AIC:,82.95
Df Residuals:,41,BIC:,108.8
Df Model:,12,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-450.8534,77.107,-5.847,0.000,-606.575,-295.132
C(treatment)[T.1],-6.4052,1.102,-5.812,0.000,-8.631,-4.180
C(Post)[T.1],-0.0937,0.432,-0.217,0.829,-0.967,0.779
C(treatment)[T.1]:C(Post)[T.1],0.3223,0.539,0.598,0.553,-0.766,1.411
Population,-4.71e-06,8.69e-07,-5.417,0.000,-6.47e-06,-2.95e-06
M_minus_F,-344.5664,82.088,-4.198,0.000,-510.347,-178.786
W_minus_Black,-344.5664,82.088,-4.198,0.000,-510.347,-178.786
W_minus_Asian,-35.2314,40.641,-0.867,0.391,-117.308,46.845
W_minus_Native,41.0410,7.208,5.694,0.000,26.484,55.598

0,1,2,3
Omnibus:,0.947,Durbin-Watson:,1.673
Prob(Omnibus):,0.623,Jarque-Bera (JB):,0.463
Skew:,0.21,Prob(JB):,0.793
Kurtosis:,3.171,Cond. No.,1.14e+24


## Model 2 - Treatment, Population, Sex

In [26]:
model2 = smf.ols('Unemployed_Rate ~ C(treatment) + Population + Female_pct + Male_pct', data=final_df).fit()
model2.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.659
Model:,OLS,Adj. R-squared:,0.641
Method:,Least Squares,F-statistic:,36.1
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,4.05e-13
Time:,19:45:52,Log-Likelihood:,193.13
No. Observations:,60,AIC:,-378.3
Df Residuals:,56,BIC:,-369.9
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0756,0.006,12.876,0.000,0.064,0.087
C(treatment)[T.1],-0.0322,0.005,-7.079,0.000,-0.041,-0.023
Population,-3.042e-08,3.69e-09,-8.243,0.000,-3.78e-08,-2.3e-08
Female_pct,3.5893,0.344,10.431,0.000,2.900,4.279
Male_pct,-3.5136,0.340,-10.338,0.000,-4.194,-2.833

0,1,2,3
Omnibus:,2.079,Durbin-Watson:,0.395
Prob(Omnibus):,0.354,Jarque-Bera (JB):,1.401
Skew:,-0.047,Prob(JB):,0.496
Kurtosis:,3.743,Cond. No.,2.24e+22


## Model 2A - Treatment, Population, Pre/Post, Race, Sex

In [27]:
model2a = smf.ols('Unemployed_Rate ~ C(treatment) + C(Post) + C(treatment)*C(Post) + Population + M_minus_F + W_minus_Black + W_minus_Asian + W_minus_Native + W_minus_PI', data=selected_df).fit()
model2a.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.851
Method:,Least Squares,F-statistic:,38.82
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,1.05e-17
Time:,19:45:52,Log-Likelihood:,-49.153
No. Observations:,54,AIC:,116.3
Df Residuals:,45,BIC:,134.2
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-591.2724,131.952,-4.481,0.000,-857.037,-325.508
C(treatment)[T.1],-7.6158,0.921,-8.266,0.000,-9.471,-5.760
C(Post)[T.1],-1.3762,0.289,-4.759,0.000,-1.959,-0.794
C(treatment)[T.1]:C(Post)[T.1],0.7640,0.447,1.711,0.094,-0.135,1.663
Population,-6.861e-06,1.02e-06,-6.708,0.000,-8.92e-06,-4.8e-06
M_minus_F,-397.2226,54.342,-7.310,0.000,-506.673,-287.772
W_minus_Black,-397.2226,54.342,-7.310,0.000,-506.673,-287.772
W_minus_Asian,-74.8466,20.680,-3.619,0.001,-116.497,-33.196
W_minus_Native,30.0795,8.981,3.349,0.002,11.991,48.168

0,1,2,3
Omnibus:,1.417,Durbin-Watson:,1.214
Prob(Omnibus):,0.492,Jarque-Bera (JB):,0.678
Skew:,0.035,Prob(JB):,0.713
Kurtosis:,3.544,Cond. No.,2.05e+24


## Model 3 - Treatment,Population,Race

In [28]:
model2a = smf.ols('Unemployed_Rate ~ C(treatment) + C(Post) + C(treatment)*C(Post) + Population + M_minus_F + W_minus_Black + W_minus_Asian + W_minus_Native + W_minus_PI', data=selected_df).fit()
model2a.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.851
Method:,Least Squares,F-statistic:,38.82
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,1.05e-17
Time:,19:45:52,Log-Likelihood:,-49.153
No. Observations:,54,AIC:,116.3
Df Residuals:,45,BIC:,134.2
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-591.2724,131.952,-4.481,0.000,-857.037,-325.508
C(treatment)[T.1],-7.6158,0.921,-8.266,0.000,-9.471,-5.760
C(Post)[T.1],-1.3762,0.289,-4.759,0.000,-1.959,-0.794
C(treatment)[T.1]:C(Post)[T.1],0.7640,0.447,1.711,0.094,-0.135,1.663
Population,-6.861e-06,1.02e-06,-6.708,0.000,-8.92e-06,-4.8e-06
M_minus_F,-397.2226,54.342,-7.310,0.000,-506.673,-287.772
W_minus_Black,-397.2226,54.342,-7.310,0.000,-506.673,-287.772
W_minus_Asian,-74.8466,20.680,-3.619,0.001,-116.497,-33.196
W_minus_Native,30.0795,8.981,3.349,0.002,11.991,48.168

0,1,2,3
Omnibus:,1.417,Durbin-Watson:,1.214
Prob(Omnibus):,0.492,Jarque-Bera (JB):,0.678
Skew:,0.035,Prob(JB):,0.713
Kurtosis:,3.544,Cond. No.,2.05e+24


## Model 3 - Treatment,Population,Race

In [29]:
model3 = smf.ols('Unemployed_Rate ~ C(treatment) + Population + White_pct + Black_pct + Native_pct + Asian_pct + PI_pct', data=final_df).fit()
model3.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.803
Model:,OLS,Adj. R-squared:,0.781
Method:,Least Squares,F-statistic:,36.0
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,5.19e-17
Time:,19:45:52,Log-Likelihood:,209.57
No. Observations:,60,AIC:,-405.1
Df Residuals:,53,BIC:,-390.5
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1833,0.022,8.338,0.000,0.139,0.227
C(treatment)[T.1],-0.0855,0.010,-8.331,0.000,-0.106,-0.065
Population,-8.015e-08,1.09e-08,-7.387,0.000,-1.02e-07,-5.84e-08
White_pct,-2.8603,0.708,-4.038,0.000,-4.281,-1.440
Black_pct,3.0435,0.700,4.345,0.000,1.639,4.448
Native_pct,-0.2662,0.082,-3.258,0.002,-0.430,-0.102
Asian_pct,0.9132,0.226,4.046,0.000,0.461,1.366
PI_pct,-14.1891,2.692,-5.271,0.000,-19.588,-8.790

0,1,2,3
Omnibus:,0.177,Durbin-Watson:,0.482
Prob(Omnibus):,0.916,Jarque-Bera (JB):,0.076
Skew:,0.084,Prob(JB):,0.963
Kurtosis:,2.95,Cond. No.,2.38e+22


# Model 3A - Treatment, Pre/Post, Population, Race

In [30]:
model3a = smf.ols('Unemployed_Rate ~ C(treatment) + C(Post) + C(treatment)*C(Post) + Population + W_minus_Black + W_minus_Asian + W_minus_Native + W_minus_PI ', data=selected_df).fit()
model3a.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.851
Method:,Least Squares,F-statistic:,38.82
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,1.05e-17
Time:,19:45:52,Log-Likelihood:,-49.153
No. Observations:,54,AIC:,116.3
Df Residuals:,45,BIC:,134.2
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-591.2724,131.952,-4.481,0.000,-857.037,-325.508
C(treatment)[T.1],-7.6158,0.921,-8.266,0.000,-9.471,-5.760
C(Post)[T.1],-1.3762,0.289,-4.759,0.000,-1.959,-0.794
C(treatment)[T.1]:C(Post)[T.1],0.7640,0.447,1.711,0.094,-0.135,1.663
Population,-6.861e-06,1.02e-06,-6.708,0.000,-8.92e-06,-4.8e-06
W_minus_Black,-794.4451,108.684,-7.310,0.000,-1013.346,-575.544
W_minus_Asian,-74.8466,20.680,-3.619,0.001,-116.497,-33.196
W_minus_Native,30.0795,8.981,3.349,0.002,11.991,48.168
W_minus_PI,1279.1800,277.608,4.608,0.000,720.050,1838.310

0,1,2,3
Omnibus:,1.417,Durbin-Watson:,1.214
Prob(Omnibus):,0.492,Jarque-Bera (JB):,0.678
Skew:,0.035,Prob(JB):,0.713
Kurtosis:,3.544,Cond. No.,7800000000.0


## Model 4 - Treatment, Population, Age

In [31]:
model4 = smf.ols('Unemployed_Rate ~ C(treatment) + Population + Pct_0_18 + Pct_18_25 + Pct_25_40 + Pct_40_65 + Pct_65', data=final_df).fit()
model4.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.821
Model:,OLS,Adj. R-squared:,0.8
Method:,Least Squares,F-statistic:,40.42
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,4.48e-18
Time:,19:45:52,Log-Likelihood:,212.4
No. Observations:,60,AIC:,-410.8
Df Residuals:,53,BIC:,-396.1
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.1767,0.036,-4.964,0.000,-0.248,-0.105
C(treatment)[T.1],-0.0046,0.005,-0.848,0.400,-0.015,0.006
Population,5.373e-09,4.12e-09,1.303,0.198,-2.89e-09,1.36e-08
Pct_0_18,0.3576,0.166,2.153,0.036,0.024,0.691
Pct_18_25,-0.9243,0.191,-4.832,0.000,-1.308,-0.541
Pct_25_40,-0.2271,0.196,-1.160,0.251,-0.620,0.166
Pct_40_65,0.5400,0.038,14.098,0.000,0.463,0.617
Pct_65,-0.0995,0.073,-1.356,0.181,-0.247,0.048

0,1,2,3
Omnibus:,4.638,Durbin-Watson:,0.429
Prob(Omnibus):,0.098,Jarque-Bera (JB):,3.702
Skew:,0.474,Prob(JB):,0.157
Kurtosis:,3.762,Cond. No.,2.29e+22


## Try State Fixed Effects

In [32]:

#Add pre/post
pre = df[np.logical_and(df['Year'] >= 2010,df['Year'] <= 2014)].copy()
pre['Post'] = 0
post = df[np.logical_and(df['Year'] >= 2016,df['Year'] <= 2019)].copy()
post['Post'] = 1 

fe_df = pd.concat([pre, post])
fe_df.head()

Unnamed: 0,State,Year,FIPS_Code,Civilian_Pop,Civilian_Labor_Force,Labor_Force_Pct,Employed_Total,Employed_Pct,Unemployed_Total,Unemployed_Rate,Population,gdp,treatment,Post
0,Idaho,2010,16.0,1165430.0,762158.666667,65.408333,695375.583333,59.683333,66783.083333,8.758333,1570746.0,57952.9,0,0
1,Idaho,2011,16.0,1179092.0,764066.916667,64.8,701595.75,59.508333,62471.166667,8.166667,1583910.0,57825.3,0,0
2,Idaho,2012,16.0,1193211.0,772073.333333,64.708333,715011.583333,59.908333,57061.75,7.4,1595324.0,57780.1,0,0
3,Idaho,2013,16.0,1208621.0,777682.75,64.35,724710.75,59.966667,52972.0,6.816667,1611206.0,59966.8,0,0
4,Idaho,2014,16.0,1226459.0,777061.5,63.358333,742988.166667,60.575,34073.333333,4.383333,1631112.0,61663.2,0,0


### Fixed Effects Model 1 - Unemployment Rate

In [33]:
model_fe = smf.ols('Unemployed_Rate ~ C(treatment) + C(Post) + C(treatment)*C(Post) + C(State) ', data=fe_df).fit()
model_fe.summary()

0,1,2,3
Dep. Variable:,Unemployed_Rate,R-squared:,0.794
Model:,OLS,Adj. R-squared:,0.763
Method:,Least Squares,F-statistic:,25.33
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,8.76e-14
Time:,19:45:52,Log-Likelihood:,-62.304
No. Observations:,54,AIC:,140.6
Df Residuals:,46,BIC:,156.5
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.5712,0.312,21.075,0.000,5.944,7.199
C(treatment)[T.1],-1.0563,0.284,-3.721,0.001,-1.628,-0.485
C(Post)[T.1],-2.8018,0.322,-8.704,0.000,-3.450,-2.154
C(State)[T.Iowa],-1.1278,0.392,-2.879,0.006,-1.916,-0.339
C(State)[T.Kansas],-0.5574,0.392,-1.423,0.162,-1.346,0.231
C(State)[T.Nebraska],-1.3691,0.245,-5.584,0.000,-1.863,-0.876
C(State)[T.South Dakota],-1.2746,0.245,-5.198,0.000,-1.768,-0.781
C(State)[T.West Virginia],1.5874,0.245,6.474,0.000,1.094,2.081
C(treatment)[T.1]:C(Post)[T.1],1.4092,0.455,3.096,0.003,0.493,2.325

0,1,2,3
Omnibus:,2.523,Durbin-Watson:,1.399
Prob(Omnibus):,0.283,Jarque-Bera (JB):,1.883
Skew:,0.038,Prob(JB):,0.39
Kurtosis:,3.912,Cond. No.,1.57e+16


### Fixed Effects Model 2 - GDP

In [34]:
model_fe = smf.ols('gdp ~ C(treatment) + C(Post) + C(treatment)*C(Post) + C(State) ', data=fe_df).fit()
model_fe.summary()

0,1,2,3
Dep. Variable:,gdp,R-squared:,0.995
Model:,OLS,Adj. R-squared:,0.994
Method:,Least Squares,F-statistic:,1265.0
Date:,"Wed, 30 Jun 2021",Prob (F-statistic):,2.3e-50
Time:,19:45:53,Log-Likelihood:,-512.16
No. Observations:,54,AIC:,1040.0
Df Residuals:,46,BIC:,1056.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.781e+04,1293.676,44.686,0.000,5.52e+04,6.04e+04
C(treatment)[T.1],1.048e+04,1177.847,8.899,0.000,8110.831,1.29e+04
C(Post)[T.1],1.409e+04,1335.553,10.549,0.000,1.14e+04,1.68e+04
C(State)[T.Iowa],9.909e+04,1625.584,60.958,0.000,9.58e+04,1.02e+05
C(State)[T.Kansas],8.309e+04,1625.584,51.113,0.000,7.98e+04,8.64e+04
C(State)[T.Nebraska],3.729e+04,1017.344,36.657,0.000,3.52e+04,3.93e+04
C(State)[T.South Dakota],-2.612e+04,1017.344,-25.671,0.000,-2.82e+04,-2.41e+04
C(State)[T.West Virginia],-695.0100,1017.344,-0.683,0.498,-2742.817,1352.797
C(treatment)[T.1]:C(Post)[T.1],-8567.2850,1888.758,-4.536,0.000,-1.24e+04,-4765.413

0,1,2,3
Omnibus:,1.083,Durbin-Watson:,1.727
Prob(Omnibus):,0.582,Jarque-Bera (JB):,0.429
Skew:,-0.108,Prob(JB):,0.807
Kurtosis:,3.379,Cond. No.,1.57e+16
