Importing libraries and dataset

In [1]:
import numpy as np
import pandas as pd

In [2]:
dataset = pd.read_stata("eitc.dta")
dataset.head()

Unnamed: 0,state,year,urate,children,nonwhite,finc,earn,age,ed,work,unearn
0,11.0,1991.0,7.6,0,1,18714.394273,18714.394273,26,10,1,0.0
1,12.0,1991.0,7.2,1,0,4838.568282,471.365639,22,9,1,4.367203
2,13.0,1991.0,6.4,2,0,8178.193833,0.0,33,11,0,8.178194
3,14.0,1991.0,9.1,0,1,9369.570485,0.0,43,11,0,9.36957
4,15.0,1991.0,8.6,3,1,14706.60793,14706.60793,23,7,1,0.0


Preparing dummy variables

In [3]:
dataset["post93"] = np.where(dataset["year"] > 1993,1,0)
dataset["mom"] = np.where(dataset["children"]>0,1,0)
dataset["mom_post93"] = dataset["mom"] * dataset["post93"]
dataset.head()

Unnamed: 0,state,year,urate,children,nonwhite,finc,earn,age,ed,work,unearn,post93,mom,mom_post93
0,11.0,1991.0,7.6,0,1,18714.394273,18714.394273,26,10,1,0.0,0,0,0
1,12.0,1991.0,7.2,1,0,4838.568282,471.365639,22,9,1,4.367203,0,1,0
2,13.0,1991.0,6.4,2,0,8178.193833,0.0,33,11,0,8.178194,0,1,0
3,14.0,1991.0,9.1,0,1,9369.570485,0.0,43,11,0,9.36957,0,0,0
4,15.0,1991.0,8.6,3,1,14706.60793,14706.60793,23,7,1,0.0,0,1,0


Creating the X and Y variables

In [4]:
Y = dataset.loc[:,"work"].values
X = dataset.loc[:, ["post93", "mom", "mom_post93"]]

Logistic regression

In [5]:
import statsmodels.api as sm
X = sm.add_constant=(X)

In [6]:
model_a = sm.Logit(Y,X).fit()
model_a.summary(yname = "Work", 
                xname = ("After 1993", "Is mom", "Mom after 1993"),
               title = "Impact of tax credit on employment - Model A")

Optimization terminated successfully.
         Current function value: 0.689114
         Iterations 4


0,1,2,3
Dep. Variable:,Work,No. Observations:,13746.0
Model:,Logit,Df Residuals:,13743.0
Method:,MLE,Df Model:,2.0
Date:,"Fri, 29 Sep 2023",Pseudo R-squ.:,0.005332
Time:,16:02:03,Log-Likelihood:,-9472.6
converged:,True,LL-Null:,-9523.3
Covariance Type:,nonrobust,LLR p-value:,8.878999999999999e-23

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
After 1993,0.2957,0.038,7.701,0.000,0.220,0.371
Is mom,-0.2170,0.031,-7.029,0.000,-0.278,-0.156
Mom after 1993,-0.1156,0.060,-1.942,0.052,-0.232,0.001


In [10]:
X = dataset.loc[:, ["post93", "mom", "mom_post93", "nonwhite", "ed"]].values

X = sm.add_constant=(X)

model_b = sm.Logit(Y,X).fit()
model_b.summary(yname = "Work", 
                xname = ("After 1993", "Is mom", "Mom after 1993", "Hispanic or Black", "Yeas of education"),
               title = "Impact of tax credit on employment - Model B")

Optimization terminated successfully.
         Current function value: 0.680868
         Iterations 4


0,1,2,3
Dep. Variable:,Work,No. Observations:,13746.0
Model:,Logit,Df Residuals:,13741.0
Method:,MLE,Df Model:,4.0
Date:,"Fri, 29 Sep 2023",Pseudo R-squ.:,0.01723
Time:,16:05:39,Log-Likelihood:,-9359.2
converged:,True,LL-Null:,-9523.3
Covariance Type:,nonrobust,LLR p-value:,8.707e-70

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
After 1993,-0.0469,0.050,-0.940,0.347,-0.145,0.051
Is mom,-0.5617,0.046,-12.193,0.000,-0.652,-0.471
Mom after 1993,0.2416,0.067,3.580,0.000,0.109,0.374
Hispanic or Black,-0.2486,0.034,-7.364,0.000,-0.315,-0.182
Yeas of education,0.0557,0.004,14.620,0.000,0.048,0.063


Preparing dummy variables for placebo experiment

In [11]:
dataset["post92"] = np.where(dataset["year"] > 1992,1,0)
dataset["mom_post92"] = dataset["mom"] * dataset["post92"]
dataset.head()

Unnamed: 0,state,year,urate,children,nonwhite,finc,earn,age,ed,work,unearn,post93,mom,mom_post93,post92,mom_post92
0,11.0,1991.0,7.6,0,1,18714.394273,18714.394273,26,10,1,0.0,0,0,0,0,0
1,12.0,1991.0,7.2,1,0,4838.568282,471.365639,22,9,1,4.367203,0,1,0,0,0
2,13.0,1991.0,6.4,2,0,8178.193833,0.0,33,11,0,8.178194,0,1,0,0,0
3,14.0,1991.0,9.1,0,1,9369.570485,0.0,43,11,0,9.36957,0,0,0,0,0
4,15.0,1991.0,8.6,3,1,14706.60793,14706.60793,23,7,1,0.0,0,1,0,0,0


Preparing placebo dataset

In [12]:
dataset_placebo = dataset[dataset["year"] < 1994]

Logistic regression for placebo

In [13]:
Y_placebo = dataset_placebo.loc[:,"work"].values
X_placebo= dataset_placebo.loc[:, ["post92", "mom", "mom_post92"]].values

X_placebo = sm.add_constant=(X_placebo)

model_placebo = sm.Logit(Y_placebo,X_placebo).fit()
model_placebo.summary(yname = "Work", 
                xname = ("After 1992", "Is mom", "Mom after 1992"),
               title = "Impact of tax credit on employment - Model Placebo")

Optimization terminated successfully.
         Current function value: 0.688372
         Iterations 4


0,1,2,3
Dep. Variable:,Work,No. Observations:,7401.0
Model:,Logit,Df Residuals:,7398.0
Method:,MLE,Df Model:,2.0
Date:,"Fri, 29 Sep 2023",Pseudo R-squ.:,0.006885
Time:,16:15:51,Log-Likelihood:,-5094.6
converged:,True,LL-Null:,-5130.0
Covariance Type:,nonrobust,LLR p-value:,4.576e-16

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
After 1992,0.2865,0.064,4.495,0.000,0.162,0.411
Is mom,-0.2014,0.037,-5.404,0.000,-0.274,-0.128
Mom after 1992,-0.3363,0.092,-3.650,0.000,-0.517,-0.156
