In [4]:
import pandas as pd
import numpy as np

import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

#suppress warnings
import warnings
warnings.simplefilter("ignore")


In [5]:
df=pd.read_stata('ps1.dta')

In [6]:
df['employed']=np.where(df['work']==1,1,0)
df['unemployed']=np.where(df['work']==0,1,0)
df['parent']=np.where(df['children']!=0,1,0)

#pivot by year and parent and then reset the index
df1=df.groupby(['year', 'parent']).sum()
df1=df1.reset_index()

#calculate the lfpr for both parents and no parents
df1['urate']=(df1['employed'])/(df1['employed']+df1['unemployed'])
parent=df1[df1['parent']==1]
nparent=df1[df1['parent']==0]

In [7]:
# Add data
year = parent['year']
parentLMPR= parent['urate']
nparentLMPR = nparent['urate']


# Create and style traces
trace0 = go.Scatter(
    x = year,
    y = parentLMPR,
    name = 'W/ Children',
    line = dict(
        color = ('blue'),
        width = 2)
)
trace1 = go.Scatter(
    x = year,
    y = nparentLMPR,
    name = 'W/O Children',
    line = dict(
        color = ('red'),
        width = 2,)
)


data = [trace0, trace1]

# Edit the layout
layout = dict(title = 'Single Women Labor Market Participation Rates',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'LMPR'),
              )

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='raw-plot')

### Reindex the LMPR to 1 in 1991

In [8]:
pBaseLevel=parent.iloc[0,3]
nBaseLevel=nparent.iloc[0,3]
parent['index']=parent['urate']/pBaseLevel
nparent['index']=nparent['urate']/nBaseLevel

nBaseLevel

0.5830324909747292

In [9]:

# Add data
year = parent['year']
piLMPR= parent['index']
niLMPR = nparent['index']


# Create and style traces
trace0 = go.Scatter(
    x = year,
    y = piLMPR,
    name = 'W/ Children',
    line = dict(
        color = ('blue'),
        width = 2)
)
trace1 = go.Scatter(
    x = year,
    y = niLMPR,
    name = 'W/O Children',
    line = dict(
        color = ('red'),
        width = 2,)
)


data = [trace0, trace1]

# Edit the layout
layout = dict(title = 'Single Women Labor Market Participation Rates, Indexed to 1991 rates',
              xaxis = dict(title = 'Year'),
              yaxis = dict(title = 'LMPR'),
              )

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='index-plot')

TODO:  Insert comments on validity of using single women with children as a control group.

### In this section I calculate the difference in differences for table two.  I start back at the original dataframe to get clean averages.

In [10]:

parent=df[df['parent']==1]
nparent=df[df['parent']!=1]

#calculate the average of the treatment group pre-1994
tc1=parent[parent['year']<1994]
tc1_empl=tc1['work'].sum()
tc1_mean=tc1_empl/len(tc1)

#calculate the average of the treatment group post-1994
tc2=parent[parent['year']>1993]
tc2_empl=tc2['work'].sum()
tc2_mean=tc2_empl/len(tc1)

#calculate the average of the control group pre-1994
cg1=nparent[nparent['year']<1994]
cg1_empl=cg1['work'].sum()
cg1_mean=cg1_empl/len(cg1)

#calculate the average of the control group post-1994
cg2=nparent[nparent['year']>1993]
cg2_empl=cg2['work'].sum()
cg2_mean=cg2_empl/len(tc1)

#calculate diffs
dif1=tc2_mean-tc1_mean
dif2=cg2_mean-cg1_mean
dif_dif=dif1-dif2

#print (tc1_mean, tc2_mean, cg1_mean, cg2_mean)

l1=["Treatment Group", len(parent), tc1_mean, tc2_mean, dif1, '']
l2=["Control Group", len(nparent), cg1_mean, cg2_mean, dif2, dif_dif]

table=[l1, l2]

headers=['Group', 'Sample Size', 'Pre-1993', 'Post-1993', 'Difference', 'Difference-in-differences']

table2=pd.DataFrame(table, columns=headers)

table2


Unnamed: 0,Group,Sample Size,Pre-1993,Post-1993,Difference,Difference-in-differences
0,Treatment Group,7819,0.445962,0.412762,-0.0332,
1,Control Group,5927,0.57546,0.374382,-0.201078,0.167878


### TODO:  Comment on this table

### Now we do the same comparison except for women with one child and women with two or more children.

In [11]:
df

Unnamed: 0,state,year,urate,children,nonwhite,finc,earn,age,ed,work,unearn,employed,unemployed,parent
0,11.0,1991.0,7.6,3,0,7970.792951,7970.792951,39,10,1,0.000000,1,0,1
1,11.0,1991.0,7.6,0,0,31227.973568,14730.176211,48,11,1,16497.797357,1,0,0
2,11.0,1991.0,7.6,0,0,6143.072687,589.207048,36,7,1,5553.865639,1,0,0
3,11.0,1991.0,7.6,0,0,16761.762115,0.000000,44,7,0,16761.762115,0,1,0
4,11.0,1991.0,7.6,1,1,7424.008811,2121.145374,21,9,1,5302.863436,1,0,1
5,11.0,1991.0,7.6,0,0,8248.898678,8248.898678,20,10,1,0.000000,1,0,0
6,11.0,1991.0,7.6,0,0,7183.612335,0.000000,44,7,0,7183.612335,0,1,0
7,11.0,1991.0,7.6,2,0,6405.859031,0.000000,31,10,0,6405.859031,0,1,1
8,11.0,1991.0,7.6,0,1,18714.394273,18714.394273,26,10,1,0.000000,1,0,0
9,11.0,1991.0,7.6,0,0,17482.951542,0.000000,48,7,0,17482.951542,0,1,0


In [16]:
one_child=df[df['children']==1]
two_child=df[df['children']>1]

#calculate the average of the treatment group with one child pre-1994
tg1c1=one_child[one_child['year']<1994]
tg1c1_empl=tg1c1['work'].sum()
tg1c1_mean=tg1c1_empl/len(tg1c1)

#calculate the average of the treatment group with one child post-1994
tg2c1=one_child[one_child['year']>1993]
tg2c1_empl=tg2c1['work'].sum()
tg2c1_mean=tg2c1_empl/len(tg2c1)

#calculate the average of the treatment group with two children pre-1994
tg1c2=two_child[two_child['year']<1994]
tg1c2_empl=tg1c2['work'].sum()
tg1c2_mean=tg1c2_empl/len(tg1c2)

#calculate the average of the treatment group with two child post-1994
tg2c2=two_child[two_child['year']>1993]
tg2c2_empl=tg2c2['work'].sum()
tg2c2_mean=tg2c2_empl/len(tg2c2)

#calculate diffs
dif3=tg1c2_mean-tg1c1_mean
dif4=tg2c2_mean-tg1c2_mean
dif_dif3=dif3-dif2
dif_dif4=dif4-dif2

l3=["One Child", len(one_child), tg1c1_mean, tg2c1_mean, dif3, '']
l4=["Control Group", len(nparent), cg1_mean, cg2_mean, dif2, dif_dif3]

l5=["Two Child", len(two_child), tg1c2_mean, tg2c2_mean, dif4, '']
l6=["Control Group", len(nparent), cg1_mean, cg2_mean, dif2, dif_dif4]

table=[l1, l2, l3, l4, l5, l6]

headers=['Group', 'Sample Size', 'Pre-1993', 'Post-1993', 'Difference', 'Difference-in-differences']

table2=pd.DataFrame(table, columns=headers)

table2



Unnamed: 0,Group,Sample Size,Pre-1993,Post-1993,Difference,Difference-in-differences
0,Treatment Group,7819,0.445962,0.412762,-0.0332,
1,Control Group,5927,0.57546,0.374382,-0.201078,0.167878
2,One Child,3058,0.523579,0.554131,-0.127127,
3,Control Group,5927,0.57546,0.374382,-0.201078,0.0739506
4,Two Child,4761,0.396452,0.449723,0.053271,
5,Control Group,5927,0.57546,0.374382,-0.201078,0.254349
