In [5]:
import pandas as pd
import numpy as np

# Project 3: Recreation of *Short-Run Demand for Palestinian Labor* by Joshua D. Angrist


# Abstract:
In this project, we attempt to replicate the paper *Short-Run Demand for Palestinian Labor* by Joshua D. Angrist.

First, we read in the data:

In [84]:
# Read the Stata dataset used for the replication. The bare last expression
# renders the frame so its columns can be inspected.
DATA_PATH = '../data/data8191.dta'
df_data = pd.read_stata(DATA_PATH)
df_data

Unnamed: 0,v1,v4,v14,v27,v8,v36,mnthwage,workloc,region,area,...,lnmnth,lnday,sample,lfp,unem,fulltime,parttime,age,inschool,test
0,12.0,3,,,,43.000000,,OTHER,GAZA,RAFIACH,...,,,1,1,0,1,0,24.0,0,
1,12.0,3,7.0,,6,44.000000,,,GAZA,RAFIACH,...,,,1,0,0,0,0,19.0,0,
2,12.0,3,7.0,,,46.000000,,,GAZA,RAFIACH,...,,,1,0,0,0,0,25.0,0,
3,12.0,3,7.0,,,43.000000,,,GAZA,RAFIACH,...,,,1,0,0,0,0,19.0,0,
4,12.0,3,7.0,,,44.000000,,,GAZA,RAFIACH,...,,,1,0,0,0,0,21.0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
373820,4.0,1,,,,28.870001,1200.0,ISRAEL,WBANK,JENIN,...,7.0625,3.6875,2,1,0,1,0,20.0,0,
373821,4.0,1,,,,28.870001,1400.0,ISRAEL,WBANK,JENIN,...,7.1875,3.8125,2,1,1,0,0,24.0,0,
373822,8.0,1,,,,34.759998,1200.0,JLEM,WBANK,BETHLEM,...,7.0625,3.6875,2,1,0,1,0,30.0,0,
373823,12.0,4,,,,35.889999,,JLEM,WBANK,BETHLEM,...,,,1,1,0,1,0,24.0,0,


In [85]:
# `df` is a second name for the loaded frame (no copy is made), so every
# column added or renamed below is also visible through `df_data`.
df = df_data

# Recreating variables
df['count'] = 1  # constant 1 on every row
# True when the work location is ISRAEL or JLEM; rows with a missing
# `workloc` evaluate to False.
df['wis'] = df['workloc'].isin(['ISRAEL', 'JLEM'])
df['gazan'] = df['region'].eq('GAZA')

# rename() ignores labels that are not present, so this is a no-op when
# 'v36' has already been renamed to 'weight' by an earlier run of the cell.
df.rename(columns={'v36': 'weight'}, inplace=True)
df

Unnamed: 0,v1,v4,v14,v27,v8,weight,mnthwage,workloc,region,area,...,lfp,unem,fulltime,parttime,age,inschool,test,count,wis,gazan
0,12.0,3,,,,43.000000,,OTHER,GAZA,RAFIACH,...,1,0,1,0,24.0,0,,1,False,True
1,12.0,3,7.0,,6,44.000000,,,GAZA,RAFIACH,...,0,0,0,0,19.0,0,,1,False,True
2,12.0,3,7.0,,,46.000000,,,GAZA,RAFIACH,...,0,0,0,0,25.0,0,,1,False,True
3,12.0,3,7.0,,,43.000000,,,GAZA,RAFIACH,...,0,0,0,0,19.0,0,,1,False,True
4,12.0,3,7.0,,,44.000000,,,GAZA,RAFIACH,...,0,0,0,0,21.0,0,,1,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
373820,4.0,1,,,,28.870001,1200.0,ISRAEL,WBANK,JENIN,...,1,0,1,0,20.0,0,,1,True,False
373821,4.0,1,,,,28.870001,1400.0,ISRAEL,WBANK,JENIN,...,1,1,0,0,24.0,0,,1,True,False
373822,8.0,1,,,,34.759998,1200.0,JLEM,WBANK,BETHLEM,...,1,0,1,0,30.0,0,,1,True,False
373823,12.0,4,,,,35.889999,,JLEM,WBANK,BETHLEM,...,1,0,1,0,24.0,0,,1,True,False


In [86]:
# Recreating table 1

# Split the rows on the `gazan` flag, then summarise each part by year.
gazan_df = df.loc[df['gazan'].eq(1)]
non_gazan_df = df.loc[df['gazan'].eq(0)]

# Variables summarised in the table.
columns = [
    'count', 'educ', 'age', 'lfp',
    'sachir', 'wrkdays', 'married',
]

# For every variable: the (unweighted) mean and the number of non-missing
# observations within each year.
summary_funcs = ['mean', 'count']
gazan_stats = gazan_df.groupby('year')[columns].agg(summary_funcs)
non_gazan_stats = non_gazan_df.groupby('year')[columns].agg(summary_funcs)

gazan_stats.head()

Unnamed: 0_level_0,count,count,educ,educ,age,age,lfp,lfp,sachir,sachir,wrkdays,wrkdays,married,married
Unnamed: 0_level_1,mean,count,mean,count,mean,count,mean,count,mean,count,mean,count,mean,count
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
81,1.0,7854,7.607006,7850,33.042908,7854,0.775401,7854,0.504966,7854,22.207871,3964,0.666285,7854
82,1.0,8223,7.918491,8220,32.62763,8223,0.7704,8223,0.51587,8223,21.879566,4243,0.676639,8223
83,1.0,8228,8.184492,8228,32.657876,8228,0.758508,8228,0.502795,8228,21.567078,4137,0.662008,8228
84,1.0,8930,8.326428,8930,32.867301,8930,0.761814,8930,0.499328,8930,21.899529,4459,0.663942,8930
85,1.0,9271,8.404574,9269,32.674792,9271,0.769604,9271,0.512135,9271,21.663648,4748,0.672743,9271


In [78]:
# Weighted mean of wis
def _weighted_mean(group, value_col='wis', weight_col='weight'):
    """Return the weighted mean of ``value_col`` within one group of rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single group; must contain ``value_col`` and ``weight_col``.
    value_col : str
        Column holding the values to average (boolean or numeric).
    weight_col : str
        Column holding the weights.

    Returns
    -------
    float
        ``sum(value * weight) / sum(weight)`` over the rows where both the
        value and the weight are present. The result is NaN when no such
        rows exist.
    """
    values = group[value_col]
    weights = group[weight_col]
    # Use the same rows in the numerator and the denominator: a row with a
    # missing value must not contribute its weight to the total.
    usable = values.notna() & weights.notna()
    used_weights = weights[usable]
    return (values[usable] * used_weights).sum() / used_weights.sum()


def _flatten_columns(columns):
    """Join each (variable, statistic) column label into one string.

    For example ``('educ', 'mean')`` becomes ``'educmean'``. Labels that are
    already plain strings come back unchanged, so re-running is harmless.
    """
    return [''.join(label).strip() for label in columns]


gazan_wis = (
    gazan_df.groupby('year')
    .apply(_weighted_mean, include_groups=False)
    .to_frame(name='mean_wis')
)

non_gazan_wis = (
    non_gazan_df.groupby('year')
    .apply(_weighted_mean, include_groups=False)
    .to_frame(name='mean_wis')
)

# Flattening
gazan_stats.columns = _flatten_columns(gazan_stats.columns)
non_gazan_stats.columns = _flatten_columns(non_gazan_stats.columns)


In [79]:
# Merging weighted wis
# Index-on-index inner join: each year's row of summary statistics gains the
# matching weighted `mean_wis` value.
gazan_table1 = pd.merge(
    gazan_stats, gazan_wis, how='inner', left_index=True, right_index=True
)
non_gazan_table1 = pd.merge(
    non_gazan_stats, non_gazan_wis, how='inner', left_index=True, right_index=True
)

gazan_table1.head()

Unnamed: 0_level_0,countmean,countcount,educmean,educcount,agemean,agecount,lfpmean,lfpcount,sachirmean,sachircount,wrkdaysmean,wrkdayscount,marriedmean,marriedcount,mean_wis
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
81,1.0,7854,7.607006,7850,33.042908,7854,0.775401,7854,0.504966,7854,22.207871,3964,0.666285,7854,0.362486
82,1.0,8223,7.918491,8220,32.62763,8223,0.7704,8223,0.51587,8223,21.879566,4243,0.676639,8223,0.362778
83,1.0,8228,8.184492,8228,32.657876,8228,0.758508,8228,0.502795,8228,21.567078,4137,0.662008,8228,0.376623
84,1.0,8930,8.326428,8930,32.867301,8930,0.761814,8930,0.499328,8930,21.899529,4459,0.663942,8930,0.373516
85,1.0,9271,8.404574,9269,32.674792,9271,0.769604,9271,0.512135,9271,21.663648,4748,0.672743,9271,0.373997


In [80]:
# Cleaning
# Single mapping from the aggregated column names to the labels shown in the
# table; both tables use it, so the two cannot drift apart.
table1_labels = {
    'countcount': 'Sample Size',
    'educmean': 'Years of Schooling',
    'agemean': 'Age',
    'lfpmean': 'Labor Force Participation',
    'sachirmean': 'Wage Earner',
    'wrkdaysmean': 'Days Worked',
    'marriedmean': 'Married',
    'mean_wis': 'Work in Israel'
}

# Columns retained for the table, in display order (the mapping's key order).
columns_to_keep = list(table1_labels)

# Selecting with a list and then calling rename() each return a new frame, so
# the merged tables are left untouched.
gazan_table1_cleaned = gazan_table1[columns_to_keep].rename(columns=table1_labels)
non_gazan_table1_cleaned = non_gazan_table1[columns_to_keep].rename(columns=table1_labels)

# ISSUE: 'Work in Israel' is off by .1 for both tables consistently.
# NOTE(review): possibly because `wis` is False for rows with no recorded
# `workloc`, so the weighted mean is taken over all rows rather than only
# rows that have a work location -- confirm against the paper's definition.

In [81]:
# Preview the first five years of the Gazan table.
gazan_table1_cleaned.head(n=5)

Unnamed: 0_level_0,Sample Size,Years of Schooling,Age,Labor Force Participation,Wage Earner,Days Worked,Married,Work in Israel
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
81,7854,7.607006,33.042908,0.775401,0.504966,22.207871,0.666285,0.362486
82,8223,7.918491,32.62763,0.7704,0.51587,21.879566,0.676639,0.362778
83,8228,8.184492,32.657876,0.758508,0.502795,21.567078,0.662008,0.376623
84,8930,8.326428,32.867301,0.761814,0.499328,21.899529,0.663942,0.373516
85,9271,8.404574,32.674792,0.769604,0.512135,21.663648,0.672743,0.373997


In [82]:
# Preview the first five years of the non-Gazan table.
non_gazan_table1_cleaned.head(n=5)

Unnamed: 0_level_0,Sample Size,Years of Schooling,Age,Labor Force Participation,Wage Earner,Days Worked,Married,Work in Israel
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
81,21768,7.658872,33.206725,0.696986,0.470323,21.853096,0.637449,0.26241
82,21992,7.815561,33.269325,0.71667,0.473263,22.3701,0.634413,0.269313
83,22734,7.903581,33.279889,0.7436,0.474927,22.138768,0.637415,0.286631
84,23807,7.980593,33.325408,0.755786,0.478893,22.072976,0.631537,0.287477
85,25038,8.105217,33.132958,0.761642,0.472322,21.786065,0.621495,0.271708
