In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# from scipy.stats import norm, t
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'
import seaborn as sns
# from CoefPlot import CoefPlot
import os

In [2]:
current_folder = globals()['_dh'][0]
rootdir = os.path.dirname(os.path.dirname(current_folder))
wdir = os.path.join(rootdir, '_2_intermediate', 'data')
outdir = os.path.join(rootdir, '_3_figures_tables', 'data')

In [3]:
nolitold = pd.read_csv(os.path.join(wdir, '_indlevel_gapregs_results_noshrlitold.csv'))
litold = pd.read_csv(os.path.join(wdir, '_indlevel_gapregs_results.csv'))

In [4]:
drop_nolitolt = ['-',
                 '+ district-urban FEs']
drop_litold = ['-',
               'country-birth-decade FE + child age FEs',
               '+ religion-district-specific share of literate old',
               '+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies']

In [5]:
nolitold = nolitold[(~nolitold.controls.isin(drop_nolitolt)) & 
                    (~nolitold.controls.str.contains('above median')) ].reset_index(drop=True)
litold = litold[~litold.controls.isin(drop_litold)].reset_index(drop=True)
litold = litold[~((litold.controls.str.contains('C v T')) & (litold.religion=='muslim'))]
litold = litold[~((litold.controls.str.contains('C v M')) & (litold.religion=='traditional'))]
litold.loc[litold.controls.str.contains('above median'), 'controls'] = 'in above median closest parental education distribution districts: C v X'
litold.loc[litold.controls.str.contains('occupation'), 'controls'] = '+ religion-district-specific share of literate old'

In [6]:
nolitold.controls.value_counts()

+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies    12
country-birth-decade FE + child age FEs                                                                                              12
+ urban + prev. gen. industry + prev. gen. occupation                                                                                12
Name: controls, dtype: int64

In [7]:
litold.controls.value_counts()

in above median closest parental education distribution districts: C v X    12
+ religion-district-specific share of literate old                          12
+ district-urban FEs                                                        12
Name: controls, dtype: int64

In [8]:
dfall = pd.concat([nolitold, litold], axis=0, ignore_index=True)

In [9]:
def specification_order(x):
    
    spord = {}
    spord['country-birth-decade FE + child age FEs'] = 1
    spord['+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies'] = 2
    spord['+ urban + prev. gen. industry + prev. gen. occupation'] = 3
    spord['+ religion-district-specific share of literate old'] = 4
    spord['+ district-urban FEs'] = 5
    spord['in above median closest parental education distribution districts: C v X'] = 6
    
    return spord[x]

In [10]:
dfall.columns = ['samp', 'direction', 'controls', 'religion', 'b', 'se', 'N']
dfall['order'] = dfall.controls.apply(lambda x: specification_order(x))
dfall.loc[dfall.controls == '+ religion-district-specific share of literate old', 'controls'] = '+ religion-district-specific shr literate old'
dfall.loc[dfall.controls == '+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies', 'controls'] = '+ household/family characteristics'
dfall.loc[dfall.controls == '+ urban + prev. gen. industry + prev. gen. occupation', 'controls'] = '+ occupational specialization + urban/rural'
dfall.loc[dfall.controls == 'in above median closest parental education distribution districts: C v M', 'controls'] = 'in > 50% C v X similar old edu. districts'

In [11]:
dfall

Unnamed: 0,samp,direction,controls,religion,b,se,N,order
0,all,down,country-birth-decade FE + child age FEs,muslim,0.040963,0.018844,1189912,1
1,all,down,+ household/family characteristics,muslim,0.034286,0.015528,1189912,2
2,all,down,+ occupational specialization + urban/rural,muslim,0.042037,0.013927,1189912,3
3,all,up,country-birth-decade FE + child age FEs,muslim,-0.081623,0.009545,2727443,1
4,all,up,+ household/family characteristics,muslim,-0.076644,0.008906,2727443,2
...,...,...,...,...,...,...,...,...
67,girls,down,+ district-urban FEs,traditional,-0.007040,0.029500,972272,5
68,girls,down,in above median closest parental education dis...,traditional,-0.016973,0.024465,251144,6
69,girls,up,+ religion-district-specific shr literate old,traditional,-0.090486,0.031870,2352071,4
70,girls,up,+ district-urban FEs,traditional,-0.082022,0.026967,2352071,5


In [12]:
display(dfall[(dfall.samp=='boys') & (dfall.direction=='up') & (dfall.religion == 'muslim')])
display(dfall[(dfall.samp=='boys') & (dfall.direction=='up') & (dfall.religion == 'traditional')])
display(dfall[(dfall.samp=='girls') & (dfall.direction=='up') & (dfall.religion == 'muslim')])
display(dfall[(dfall.samp=='girls') & (dfall.direction=='up') & (dfall.religion == 'traditional')])
display(dfall[(dfall.samp=='boys') & (dfall.direction=='down') & (dfall.religion == 'muslim')])
display(dfall[(dfall.samp=='boys') & (dfall.direction=='down') & (dfall.religion == 'traditional')])
display(dfall[(dfall.samp=='girls') & (dfall.direction=='down') & (dfall.religion == 'muslim')])
display(dfall[(dfall.samp=='girls') & (dfall.direction=='down') & (dfall.religion == 'traditional')])

Unnamed: 0,samp,direction,controls,religion,b,se,N,order
9,boys,up,country-birth-decade FE + child age FEs,muslim,-0.079828,0.009468,1442887,1
10,boys,up,+ household/family characteristics,muslim,-0.076322,0.009016,1442887,2
11,boys,up,+ occupational specialization + urban/rural,muslim,-0.077102,0.009574,1442887,3
45,boys,up,+ religion-district-specific shr literate old,muslim,-0.087102,0.016742,2637859,4
46,boys,up,+ district-urban FEs,muslim,-0.058257,0.009736,2637859,5
47,boys,up,in above median closest parental education dis...,muslim,-0.053295,0.010035,1333380,6


Unnamed: 0,samp,direction,controls,religion,b,se,N,order
27,boys,up,country-birth-decade FE + child age FEs,traditional,-0.140465,0.030499,1442887,1
28,boys,up,+ household/family characteristics,traditional,-0.136727,0.029412,1442887,2
29,boys,up,+ occupational specialization + urban/rural,traditional,-0.094923,0.023098,1442887,3
63,boys,up,+ religion-district-specific shr literate old,traditional,-0.087661,0.025469,2637859,4
64,boys,up,+ district-urban FEs,traditional,-0.091484,0.027219,2637859,5
65,boys,up,in above median closest parental education dis...,traditional,-0.062819,0.020998,555147,6


Unnamed: 0,samp,direction,controls,religion,b,se,N,order
15,girls,up,country-birth-decade FE + child age FEs,muslim,-0.084613,0.010209,1284555,1
16,girls,up,+ household/family characteristics,muslim,-0.077346,0.009362,1284555,2
17,girls,up,+ occupational specialization + urban/rural,muslim,-0.074906,0.009922,1284555,3
51,girls,up,+ religion-district-specific shr literate old,muslim,-0.088367,0.028287,2352071,4
52,girls,up,+ district-urban FEs,muslim,-0.042107,0.009663,2352071,5
53,girls,up,in above median closest parental education dis...,muslim,-0.032606,0.008839,1171734,6


Unnamed: 0,samp,direction,controls,religion,b,se,N,order
33,girls,up,country-birth-decade FE + child age FEs,traditional,-0.155421,0.036592,1284555,1
34,girls,up,+ household/family characteristics,traditional,-0.148925,0.03457,1284555,2
35,girls,up,+ occupational specialization + urban/rural,traditional,-0.100389,0.026274,1284555,3
69,girls,up,+ religion-district-specific shr literate old,traditional,-0.090486,0.03187,2352071,4
70,girls,up,+ district-urban FEs,traditional,-0.082022,0.026967,2352071,5
71,girls,up,in above median closest parental education dis...,traditional,-0.038484,0.019814,494197,6


Unnamed: 0,samp,direction,controls,religion,b,se,N,order
6,boys,down,country-birth-decade FE + child age FEs,muslim,0.035472,0.018741,577227,1
7,boys,down,+ household/family characteristics,muslim,0.029331,0.015409,577227,2
8,boys,down,+ occupational specialization + urban/rural,muslim,0.038703,0.013187,577227,3
42,boys,down,+ religion-district-specific shr literate old,muslim,0.036497,0.006699,947437,4
43,boys,down,+ district-urban FEs,muslim,0.014613,0.004275,947437,5
44,boys,down,in above median closest parental education dis...,muslim,0.021193,0.00662,467962,6


Unnamed: 0,samp,direction,controls,religion,b,se,N,order
24,boys,down,country-birth-decade FE + child age FEs,traditional,0.058386,0.024371,577227,1
25,boys,down,+ household/family characteristics,traditional,0.050755,0.023222,577227,2
26,boys,down,+ occupational specialization + urban/rural,traditional,0.033128,0.01795,577227,3
60,boys,down,+ religion-district-specific shr literate old,traditional,-0.003396,0.025026,947437,4
61,boys,down,+ district-urban FEs,traditional,0.002169,0.026499,947437,5
62,boys,down,in above median closest parental education dis...,traditional,0.026932,0.011766,236473,6


Unnamed: 0,samp,direction,controls,religion,b,se,N,order
12,girls,down,country-birth-decade FE + child age FEs,muslim,0.047926,0.019903,612685,1
13,girls,down,+ household/family characteristics,muslim,0.040309,0.016749,612685,2
14,girls,down,+ occupational specialization + urban/rural,muslim,0.046883,0.015673,612685,3
48,girls,down,+ religion-district-specific shr literate old,muslim,0.040185,0.007076,972272,4
49,girls,down,+ district-urban FEs,muslim,0.040106,0.007171,972272,5
50,girls,down,in above median closest parental education dis...,muslim,0.031927,0.005791,478311,6


Unnamed: 0,samp,direction,controls,religion,b,se,N,order
30,girls,down,country-birth-decade FE + child age FEs,traditional,0.045741,0.027575,612685,1
31,girls,down,+ household/family characteristics,traditional,0.038991,0.02539,612685,2
32,girls,down,+ occupational specialization + urban/rural,traditional,0.026922,0.021738,612685,3
66,girls,down,+ religion-district-specific shr literate old,traditional,-0.004929,0.025395,972272,4
67,girls,down,+ district-urban FEs,traditional,-0.00704,0.0295,972272,5
68,girls,down,in above median closest parental education dis...,traditional,-0.016973,0.024465,251144,6


In [13]:
def bar_plot(df, leg_loc):
    
    
    labels = ['country-birth-decade FE\n + child age FEs',
              '+ household/family\n characteristics',
              '+ occupational specialization\n + urban/rural',
              '+ religion-district-specific\nshr literate old',
              '+ district-urban FEs',
              'in > 50% C v X similar\nold edu. districts'][::-1]
    coef_m = df[df.religion == 'muslim']['b'][::-1]
    coef_t = df[df.religion == 'traditional']['b'][::-1]
    
    width = 0.35
    x = np.arange(len(labels))
    f, ax = plt.subplots(figsize=(10, 6))
    rects1 = ax.barh(x + width/2, coef_m, width, label='Muslim')
    rects2 = ax.barh(x - width/2, coef_t, width, label='Traditional')
    ax.set_yticks(x)
    ax.set_yticklabels(labels)
    ax.legend(loc=leg_loc, prop={'size': 15})
    ax.tick_params(axis='both', labelsize=15)
    
    return f
    

# 1) Overall

## 1.1) Up

In [14]:
df = dfall[(dfall.direction == 'up') & (dfall['samp'] == 'all') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower left')
f.savefig('{}/_4a_indlevel_gaps_overall_up_combined_without_with_shrlitold_control.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

## 1.2) Down

In [15]:
df = dfall[(dfall.direction == 'down') & (dfall['samp'] == 'all') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower right')
f.savefig('{}/_4b_indlevel_gaps_overall_dn_combined_without_with_shrlitold_control.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

# 2) By gender

## 2.1) Up

In [16]:
df = dfall[(dfall.direction == 'up') & (dfall['samp'] == 'boys') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower left')
f.savefig('{}/_4c_indlevel_gaps_boys_up_combined_without_with_shrlitold_control.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

df = dfall[(dfall.direction == 'up') & (dfall['samp'] == 'girls') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower left')
f.savefig('{}/_4c_indlevel_gaps_girls_up_combined_without_with_shrlitold_control.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

## 2.2) Down

In [17]:
df = dfall[(dfall.direction == 'down') & (dfall['samp'] == 'boys') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower right')
f.savefig('{}/_4d_indlevel_gaps_boys_dn_combined_without_with_shrlitold_control.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

df = dfall[(dfall.direction == 'down') & (dfall['samp'] == 'girls') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower right')
f.savefig('{}/_4d_indlevel_gaps_girls_dn_combined_without_with_shrlitold_control.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)