In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# from scipy.stats import norm, t
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'
import seaborn as sns
# from CoefPlot import CoefPlot
import os

In [2]:
current_folder = globals()['_dh'][0]
rootdir = os.path.dirname(os.path.dirname(current_folder))
wdir = os.path.join(rootdir, '_2_intermediate', 'data')
outdir = os.path.join(rootdir, '_3_figures_tables', 'data')

In [3]:
nolitold = pd.read_csv(os.path.join(wdir, '_indlevel_gapregs_results_ethsample_noethfe_noshrlitold.csv'))
litold = pd.read_csv(os.path.join(wdir, '_indlevel_gapregs_results_ethsample_noethfe.csv'))

In [4]:
drop_nolitolt = ['-',
                 '+ district-urban FEs']
drop_litold = ['-',
               'country-birth-decade FE + child age FEs',
               '+ religion-district-specific share of literate old',
               '+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies']

In [5]:
nolitold = nolitold[(~nolitold.controls.isin(drop_nolitolt)) & 
                    (~nolitold.controls.str.contains('above median')) ].reset_index(drop=True)
litold = litold[~litold.controls.isin(drop_litold)].reset_index(drop=True)
litold = litold[~((litold.controls.str.contains('C v T')) & (litold.religion=='muslim'))]
litold = litold[~((litold.controls.str.contains('C v M')) & (litold.religion=='traditional'))]
litold.loc[litold.controls.str.contains('above median'), 'controls'] = 'in above median closest parental education distribution districts: C v X'
litold.loc[litold.controls.str.contains('occupation'), 'controls'] = '+ religion-district-specific share of literate old'

In [6]:
nolitold.controls.value_counts()

country-birth-decade FE + child age FEs                                                                                              12
+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies    12
+ urban + prev. gen. industry + prev. gen. occupation                                                                                12
Name: controls, dtype: int64

In [7]:
litold.controls.value_counts()

+ religion-district-specific share of literate old                          12
+ district-urban FEs                                                        12
in above median closest parental education distribution districts: C v X    12
Name: controls, dtype: int64

In [8]:
dfall = pd.concat([nolitold, litold], axis=0, ignore_index=True)

In [9]:
def specification_order(x):
    
    spord = {}
    spord['country-birth-decade FE + child age FEs'] = 1
    spord['+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies'] = 2
    spord['+ urban + prev. gen. industry + prev. gen. occupation'] = 3
    spord['+ religion-district-specific share of literate old'] = 4
    spord['+ district-urban FEs'] = 5
    spord['in above median closest parental education distribution districts: C v X'] = 6
    
    return spord[x]

In [10]:
dfall.columns = ['samp', 'direction', 'controls', 'religion', 'b', 'se', 'N']
dfall['order'] = dfall.controls.apply(lambda x: specification_order(x))
dfall.loc[dfall.controls == '+ religion-district-specific share of literate old', 'controls'] = '+ religion-district-specific shr literate old'
dfall.loc[dfall.controls == '+ hh sz + I(mg hh) + #hh mbrs same gen, prev. gen, prev. gen w/o mom/pop + fam struct dummies + rel head dummies + pg aab dummies', 'controls'] = '+ household/family characteristics'
dfall.loc[dfall.controls == '+ urban + prev. gen. industry + prev. gen. occupation', 'controls'] = '+ occupational specialization + urban/rural'
dfall.loc[dfall.controls == 'in above median closest parental education distribution districts: C v M', 'controls'] = 'in > 50% C v X similar old edu. districts'

In [11]:
dfall

Unnamed: 0,samp,direction,controls,religion,b,se,N,order
0,all,down,country-birth-decade FE + child age FEs,muslim,0.040963,0.018844,1189912,1
1,all,down,+ household/family characteristics,muslim,0.034286,0.015528,1189912,2
2,all,down,+ occupational specialization + urban/rural,muslim,0.042037,0.013927,1189912,3
3,all,up,country-birth-decade FE + child age FEs,muslim,-0.081623,0.009545,2727443,1
4,all,up,+ household/family characteristics,muslim,-0.076644,0.008906,2727443,2
...,...,...,...,...,...,...,...,...
67,girls,down,+ district-urban FEs,traditional,0.021898,0.013900,612685,5
68,girls,down,in above median closest parental education dis...,traditional,0.007677,0.012317,221937,6
69,girls,up,+ religion-district-specific shr literate old,traditional,-0.063177,0.017936,1284555,4
70,girls,up,+ district-urban FEs,traditional,-0.047746,0.019850,1284555,5


In [12]:
def bar_plot(df, leg_loc):
    
    
    labels = ['country-birth-decade FE\n + child age FEs',
              '+ household/family\n characteristics',
              '+ occupational specialization\n + urban/rural',
              '+ religion-district-specific\nshr literate old',
              '+ district-urban FEs',
              'in > 50% C v X similar\nold edu. districts'][::-1]
    coef_m = df[df.religion == 'muslim']['b'][::-1]
    coef_t = df[df.religion == 'traditional']['b'][::-1]
    
    width = 0.35
    x = np.arange(len(labels))
    f, ax = plt.subplots(figsize=(10, 6))
    rects1 = ax.barh(x + width/2, coef_m, width, label='Muslim')
    rects2 = ax.barh(x - width/2, coef_t, width, label='Traditional')
    ax.set_yticks(x)
    ax.set_yticklabels(labels)
    ax.legend(loc=leg_loc, prop={'size': 15})
    ax.tick_params(axis='both', labelsize=15)
    
    return f
    

# 1) Overall

## 1.1) Up

In [13]:
df = dfall[(dfall.direction == 'up') & (dfall['samp'] == 'all') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower left')
f.savefig('{}/_4a_indlevel_gaps_overall_up_combined_without_with_shrlitold_control_ethsample_noethfe.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

## 1.2) Down

In [14]:
df = dfall[(dfall.direction == 'down') & (dfall['samp'] == 'all') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower right')
f.savefig('{}/_4b_indlevel_gaps_overall_dn_combined_without_with_shrlitold_control_ethsample_noethfe.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

# 2) By gender

## 2.1) Up

In [15]:
df = dfall[(dfall.direction == 'up') & (dfall['samp'] == 'boys') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower left')
f.savefig('{}/_4c_indlevel_gaps_boys_up_combined_without_with_shrlitold_control_ethsample_noethfe.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

df = dfall[(dfall.direction == 'up') & (dfall['samp'] == 'girls') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower left')
f.savefig('{}/_4c_indlevel_gaps_girls_up_combined_without_with_shrlitold_control_ethsample_noethfe.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

## 2.2) Down

In [16]:
df = dfall[(dfall.direction == 'down') & (dfall['samp'] == 'boys') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower right')
f.savefig('{}/_4d_indlevel_gaps_boys_dn_combined_without_with_shrlitold_control_ethsample_noethfe.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)

df = dfall[(dfall.direction == 'down') & (dfall['samp'] == 'girls') & (dfall['controls'] != '-')]
f = bar_plot(df, 'lower right')
f.savefig('{}/_4d_indlevel_gaps_girls_dn_combined_without_with_shrlitold_control_ethsample_noethfe.pdf'.format(outdir), bbox_inches='tight', dpi=100)
plt.close(f)