In [1]:
import pandas as pd
import numpy as np
import re
import country_converter as coco
from textab import TexTab
import os

In [2]:
# `_dh` is IPython's directory-history list; its first entry is taken as the
# folder this notebook lives in.
current_folder = globals()['_dh'][0]
# The project root is two directory levels above the notebook folder.
rootdir = os.path.dirname(os.path.dirname(current_folder))
# wdir: intermediate data read by this notebook; outdir: where the .tex table is written.
wdir, outdir = (
    os.path.join(rootdir, stage, 'data')
    for stage in ('_2_intermediate', '_3_figures_tables')
)

In [3]:
# ISO3 codes of the countries for which the 1990 value of `bch10` is used
# instead of 1980 (every other country uses 1980).
LATE_COHORT_ISOS = ['LBR', 'MLI', 'NGA', 'TGO']

# Per-country statistics computed on the district-level gap, in output column order.
GAP_STATS = ['count', 'median', 'min', 'max', 'mean', 'std']


def _country_gap_summary(frame, im_col, share_col, positive_gap, ascending):
    """Summarise district-level Christian-minus-Muslim gaps in ``im_col`` by country.

    Parameters
    ----------
    frame : pandas.DataFrame
        Needs the columns ``iso``, ``bch10``, ``major_religion``, ``district``
        and ``im_col``; ``major_religion`` must contain 'Christian' and 'Muslim'.
    im_col : str
        Name of the column holding the measure whose gap is summarised.
    share_col : str
        Name given to the last output column (share of flagged districts).
    positive_gap : bool
        If True a district is flagged when its gap is > 0, otherwise when it is < 0.
    ascending : bool
        Sort direction of the result on the per-country median gap.

    Returns
    -------
    pandas.DataFrame
        One row per country with the columns ``country``, the entries of
        ``GAP_STATS`` and ``share_col``, sorted by ``median``.

    Raises
    ------
    KeyError
        If either 'Christian' or 'Muslim' is absent after filtering.
    ValueError
        If (iso, district, major_religion) is not unique after the cohort filter
        (raised by ``unstack``).
    """
    # Keep only rows where the measure is observed.
    sub = frame[frame[im_col].notna()]

    # One cohort per country: 1990 for the listed countries, 1980 for the rest.
    uses_1990 = sub.iso.isin(LATE_COHORT_ISOS)
    sub = sub[(uses_1990 & (sub.bch10 == 1990)) | (~uses_1990 & (sub.bch10 == 1980))]

    # Wide layout: one row per (iso, district), one column per religion.
    # Selecting the columns by name avoids relying on their positional order.
    wide = (sub.set_index(['iso', 'district', 'major_religion'])[im_col]
               .unstack('major_religion'))
    gaps = (wide['Christian'] - wide['Muslim']).rename('gap').reset_index()

    # Districts lacking either group have a NaN gap and are dropped.
    gaps = gaps[['iso', 'gap']].dropna().reset_index(drop=True)
    gaps['flag'] = (gaps.gap > 0) if positive_gap else (gaps.gap < 0)
    gaps['country'] = coco.convert(names=list(gaps['iso']), to='name_short')

    stats = gaps.groupby('country').agg({'gap': GAP_STATS, 'flag': ['mean']})
    # Flatten the (column, statistic) MultiIndex produced by agg.
    stats.columns = GAP_STATS + [share_col]
    return stats.sort_values('median', ascending=ascending).reset_index(drop=False)


# District x cohort x religion data, restricted to the two groups that are compared.
data = pd.read_csv(os.path.join(wdir, '_F_dist_religion_bch10.csv'))[
    ['iso', 'bch10', 'major_religion', 'district', 'immg_18', 'imdwmg_18']]
data = data[data.major_religion.isin(['Christian', 'Muslim'])].reset_index(drop=True)

# Upward table (from `immg_18`): share of districts with a positive gap,
# countries sorted from the largest to the smallest median gap.
dataup = _country_gap_summary(data, 'immg_18', 'shr_c_higher_up',
                              positive_gap=True, ascending=False)

# Downward table (from `imdwmg_18`): share of districts with a negative gap,
# countries sorted from the smallest to the largest median gap.
datadn = _country_gap_summary(data, 'imdwmg_18', 'shr_c_lower_dn',
                              positive_gap=False, ascending=True)

# Creating tex tables

## upward

In [4]:
# Wrap the upward summary frame in the project's TexTab helper, which renders it as LaTeX.
tt = TexTab(dataup)

In [5]:
# Arguments passed to tt.create_tab_onepanel for the upward table.
# Column-number header: an empty cell for the label column, then (1)..(7).
cns = [''] + ['(%d)' % k for k in range(1, 8)]
lcols = ['country']
gaps = list()
lines = list()
cap = "Country-level summary statistics for district-level Christian-Muslim IM gaps"
lab = "tab:_im_ctry_majrel_dist_im_cmgap_sumstats"
# Placeholder text. NOTE(review): presumably never published, because the
# two-panel step cuts this table off at \end{tabular} -- confirm.
notes = 'BLAH.\n'

In [6]:
# Render the upward summary as a one-panel LaTeX table.
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)
# Hand-written header (column numbers + typeset column names + \hline) that
# replaces the generated one. It is used as an re.sub replacement template,
# so every literal backslash of the LaTeX output is written doubled.
nl = '&(1)&(2)&(3)&(4)&(5)&(6)&(7)\\\\\\\ncountry&$N_{\\\\text{districts}}$&median&min&max&mean&std&share$(\\\\text{IM$_{\\\\text{c}}^{\\\\text{up}}$}>\\\\text{IM$_{\\\\text{m}}^{\\\\text{up}}$})$\\\\\\\n\\\\hline\n'
# Anchor on whichever country sorts first instead of a hard-coded name, so the
# header swap keeps working when the data (and hence the ordering) changes.
first_country = str(dataup['country'].iloc[0])
# Raw-string pattern: '\(' in a normal string literal is an invalid escape.
# The lookahead leaves the first data row untouched.
tabu = re.sub(r'&\(1.+?(?=%s)' % re.escape(first_country), nl, tab, flags=re.DOTALL)

In [7]:
# Show the patched upward table; print() renders the LaTeX source with real
# newlines and single backslashes.
print(tabu)

\begin{table}[ht!]
\singlespacing
\centering
\caption{Country-level summary statistics for district-level Christian-Muslim IM gaps}
\label{tab:_im_ctry_majrel_dist_im_cmgap_sumstats}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lccccccc}
\hline
&(1)&(2)&(3)&(4)&(5)&(6)&(7)\\
country&$N_{\text{districts}}$&median&min&max&mean&std&share$(\text{IM$_{\text{c}}^{\text{up}}$}>\text{IM$_{\text{m}}^{\text{up}}$})$\\
\hline
Cameroon&165&0.184&-0.415&0.924&0.181&0.255&0.764\\
Senegal&26&0.173&-0.145&0.511&0.188&0.162&0.923\\
Burkina Faso&46&0.125&-0.032&0.454&0.14&0.091&0.935\\
Togo&37&0.09&-0.318&0.471&0.087&0.142&0.757\\
Botswana&8&0.085&-0.399&0.864&0.238&0.54&0.5\\
Ghana&110&0.083&-0.228&0.428&0.069&0.122&0.736\\
Guinea&26&0.079&-0.253&0.836&0.156&0.274&0.808\\
Nigeria&21&0.075&-0.546&0.538&0.074&0.278&0.619\\
Benin&75&0.067&-0.653&0.935&0.072&0.216&0.72\\
Zambia&27&0.061&-0.623&0.622&0.064&0.338&0.556\\
Sierra Leone&97&0.058&-0.2&0.909&0.122&0.197&0.794\\
Mali&164&0.055&-0.409&0.961&0.12&0.

## downward

In [8]:
# Re-bind `tt` to a TexTab built from the downward summary frame.
tt = TexTab(datadn)

In [9]:
# Arguments passed to tt.create_tab_onepanel for the downward table.
# Caption and label are the same as for the upward table because both panels
# are later merged into a single LaTeX table.
cns = ['', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)', '(7)']
lcols = ['country']
gaps = []
cap = "Country-level summary statistics for district-level Christian-Muslim IM gaps"
lab = "tab:_im_ctry_majrel_dist_im_cmgap_sumstats"
lines = []
# Table notes for the combined two-panel table (typo "censues" corrected).
notes = '''This table reports IM-gaps for the 1980s cohort (the cohort with the 
broadest coverage) for individuals aged 14-18 by country and major religious group. This table
omits the ``Traditional'' category and focuses only on Christians and Muslims. Panel A shows
estimates for upward IM, panel B for downward IM. Because of the timing of censuses, 
we have to use the 1990s cohort for Liberia, Mali, Nigeria, and Togo as for those countries we 
don't have 14-18 year olds born in the 1980s. Column (1) shows the number of districts for which
we have data for both Christian and Muslim IM in the required age group. Columns (2) - (6)
show statistics of the gap across districts in the country: median, min, max, mean and 
standard deviation. Column (7) shows the share of districts for which Christians have a 
higher upward mobility than Muslims (Panel A) or a lower downward mobility than Muslims 
(Panel B).
'''

In [10]:
# Render the downward summary as a one-panel LaTeX table.
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)
# Hand-written header (column numbers + typeset column names + \hline) that
# replaces the generated one. It is used as an re.sub replacement template,
# so every literal backslash of the LaTeX output is written doubled.
nl = '&(1)&(2)&(3)&(4)&(5)&(6)&(7)\\\\\\\ncountry&$N_{\\\\text{districts}}$&median&min&max&mean&std&share$(\\\\text{IM$_{\\\\text{c}}^{\\\\text{down}}$}<\\\\text{IM$_{\\\\text{m}}^{\\\\text{down}}$})$\\\\\\\n\\\\hline\n'
# Anchor on whichever country sorts first instead of a hard-coded name, so the
# header swap keeps working when the data (and hence the ordering) changes.
first_country = str(datadn['country'].iloc[0])
# Raw-string pattern: '\(' in a normal string literal is an invalid escape.
# The lookahead leaves the first data row untouched.
tabd = re.sub(r'&\(1.+?(?=%s)' % re.escape(first_country), nl, tab, flags=re.DOTALL)

## Making two panels

In [11]:
# Panel A title row, written as an re.sub replacement template (backslashes doubled).
nl = '\\\\multicolumn{8}{c}{\\\\textbf{Panel A: Upward}}\\\\\\\n'
# Insert the title just above the \hline that precedes the column-number row.
# The guard keeps the cell idempotent: without it, re-running the cell would
# stack a second "Panel A" row on top of the first.
if 'Panel A: Upward' not in tabu:
    tabu = re.sub(r'(\\hline\n&\(1)', r'%s\1' % nl, tabu, flags=re.DOTALL)
# Cut the upward table off at \end{tabular}, so that the downward panel can be
# appended inside the same tabular environment.
tabu = re.sub(r'\\end{tabular}.+', r'', tabu, flags=re.DOTALL)

In [12]:
# Empty spacer row followed by the Panel B title row, written as an re.sub
# replacement template (backslashes doubled).
nl = '\\\\multicolumn{8}{c}{}\\\\\\\n\\\\multicolumn{8}{c}{\\\\textbf{Panel B: Downward}}\\\\\\\n'
# Everything before the first "\hline + column-number row" of the downward
# table is replaced by the spacer and title rows; the captured \hline/header
# start is kept.
tabd = re.compile(r'.+?(\\hline\n&\(1)', flags=re.DOTALL).sub(nl + r'\1', tabd)

In [13]:
# Join Panel A (cut off before \end{tabular}) and Panel B (header onwards,
# including the closing of the table) into one LaTeX table, then show it.
tab = ''.join((tabu, tabd))
print(tab)

\begin{table}[ht!]
\singlespacing
\centering
\caption{Country-level summary statistics for district-level Christian-Muslim IM gaps}
\label{tab:_im_ctry_majrel_dist_im_cmgap_sumstats}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lccccccc}
\multicolumn{8}{c}{\textbf{Panel A: Upward}}\\
\hline
&(1)&(2)&(3)&(4)&(5)&(6)&(7)\\
country&$N_{\text{districts}}$&median&min&max&mean&std&share$(\text{IM$_{\text{c}}^{\text{up}}$}>\text{IM$_{\text{m}}^{\text{up}}$})$\\
\hline
Cameroon&165&0.184&-0.415&0.924&0.181&0.255&0.764\\
Senegal&26&0.173&-0.145&0.511&0.188&0.162&0.923\\
Burkina Faso&46&0.125&-0.032&0.454&0.14&0.091&0.935\\
Togo&37&0.09&-0.318&0.471&0.087&0.142&0.757\\
Botswana&8&0.085&-0.399&0.864&0.238&0.54&0.5\\
Ghana&110&0.083&-0.228&0.428&0.069&0.122&0.736\\
Guinea&26&0.079&-0.253&0.836&0.156&0.274&0.808\\
Nigeria&21&0.075&-0.546&0.538&0.074&0.278&0.619\\
Benin&75&0.067&-0.653&0.935&0.072&0.216&0.72\\
Zambia&27&0.061&-0.623&0.622&0.064&0.338&0.556\\
Sierra Leone&97&0.058&-0.2&0.909&0.122&0.

In [14]:
# Write the combined two-panel table to the output folder.
# The context manager closes the file even if write() raises, and
# os.path.join builds the path consistently with the rest of the notebook.
with open(os.path.join(outdir, "_2_ctry_dist_im_cmgaps_sumstats.tex"), "w") as fh:
    fh.write(tab)