In [1]:
import pandas as pd
import numpy as np
import re
import country_converter as coco
from textab import TexTab
import os

In [2]:
# Directory (relative to the working directory) that receives the generated .tex file
# written by the last cell of this notebook.
outdir = '../data'

In [3]:
# `_dh` is IPython's directory-history list, so this lookup only works inside an
# IPython/Jupyter kernel. `current_folder` is not referenced by any other visible cell.
current_folder = globals()['_dh'][0]
cwd = os.getcwd()

# The project root sits two levels above the working directory; the intermediate
# data produced by the previous pipeline stage lives under <root>/_2_intermediate/data.
parent_dir = os.path.dirname(cwd)
rootdir = os.path.dirname(parent_dir)
path_to_src = os.path.join(rootdir, '_2_intermediate', 'data')

In [4]:
# Load the intermediate estimates and keep only the Christian and Muslim rows,
# restricted to the identifier columns and the four imup_/imdn_ measures.
df = pd.read_csv(os.path.join(path_to_src,'imupdown_1418_by_bchmajrel_gender.csv'))
df = df[(df.major_religion=='Christian') |
        (df.major_religion=='Muslim')][['iso', 'major_religion', 'bch10', 'male',
                                        'imup_1418c', 'imdn_1418c', 'imup_1418g', 'imdn_1418g']].reset_index(drop=True).copy(deep=True)

# Replace the religion column with a lower-cased copy named `majrel`.
df['majrel'] = df.major_religion.str.lower()
del df['major_religion']
# Keep the bch10 == 1980 rows for every country, plus the bch10 == 1990 rows for
# TGO, MLI, NGA and LBR (the table notes further down explain that these countries
# have no 14-18 year olds born in the 1980s).
# NOTE(review): if any of those four countries also had bch10 == 1980 rows, the
# concatenated frame would hold duplicate (iso, majrel, male) keys and the unstack
# below would presumably fail -- confirm against the data.
df80 = df[df.bch10==1980].reset_index(drop=True).copy(deep=True)
df90 = df[(df.bch10==1990) & (df.iso.isin(['TGO', 'MLI', 'NGA', 'LBR']))].reset_index(drop=True).copy(deep=True)
del df80['bch10']
del df90['bch10']
df = pd.concat([df80, df90], axis=0).reset_index(drop=True).copy(deep=True)
# Split into an "upward" frame (imup_* columns only) and a "downward" frame
# (imdn_* columns only); both keep iso, male and majrel.
dfu = df.drop(['imdn_1418g', 'imdn_1418c'], axis=1)
dfd = df.drop(['imup_1418g', 'imup_1418c'], axis=1)

# Reshape the upward frame from long to wide: index by (iso, majrel, male), then
# move `male` and `majrel` into the columns so each country becomes one row.
dfu.index = [dfu.iso, dfu.majrel, dfu.male]
dfu = dfu.drop(['male', 'majrel', 'iso'], axis=1)
# Positional pick of six of the unstacked columns, named two lines below as
# of/om (overall female/male), cf/mf (Christian/Muslim female), cm/mm (Christian/Muslim male).
# NOTE(review): this relies on the column order produced by the double unstack
# (presumably variable, then male, then majrel, with male == 0 meaning female and the
# *_1418c columns carrying the country-level "overall" value) -- verify before changing inputs.
dfu = dfu.unstack().unstack().iloc[:,[0,2,4,5,6,7]].reset_index(drop=False)
# Flatten the multi-level column header; the labels are overwritten on the next line anyway.
dfu.columns = dfu.columns.droplevel().droplevel()
dfu.columns = ['iso', 'of', 'om', 'cf', 'mf', 'cm', 'mm']
# Bare expression in the middle of a cell: nothing is displayed and nothing is changed.
dfu

# Same long-to-wide reshape for the downward frame (see the notes on the upward one).
dfd.index = [dfd.iso, dfd.majrel, dfd.male]
dfd = dfd.drop(['male', 'majrel', 'iso'], axis=1)
dfd = dfd.unstack().unstack().iloc[:,[0,2,4,5,6,7]].reset_index(drop=False)
dfd.columns = dfd.columns.droplevel().droplevel()
dfd.columns = ['iso', 'of', 'om', 'cf', 'mf', 'cm', 'mm']

# Replace the ISO codes with short country names from country_converter.
dfu['country'] =  coco.convert(names=list(dfu['iso']), to='name_short')
del dfu['iso']
dfd['country'] =  coco.convert(names=list(dfd['iso']), to='name_short')
del dfd['iso']


# `country` was appended as the last column; move it to the front.
dfu = dfu[['country'] + list(dfu.columns)[:-1]]
dfd = dfd[['country'] + list(dfd.columns)[:-1]]


# Christian-minus-Muslim gaps, separately for females (D_f) and males (D_m).
dfu['D_f'] = dfu.cf - dfu.mf
dfu['D_m'] = dfu.cm - dfu.mm
dfd['D_f'] = dfd.cf - dfd.mf
dfd['D_m'] = dfd.cm - dfd.mm

# Row order of the final tables: upward sorted by the female gap descending,
# downward sorted by the female gap ascending.
dfu = dfu.sort_values('D_f', ascending=False).reset_index(drop=True)
dfd = dfd.sort_values('D_f', ascending=True).reset_index(drop=True)

In [5]:
def add_colorbars(x1, x2):
    """Return a LaTeX snippet that draws a signed bar for the difference x1 - x2.

    The snippet consists of two adjacent colorbox/parbox cells of equal width,
    5 em per unit of absolute difference:

    * x1 > x2: a white spacer followed by a Green bar (positive difference);
    * otherwise (including x1 == x2, which gives zero-width boxes): a Red bar
      followed by a white spacer (negative difference).

    Parameters
    ----------
    x1, x2 : scalar
        The two values being compared.

    Returns
    -------
    str
        The LaTeX snippet, or an empty string when either value is null
        according to ``pd.isnull``.
    """
    if pd.isnull(x1) or pd.isnull(x2):
        return ''

    # Raw strings: the LaTeX macros start with backslash sequences (\c, \p, \h) that
    # are invalid escapes in a normal Python literal and trigger a warning on
    # recent interpreters. The produced text is unchanged.
    if x1 > x2:
        width = x1 - x2
        template = r'\colorbox{white}{\parbox{%sem}{\hfill}}\colorbox{Green}{\parbox{%sem}{\hfill}}'
    else:
        width = x2 - x1
        template = r'\colorbox{Red}{\parbox{%sem}{\hfill}}\colorbox{white}{\parbox{%sem}{\hfill}}'
    return template % (5*width, 5*width)

In [6]:
# Each entry is (new column, left operand column, right operand column): the new
# column holds the LaTeX bar returned by add_colorbars(left, right) for every row.
bar_specs = [
    ('diff_female', 'cf', 'mf'),   # Christian vs Muslim, females
    ('diff_male', 'cm', 'mm'),     # Christian vs Muslim, males
    ('diff_diff', 'D_f', 'D_m'),   # female gap vs male gap
]

# The same three bar columns are added, in place, to the upward and downward frames.
for frame in (dfu, dfd):
    for target, left, right in bar_specs:
        frame[target] = frame.apply(
            lambda row, left=left, right=right: add_colorbars(row[left], row[right]),
            axis=1,
        )

In [7]:
# Final column layout of both tables: country, the female block (overall, Christian,
# Muslim, bar), the male block (same order), then the diff-in-diff bar.
# The helper columns D_f and D_m are dropped by this selection.
table_columns = [
    'country',
    'of', 'cf', 'mf', 'diff_female',
    'om', 'cm', 'mm', 'diff_male',
    'diff_diff',
]
dfu = dfu[table_columns]
dfd = dfd[table_columns]

In [8]:
def _stringify_for_tex(frame):
    """Round the numeric columns of ``frame`` to 3 decimals and turn every cell into text.

    All columns except the first whose name does not contain 'diff' are rounded;
    then every column is cast to str (both steps modify ``frame`` in place).
    The returned frame is a copy in which cells equal to 'nan' are blanked.
    """
    for name in frame.columns[1:]:
        if 'diff' in name:
            continue  # the diff_* columns already hold LaTeX snippets
        frame[name] = np.round(frame[name], 3)
    for name in frame.columns:
        frame[name] = frame[name].astype('str')
    return frame.replace('nan', '')


dfu = _stringify_for_tex(dfu)
dfd = _stringify_for_tex(dfd)

# Creating tex tables

## upward

In [9]:
# Wrap the upward-mobility frame in the project-local TexTab helper; its
# create_tab_onepanel method is called two cells below to render the table.
tt = TexTab(dfu)

In [10]:
# Arguments for TexTab.create_tab_onepanel (upward panel).
# Column-number row: a blank cell for the country column, then (1) ... (9).
cns = [''] + ['(%d)' % number for number in range(1, 10)]
# Judging by the printed table, `lcols` are the left-aligned columns and `gaps` the
# columns followed by an empty spacer column -- confirm against TexTab.
lcols = ['country']
gaps = ['country', 'diff_female', 'diff_male']
cap = "Country-group-level estimates of IM for Christians and Muslims by gender"
lab = "tab:_im_ctry_majrel_gender"
lines = []
# Placeholder text: a later cell removes everything from \end{tabular} onward
# from the upward table.
notes = 'BLAH.\n'

In [11]:
# Render the upward frame as a one-panel LaTeX table. The result is treated as a
# string by the re.sub calls in the next cell (presumably the full table source --
# confirm against TexTab).
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)

In [12]:
# Escaping note: each '\\\\' in these literals is two backslashes in the Python string,
# which re.sub's replacement-template syntax collapses to ONE backslash in the output.

# 1) Fill the blank row (13 empty cells = 12 ampersands) with the female / male /
#    diff-in-diff group headings.
nl = '\\\\multicolumn{2}{l}{}&\\\\multicolumn{4}{c}{female}&&\\\\multicolumn{4}{c}{male}&&diff-in-diff'
tab = re.sub(r'&&&&&&&&&&&&', nl, tab)

# 2) Insert the row of column titles directly above the \hline that precedes the
#    first data row. The anchor is the first country of the frame that was rendered,
#    not a hard-coded name, so the row is still inserted when the sort order changes.
nl = 'country&&Overall&Christian&Muslim&$\\\\Delta(c-m)$&&Overall&Christian&Muslim&$\\\\Delta(c-m)$&&$\\\\Delta(f-m)\\\\Delta(c-m)$\\\\\\\n'
first_country = dfu['country'].iloc[0]
tabu, n_inserted = re.subn(r'(\\hline\n%s)' % re.escape(first_country), r'%s\1' % nl, tab)
if n_inserted == 0:
    # Fail loudly instead of silently writing a table without its column titles.
    raise ValueError('could not locate the first data row (%r) in the upward table' % first_country)

## downward

In [13]:
# Wrap the downward-mobility frame in the project-local TexTab helper (this rebinds
# `tt`, which previously held the upward table).
tt = TexTab(dfd)

In [14]:
# Arguments for TexTab.create_tab_onepanel (downward panel).
# Column-number row: a blank cell for the country column, then (1) ... (9).
cns = ['', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)', '(7)', '(8)', '(9)']
# Judging by the printed table, `lcols` are the left-aligned columns and `gaps` the
# columns followed by an empty spacer column -- confirm against TexTab.
lcols = ['country']
gaps = ['country', 'diff_female', 'diff_male']
cap = "Country-group-level estimates of IM for Christians and Muslims by gender"
lab = "tab:_im_ctry_majrel_gender"
lines = []
# These notes are the ones that end up in the combined two-panel table, because the
# tail of the downward table is kept. Typo fixed: "censues" -> "censuses".
notes = '''This table reports IM for the 1980s cohort (the cohort with the 
broadest coverage) for individuals aged 14-18 by country and major religious group. This table
omits the ``Other'' category and focuses only on Christians and Muslims. Panel A shows
estimates for upward IM, panel B for downward IM. Because of the timing of censuses, 
we have to use the 1990s cohort for Liberia, Mali, Nigeria, and Togo as for those countries we 
don't have 14-18 year olds born in the 1980s. Columns (1) - (4) show the estimates for females, columns (5) - (8)
those for males. Columns (1) and (5) give the country-level estimates across all groups,
columns (2) - (3) and (6) - (7) give estimates by group. Columns (4) and (8) show differences
between Christians and Muslims for females and males respectively. Red bars indicate a negative difference, green bars a positive
difference. Column (9) shows the difference-in-difference, i.e. the female difference between christians and muslims
minus the male difference.
'''

In [15]:
# Render the downward frame as a one-panel LaTeX table; this rebinds `tab`, which
# previously held the upward table source.
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)

In [16]:
# Escaping note: each '\\\\' in these literals is two backslashes in the Python string,
# which re.sub's replacement-template syntax collapses to ONE backslash in the output.

# 1) Fill the blank row (13 empty cells = 12 ampersands) with the female / male /
#    diff-in-diff group headings.
nl = '\\\\multicolumn{2}{l}{}&\\\\multicolumn{4}{c}{female}&&\\\\multicolumn{4}{c}{male}&&diff-in-diff'
tab = re.sub(r'&&&&&&&&&&&&', nl, tab)

# 2) Insert the row of column titles directly above the \hline that precedes the
#    first data row. The anchor is the first country of the frame that was rendered,
#    not a hard-coded name, so the row is still inserted when the sort order changes.
nl = 'country&&Overall&Christian&Muslim&$\\\\Delta(c-m)$&&Overall&Christian&Muslim&$\\\\Delta(c-m)$&&$\\\\Delta(f-m)\\\\Delta(c-m)$\\\\\\\n'
first_country = dfd['country'].iloc[0]
tabd, n_inserted = re.subn(r'(\\hline\n%s)' % re.escape(first_country), r'%s\1' % nl, tab)
if n_inserted == 0:
    # Fail loudly instead of silently writing a table without its column titles.
    raise ValueError('could not locate the first data row (%r) in the downward table' % first_country)

## Making two panels

In [17]:
# Panel A: put a centred title row spanning all 13 columns above the \hline that
# opens the group-heading row of the upward table ...
nl = '\\\\multicolumn{13}{c}{\\\\textbf{Panel A: Upward}}\\\\\\\n'
heading_rule = re.compile(r'(\\hline\n\\multicolumn\{2\})', flags=re.DOTALL)
tabu = heading_rule.sub(r'%s\1' % nl, tabu)

# ... and cut the upward table off at \end{tabular}, so that the downward rows can be
# appended inside the same tabular environment.
table_tail = re.compile(r'\\end{tabular}.+', flags=re.DOTALL)
tabu = table_tail.sub(r'', tabu)

In [18]:
# Panel B: replace everything up to the first \hline + group-heading row of the
# downward table (the lazy `.+?` with DOTALL consumes the table preamble) by an empty
# spacer row and the panel title; the matched \hline/heading start is put back via \1.
nl = '\\\\multicolumn{13}{c}{}\\\\\\\n\\\\multicolumn{13}{c}{\\\\textbf{Panel B: Downward}}\\\\\\\n'
preamble_to_heading = re.compile(r'.+?(\\hline\n\\multicolumn\{2\})', flags=re.DOTALL)
tabd = preamble_to_heading.sub(r'%s\1' % nl, tabd)

In [19]:
# Combined two-panel table: the upward part (cut off before \end{tabular}) followed
# by the downward part (preamble replaced by the Panel B title), printed for inspection.
tab = tabu+tabd
print(tab)

\begin{table}[ht!]
\singlespacing
\centering
\caption{Country-group-level estimates of IM for Christians and Muslims by gender}
\label{tab:_im_ctry_majrel_gender}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lcccccccccccc}
\multicolumn{13}{c}{\textbf{Panel A: Upward}}\\
\hline
\multicolumn{2}{l}{}&\multicolumn{4}{c}{female}&&\multicolumn{4}{c}{male}&&diff-in-diff\\
\hline
&\;\;\;\;\;\;\;\;\;\;\;&(1)&(2)&(3)&(4)&\;\;\;\;\;\;\;\;\;\;\;&(5)&(6)&(7)&(8)&\;\;\;\;\;\;\;\;\;\;\;&(9)\\
country&&Overall&Christian&Muslim&$\Delta(c-m)$&&Overall&Christian&Muslim&$\Delta(c-m)$&&$\Delta(f-m)\Delta(c-m)$\\
\hline
Nigeria&&0.608&0.801&0.426&\colorbox{white}{\parbox{1.8750541297381549em}{\hfill}}\colorbox{Green}{\parbox{1.8750541297381549em}{\hfill}}&&0.616&0.773&0.489&\colorbox{white}{\parbox{1.4227117568355698em}{\hfill}}\colorbox{Green}{\parbox{1.4227117568355698em}{\hfill}}&&\colorbox{white}{\parbox{0.45234237290258494em}{\hfill}}\colorbox{Green}{\parbox{0.45234237290258494em}{\hfill}}\\
Cameroon&&

In [20]:
# Write the combined table to <outdir>/_2_ctry_im_majrel_gender.tex. The context
# manager closes the file handle even if the write raises.
with open(os.path.join(outdir, "_2_ctry_im_majrel_gender.tex"), "w") as fh:
    fh.write(tab)