In [1]:
import pandas as pd
import numpy as np
import re
import country_converter as coco
from textab import TexTab
import os
import statsmodels.api as sm
import pyreadr
from scipy.stats import norm

In [2]:
# `_dh` is IPython's directory history; its first entry is the folder the
# kernel was started in. The project root sits two levels above that folder.
current_folder = globals()['_dh'][0]
rootdir = os.path.dirname(
    os.path.dirname(current_folder)
)

# Input (intermediate) and output (figures/tables) data folders.
wdir, outdir = (
    os.path.join(rootdir, stage, 'data')
    for stage in ('_2_intermediate', '_3_figures_tables')
)

### overall country-level

In [3]:
# Country-level IM: pool the religion-specific rates within each country and
# birth cohort, weighting every rate by its accompanying n_* count.
df = pd.read_csv(os.path.join(wdir, '_E_ctry_religion_bch10.csv'))
df = df.loc[df.bch10.isin([1980, 1990]),
            ['iso', 'bch10', 'immg_18', 'imdwmg_18', 'n_immg_18', 'n_imdwmg_18']]

# rate * count, so that sum(rate * count) / sum(count) is the weighted mean.
df = df.assign(xup=df.immg_18 * df.n_immg_18,
               xdn=df.imdwmg_18 * df.n_imdwmg_18)
df = df.groupby(['iso', 'bch10'])[['n_immg_18', 'n_imdwmg_18', 'xup', 'xdn']].sum()
df = pd.DataFrame({'im_u': df.xup / df.n_immg_18,
                   'im_d': df.xdn / df.n_imdwmg_18})

# One row per country; columns are (measure, cohort) pairs flattened to
# up_1980, up_1990, dn_1980, dn_1990.
df = df.unstack('bch10')
df.columns = ['{}_{}'.format(var, bd)
              for var in ('up', 'dn')
              for bd in (1980, 1990)]
df = df.reset_index()

# For these four countries the 1990s cohort stands in for the 1980s one.
late_only = df.iso.isin(['LBR', 'MLI', 'NGA', 'TGO'])
for var in ('up', 'dn'):
    df.loc[late_only, var + '_1980'] = df.loc[late_only, var + '_1990']

df = df.drop(columns=['up_1990', 'dn_1990'])
df.columns = ['iso', 'ov_up', 'ov_dn']
dfov = df.copy(deep=True)

### country-religion-level

In [4]:
# Country-by-religion IM: reshape the long file into one row per country with
# one column per (measure, religion, cohort) combination.
df = pd.read_csv(os.path.join(wdir, '_E_ctry_religion_bch10.csv'))
in_sample = (df.bch10.isin([1980, 1990])
             & df.major_religion.isin(['Christian', 'Muslim', 'Traditional']))
df = (
    df.loc[in_sample, ['iso', 'bch10', 'major_religion',
                       'immg_18', 'imdwmg_18', 'n_immg_18', 'n_imdwmg_18']]
      .set_index(['iso', 'bch10', 'major_religion'])
      .unstack('major_religion')
)

# u/d = upward/downward rate, n_* = matching count; c/m/t = Christian/Muslim/
# Traditional (the alphabetical order in which unstack lays out the columns).
measures = ['u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t',
            'n_u_c', 'n_u_m', 'n_u_t', 'n_d_c', 'n_d_m', 'n_d_t']
df.columns = measures

# Spread the two cohorts into columns as well: <measure>_1980, <measure>_1990.
df = df.unstack('bch10')
df.columns = ['{}_{}'.format(var, bd)
              for var in measures
              for bd in [1980, 1990]]
df = df.reset_index()

# liberia, mali, nigeria, togo we use 1990s
late_only = df.iso.isin(['LBR', 'MLI', 'NGA', 'TGO'])
for var in measures:
    df.loc[late_only, var + '_1980'] = df.loc[late_only, var + '_1990']

# Keep the 1980 columns only, and drop the Traditional counts.
dfc = df.drop(
    [var + '_1990' for var in measures] + ['n_u_t_1980', 'n_d_t_1980'],
    axis=1
)

### putting together and outputting table

In [5]:
def add_colorbars_with_stars(p1, p2, n1, n2):
    """Build the LaTeX cell showing the difference p1 - p2 as a colour bar with stars.

    The significance stars come from a two-sided z-test for the difference of
    two sample proportions, using the unpooled standard error
    sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2).

    Parameters
    ----------
    p1, p2 : float
        The two sample proportions being compared.
    n1, n2 : float
        Number of observations behind ``p1`` and ``p2``.

    Returns
    -------
    str
        ``''`` if either proportion is missing. Otherwise a LaTeX snippet: a
        green bar to the right of a white spacer when ``p1 > p2``, else a red
        bar to the left of a white spacer. Each box is ``5 * |p1 - p2|`` em
        wide, and the stars (or blank padding of similar width) follow the
        coloured bar.

    Notes
    -----
    Degenerate inputs never raise and behave the same for plain Python floats
    and NumPy floats:

    * missing or non-positive ``n1``/``n2`` -> no stars;
    * zero standard error with ``p1 == p2`` -> no stars;
    * zero standard error with ``p1 != p2`` -> p-value 0, i.e. three stars
      (the limit of the z-statistic, and what NumPy floats already produced).
    """
    if pd.isnull(p1) or pd.isnull(p2):
        return ''

    undefined = float('nan')  # every `pv < ...` test below is False for NaN
    if pd.isnull(n1) or pd.isnull(n2) or n1 <= 0 or n2 <= 0:
        pv = undefined
    else:
        var = p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2
        if var > 0:
            z = (p1 - p2) / var ** 0.5
            pv = norm.sf(abs(z)) * 2
        elif var == 0 and p1 != p2:
            # Both proportions are exactly 0 or 1: |z| is infinite.
            pv = 0.0
        else:
            pv = undefined

    # Raw strings: the backslashes are LaTeX, not Python escape sequences.
    if pv < 0.01:
        stars = r'\;$^{***}$'
    elif pv < 0.05:
        stars = r'\;$^{**\;}$'
    elif pv < 0.1:
        stars = r'\;$^{*\;\;}$'
    else:
        stars = r'$^{\;\;\;\;\;\;\;}$'

    if p1 > p2:
        width = p1 - p2
        return r'\colorbox{white}{\parbox{%sem}{\hfill}}\colorbox{Green}{\parbox{%sem}{\hfill}}%s' % (5*width, 5*width, stars)
    else:
        width = p2 - p1
        return r'\colorbox{Red}{\parbox{%sem}{\hfill}}%s\colorbox{white}{\parbox{%sem}{\hfill}}' % (5*width, stars, 5*width)

In [6]:
# Combine the overall and by-religion estimates (outer join keeps countries
# present in only one of the two frames), then swap the ISO code for the
# short country name.
df = dfov.merge(dfc, on='iso', how='outer')
df['country'] = coco.convert(names=df['iso'].tolist(), to='name_short')
df = df.drop(columns='iso')
df

Unnamed: 0,ov_up,ov_dn,u_c_1980,u_m_1980,u_t_1980,d_c_1980,d_m_1980,d_t_1980,n_u_c_1980,n_u_m_1980,n_d_c_1980,n_d_m_1980,country
0,0.298027,0.291863,0.414577,0.214385,0.212556,0.273951,0.30844,0.469287,20182.0,12944.0,7129.0,1647.0,Benin
1,0.191419,0.234983,0.331732,0.182116,0.072035,0.198559,0.269451,0.569106,20815.0,57639.0,4024.0,2969.0,Burkina Faso
2,0.797601,0.085424,0.82176,0.555556,0.699005,0.082784,0.027027,0.076471,5919.0,9.0,6064.0,37.0,Botswana
3,0.613159,0.056215,0.739492,0.424099,0.481481,0.041517,0.19613,0.184953,22959.0,13511.0,42007.0,3824.0,Cameroon
4,0.673407,0.052212,0.67915,0.673125,,0.04826,0.052494,,25794.0,524986.0,12930.0,181373.0,Egypt
5,0.11555,0.344052,0.137683,0.082365,0.017333,0.323213,0.480881,0.8,82443.0,36569.0,6324.0,863.0,Ethiopia
6,0.557264,0.173118,0.654347,0.468262,0.263328,0.156952,0.263147,0.470998,60176.0,21945.0,61611.0,4792.0,Ghana
7,0.181571,0.438876,0.229127,0.181246,0.137778,0.5,0.418046,0.724138,2108.0,27162.0,774.0,3868.0,Guinea
8,0.221879,0.538385,0.217615,0.266313,0.102564,0.536899,0.544335,0.631579,16350.0,2636.0,12114.0,812.0,Liberia
9,0.273909,0.237432,0.39507,0.273372,0.186646,0.218543,0.236859,0.491228,2921.0,114017.0,604.0,14173.0,Mali


In [7]:
# Christian-minus-Muslim colour bar (with significance stars) for the upward
# ('u') and downward ('d') measures.
for direction, stem in (('up', 'u'), ('dn', 'd')):
    df['diff_' + direction] = df.apply(
        lambda row, stem=stem: add_colorbars_with_stars(
            row[stem + '_c_1980'],
            row[stem + '_m_1980'],
            row['n_' + stem + '_c_1980'],
            row['n_' + stem + '_m_1980'],
        ),
        axis=1,
    )

In [8]:
# Keep the table columns in display order and sort countries by the
# Christian-minus-Muslim gap in upward IM, largest gap first.
table_columns = ['country',
                 'ov_up', 'u_c_1980', 'u_m_1980', 'u_t_1980', 'diff_up',
                 'ov_dn', 'd_c_1980', 'd_m_1980', 'd_t_1980', 'diff_dn']
df = (
    df[table_columns]
    .assign(aux=lambda t: t.u_c_1980 - t.u_m_1980)  # temporary sort key
    .sort_values(by='aux', ascending=False)
    .reset_index(drop=True)
    .drop(columns='aux')
)

In [9]:
# Display the re-ordered table as a visual check before it is formatted for LaTeX.
df

Unnamed: 0,country,ov_up,u_c_1980,u_m_1980,u_t_1980,diff_up,ov_dn,d_c_1980,d_m_1980,d_t_1980,diff_dn
0,Nigeria,0.611691,0.786035,0.46617,0.229167,\colorbox{white}{\parbox{1.599324342194195em}{...,0.096479,0.077525,0.161654,0.0,\colorbox{Red}{\parbox{0.420644884667876em}{\h...
1,Cameroon,0.613159,0.739492,0.424099,0.481481,\colorbox{white}{\parbox{1.57696627883601em}{\...,0.056215,0.041517,0.19613,0.184953,\colorbox{Red}{\parbox{0.773064085354176em}{\h...
2,Senegal,0.244053,0.527332,0.234995,,\colorbox{white}{\parbox{1.46168611589804em}{\...,0.264186,0.16303,0.274111,,\colorbox{Red}{\parbox{0.555402420708545em}{\h...
3,Botswana,0.797601,0.82176,0.555556,0.699005,\colorbox{white}{\parbox{1.331024384749675em}{...,0.085424,0.082784,0.027027,0.076471,\colorbox{white}{\parbox{0.2787830706696145em}...
4,Benin,0.298027,0.414577,0.214385,0.212556,\colorbox{white}{\parbox{1.000961514433735em}{...,0.291863,0.273951,0.30844,0.469287,\colorbox{Red}{\parbox{0.17244060642187503em}{...
5,Ghana,0.557264,0.654347,0.468262,0.263328,\colorbox{white}{\parbox{0.9304284253695803em}...,0.173118,0.156952,0.263147,0.470998,\colorbox{Red}{\parbox{0.53097209634719em}{\hf...
6,Burkina Faso,0.191419,0.331732,0.182116,0.072035,\colorbox{white}{\parbox{0.748078243273485em}{...,0.234983,0.198559,0.269451,0.569106,\colorbox{Red}{\parbox{0.3544617274460349em}{\...
7,Mali,0.273909,0.39507,0.273372,0.186646,\colorbox{white}{\parbox{0.6084933333529902em}...,0.237432,0.218543,0.236859,0.491228,\colorbox{Red}{\parbox{0.09157884850543488em}{...
8,Mozambique,0.287092,0.323675,0.20748,0.366142,\colorbox{white}{\parbox{0.5809739426945699em}...,0.248628,0.224759,0.313679,0.219512,\colorbox{Red}{\parbox{0.44460119851048em}{\hf...
9,Togo,0.525773,0.640549,0.533907,0.381539,\colorbox{white}{\parbox{0.5332103332111199em}...,0.189721,0.164512,0.214472,0.360619,\colorbox{Red}{\parbox{0.24980098596234em}{\hf...


In [10]:
# Round every numeric column to three decimals; the first column (country)
# and the LaTeX 'diff' columns are left alone.
value_cols = [col for col in df.columns[1:] if 'diff' not in col]
df[value_cols] = df[value_cols].round(3)

# Everything becomes text; missing values (rendered as 'nan') become empty cells.
df = df.astype('str').replace('nan', '')

In [11]:
# Wrap the formatted, all-string table in a TexTab object; the LaTeX source is
# generated from it further below via `create_tab_onepanel`.
tt = TexTab(df)

In [12]:
# Arguments handed to `tt.create_tab_onepanel` in the next cell.
# NOTE(review): the exact meaning of cns / lcols / gaps / lines is defined by
# TexTab, which is not visible here -- confirm against its documentation.
cns = ['', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)', '(7)', '(8)', '(9)', '(10)']
lcols = ['country']
gaps = ['country', 'diff_up']
cap = "Country-group-level estimates of IM, ages 14-18"
lab = "tab:_im_ctry_majrel_1418"
lines = []
# Table note. This is a normal (non-raw) string, so every LaTeX backslash is
# written as `\\`.
notes = '''This table reports upward and downward IM measures for the 1980s cohort (the cohort with the 
broadest coverage) for individuals aged 14-18 by country and major religious group. Because of the timing of censuses, 
we have to use the 1990s cohort for Liberia, Mali, Nigeria, and Togo as for those countries we 
don't have 14-18 year olds born in the 1980s. Columns (1) - (4) show the estimates for country-cohort-level upward IM,
columns (6) - (9) those for downward IM. Columns (1) and (6) give the country-level estimates across all groups,
columns (2) - (4) and (7) - (9) give estimates by group. Columns (5) and (10) show differences
between Christians and Muslims. Red bars indicate a negative difference, green bars a positive
difference. $^{***}: p < 0.01$, $^{**}: p < 0.05$, $^{*}: p < 0.1$. $p$-values are computed using the formula for the
distribution of the difference of two sample proportions, i.e. $2\\times\\left(1-\\Phi\\left(\\left| \\frac{\\text{IM}_{\\text{c}} - \\text{IM}_{\\text{m}}}{ \\sqrt{\\frac{ \\text{IM}_{\\text{c}}(1-\\text{IM}_{\\text{c}})}{N_c} + \\frac{ \\text{IM}_{\\text{m}}(1-\\text{IM}_{\\text{m}})}{N_m}}} \\right|\\right)\\right)$,
where $\\text{IM}_{\\text{c}}$ and $\\text{IM}_{\\text{m}}$ are the (upward and downward) measures of IM for Christians 
and Muslims respectively, $N_c$ and $N_m$ are the number of Christian and Muslim individuals entering into the computation 
of $\\text{IM}_{\\text{c}}$ and $\\text{IM}_{\\text{m}}$, $\\Phi$ is the standard-Normal CDF.
'''

In [13]:
# Render the one-panel LaTeX table from the settings defined above.
tab = tt.create_tab_onepanel(
    cns=cns,
    gaps=gaps,
    lcols=lcols,
    cap=cap,
    lab=lab,
    lines=lines,
    notes=notes,
    clines=False,
)

In [14]:
# Post-process the generated LaTeX. The replacement strings go through
# re.sub's template syntax, hence the doubled backslashes.

# 1) Turn the header line made of eleven bare '&' into the spanning
#    "upward IM" / "downward IM" group headers.
nl = '\\\\multicolumn{1}{l}{}&&\\\\multicolumn{5}{c}{upward IM}&&\\\\multicolumn{5}{c}{downward IM}'
tab = re.sub(r'&&&&&&&&&&&', nl, tab)

# 2) Insert the column-name row directly before the first body row. The anchor
#    is "\hline" followed by the first country in the sorted table, looked up
#    from `df` rather than hard-coded, so the row lands in the right place
#    whichever country sorts first.
nl = 'country&&overall&Christian&Muslim&Traditional&$\\\\Delta(c-m)$&&overall&Christian&Muslim&Traditional&$\\\\Delta(c-m)$\\\\\\\n'
first_country = df['country'].iloc[0]
tab, n_inserted = re.subn(r'(\\hline\n%s)' % re.escape(first_country), r'%s\1' % nl, tab)
if n_inserted == 0:
    # Fail loudly instead of silently writing a table without its header row.
    raise ValueError(
        'column-header row not inserted: no "\\hline" line followed by %r found in the table'
        % first_country
    )

# 3) Add the extra '&' between columns (5) and (6) and widen the column spec
#    from 11 to 12 centred columns to match.
tab = re.sub(r'\(5\)&\(6\)', '(5)&&(6)', tab)
tab = re.sub(r'lccccccccccc', r'lcccccccccccc', tab)

In [15]:
# Write the finished table; the context manager closes the file even if the
# write fails.
with open(os.path.join(outdir, "_2_ctry_im_majrel_1418.tex"), "w") as fh:
    fh.write(tab)