In [1]:
import numpy as np
import pandas as pd
import re
import country_converter as coco
from textab import TexTab
import os

In [2]:
# `_dh` is looked up in the notebook's globals; in IPython it is the directory
# history list, so entry 0 is presumably the folder the kernel started in
# (i.e. this notebook's folder) -- this cell fails outside IPython/Jupyter.
current_folder = globals()['_dh'][0]
# Project root is two directory levels above the notebook's folder.
rootdir = os.path.dirname(os.path.dirname(current_folder))
# Input folder holding the intermediate CSVs read below.
wdir = os.path.join(rootdir, '_2_intermediate', 'data')
# Output folder that receives the generated .tex tables.
outdir = os.path.join(rootdir, '_3_figures_tables', 'data')

### education statistics

In [3]:
# Cohorts (birth decades), religious groups and the short value-column names
# used throughout this cell.
cohorts = [1940, 1950, 1970, 1980, 1990]
religions = ['Christian', 'Muslim', 'Traditional']
value_cols = ['y_c', 'y_m', 'y_t', 'l_c', 'l_m', 'l_t']

df = pd.read_csv(os.path.join(wdir, '_mean_ysc_ec_by_bd_major_religion.csv'))

# Keep only the selected cohorts and the three religious groups.
keep = df.bd.isin(cohorts) & df.major_religion.isin(religions)
df = df[keep].reset_index(drop=True)

# Step 1: spread the religion level into columns (one row per iso x cohort).
# The positional renaming assumes two value columns in the CSV and the three
# religions in alphabetical order -- TODO confirm against the input file.
df = df.set_index(['iso', 'bd', 'major_religion'])
df = df.unstack().reset_index(drop=False)
df.columns = df.columns.droplevel()
df.columns = ['iso', 'bd'] + value_cols

# Step 2: spread the cohort level into columns (one row per iso).
df = df.set_index(['iso', 'bd'])
df = df.unstack().reset_index(drop=False)
df.columns = df.columns.droplevel()
df.columns = ['iso'] + ['{}_{}'.format(var, bd)
                        for var in value_cols
                        for bd in cohorts]

# Cohort substitution (census timing, see the table notes further down):
# for the countries listed in `backfill_isos` the last cohort is replaced by
# the preceding one.  Only Sierra Leone (SLE) is active; Guinea (GIN),
# Senegal (SEN), Uganda (UGA) and South Africa (ZAF) were listed in earlier
# versions of this cell but had been commented out.
backfill_isos = ['SLE']
backfill_rows = df.iso.isin(backfill_isos)

# Years of schooling (y_*): the 1970s value stands in for the 1980s.
for var in ['y_c', 'y_m', 'y_t']:
    df.loc[backfill_rows, '{}_1980'.format(var)] = df['{}_1970'.format(var)]

# l_* columns (called "literacy" in this cell, "share primary completed" in
# the table caption): the 1980s value stands in for the 1990s.
for var in ['l_c', 'l_m', 'l_t']:
    df.loc[backfill_rows, '{}_1990'.format(var)] = df['{}_1980'.format(var)]

# The 1970s columns were only needed as the substitution source.
df = df.drop(['{}_1970'.format(var)
              for var in ['y_c', 'y_m', 'y_t', 'l_c', 'l_m', 'l_t']], axis=1)

In [4]:
# Translate ISO codes to short country names and make `country` the first
# column in place of `iso`.
short_names = coco.convert(names=list(df['iso']), to='name_short')
df = df.drop(columns='iso')
df.insert(0, 'country', short_names)

In [5]:
# Column sets for the two tables; 'cmt' = Christian / Muslim / Traditional.
# Years of schooling compares the 1940 and 1980 cohorts, the l_* table the
# 1950 and 1990 cohorts.
cols1 = ['country'] + ['y_{}_{}'.format(rel, bd)
                       for rel in 'cmt'
                       for bd in (1940, 1980)]
cols2 = ['country'] + ['l_{}_{}'.format(rel, bd)
                       for rel in 'cmt'
                       for bd in (1950, 1990)]

# Independent copies so each table can be formatted without touching `df`.
df1 = df[cols1].copy()
df2 = df[cols2].copy()

In [6]:
def add_colorbars(x1, x2, schooling = True):
    """Return a LaTeX snippet that draws the gap ``x1 - x2`` as a coloured bar.

    The snippet is two adjacent ``\\colorbox`` cells of equal width
    (``5 * |x1 - x2|`` em): a white spacer plus a green bar to its right when
    ``x1 > x2``, otherwise a red bar plus a white spacer to its right.

    Parameters
    ----------
    x1, x2 : scalar numbers
        The two values to compare (the callers pass the Christian value as
        ``x1`` and the Muslim value as ``x2``).
    schooling : bool, default True
        When True both inputs are divided by 10 before the width is computed
        (presumably to bring years of schooling onto a scale comparable to
        shares -- TODO confirm).

    Returns
    -------
    str
        The LaTeX snippet, or ``''`` if either input is missing.  Equal
        inputs produce a zero-width red bar.
    """
    if schooling:
        x1 = x1 / 10
        x2 = x2 / 10
    if pd.isnull(x1) or pd.isnull(x2):
        return ''
    # Raw strings: '\c', '\p' and '\h' are not valid Python escapes and raise
    # a SyntaxWarning on recent interpreters when written in a normal literal.
    if x1 > x2:
        width = x1 - x2
        return (r'\colorbox{white}{\parbox{%sem}{\hfill}}'
                r'\colorbox{Green}{\parbox{%sem}{\hfill}}') % (5*width, 5*width)
    width = x2 - x1
    return (r'\colorbox{Red}{\parbox{%sem}{\hfill}}'
            r'\colorbox{white}{\parbox{%sem}{\hfill}}') % (5*width, 5*width)

# Years of schooling

In [7]:
# One colour-bar column per cohort: Christian minus Muslim years of schooling.
for bar_col, cohort in [('d40', 1940), ('d80', 1980)]:
    christian = 'y_c_{}'.format(cohort)
    muslim = 'y_m_{}'.format(cohort)
    df1[bar_col] = df1.apply(lambda row: add_colorbars(row[christian], row[muslim]), axis=1)

In [8]:
# Final column order for the schooling table, then sort the countries by the
# 1940s Christian-Muslim gap (largest first) using a throw-away helper column.
ysc_table_cols = ['country',
                  'y_c_1940', 'y_m_1940', 'y_t_1940', 'd40',
                  'y_c_1980', 'y_m_1980', 'y_t_1980', 'd80']
df1 = (df1.loc[:, ysc_table_cols]
          .assign(aux=lambda frame: frame.y_c_1940 - frame.y_m_1940)
          .sort_values(by='aux', ascending=False)
          .reset_index(drop=True)
          .drop(columns='aux'))

In [9]:
# Round every value column to two decimals; the bar columns (the only names
# containing a 'd') and the leading `country` column are left alone.
value_columns = [name for name in df1.columns[1:] if 'd' not in name]
for name in value_columns:
    df1[name] = df1[name].round(2)

# Everything becomes text for the LaTeX writer; missing values render blank.
df1 = df1.astype('str').replace('nan', '')

In [10]:
# Wrap the formatted schooling frame in the project's TexTab helper; its
# create_tab_onepanel method is used below to render the LaTeX table.
tt = TexTab(df1)

In [11]:
# Options passed to TexTab.create_tab_onepanel for the schooling table.
# Column-number header: blank for the country column, then (1)-(8).
cns = ['', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)', '(7)', '(8)']
# NOTE(review): `lcols` / `gaps` semantics come from TexTab; they look like
# "left-aligned columns" and "columns followed by a spacer column" (the header
# rows injected later have an empty cell after country and after the first
# delta column) -- confirm against the TexTab source.
lcols = ['country']
gaps = ['country', 'd40']
cap = "Country-group-level years of schooling"
lab = "tab:_ysc_ctry_majrel"
lines = []
# Table notes.  Fixed: the parenthetical about Sierra Leone was never closed
# and was missing the word "for".
notes = '''This table reports mean years of schooling for individuals aged 25+ for the 1940s and 1980s cohorts
(because of census timing, we have to use the 1970s instead of the 1980s for Sierra Leone). Columns (1) - (3) 
show mean years of schooling for the 1940s, Columns (5) - (7) for the 1980s (or 1970s). Columns (4) and (8) 
show differences between Christians and Muslims. Red bars indicate a negative difference, green bars a positive 
difference.
'''

In [12]:
# Render the schooling table to a LaTeX string using the options set above.
table_options = dict(
    cns=cns,
    gaps=gaps,
    lcols=lcols,
    cap=cap,
    lab=lab,
    lines=lines,
    notes=notes,
    clines=False,
)
tab = tt.create_tab_onepanel(**table_options)

In [13]:
# Post-process the rendered table: add the two header rows that TexTab does
# not produce.  Plain str.replace is used with literal text, so no regex or
# replacement-template escaping is involved.

# 1) Fill the empty header row (ten consecutive '&') with the cohort spans.
group_header = r'&&\multicolumn{4}{c}{1940s}&&\multicolumn{4}{c}{1980s}'
tab = tab.replace('&' * 10, group_header)

# 2) Insert the column-title row right before the \hline that precedes the
#    first data row.  The anchor is taken from the first row of df1 instead of
#    a hard-coded 'Ghana', so the header survives a change in sort order.
#    Assumes TexTab writes the country string verbatim at the start of the
#    row, exactly as the previous 'Ghana' anchor did -- TODO confirm.
column_header = ('country&&Christian&Muslim&Traditional&$\\Delta(c-m)$'
                 '&&Christian&Muslim&Traditional&$\\Delta(c-m)$\\\\\n')
first_row_anchor = '\\hline\n' + df1['country'].iloc[0]
tab = tab.replace(first_row_anchor, column_header + first_row_anchor)

In [14]:
# Write the schooling table; the context manager closes the file even if the
# write fails, and os.path.join matches how the other paths are built.
with open(os.path.join(outdir, "_0_ctry_ysc_majrel.tex"), "w") as fh:
    fh.write(tab)

# Share primary completed

In [15]:
# One colour-bar column per cohort: Christian minus Muslim share.  The values
# are already shares, so the schooling rescaling is switched off.
for bar_col, cohort in [('d50', 1950), ('d90', 1990)]:
    christian = 'l_c_{}'.format(cohort)
    muslim = 'l_m_{}'.format(cohort)
    df2[bar_col] = df2.apply(
        lambda row: add_colorbars(row[christian], row[muslim], schooling=False), axis=1)

In [16]:
# Final column order for the share table, then sort the countries by the
# 1950s Christian-Muslim gap (largest first) using a throw-away helper column.
lit_table_cols = ['country',
                  'l_c_1950', 'l_m_1950', 'l_t_1950', 'd50',
                  'l_c_1990', 'l_m_1990', 'l_t_1990', 'd90']
df2 = (df2.loc[:, lit_table_cols]
          .assign(aux=lambda frame: frame.l_c_1950 - frame.l_m_1950)
          .sort_values(by='aux', ascending=False)
          .reset_index(drop=True)
          .drop(columns='aux'))

In [17]:
# Round every value column to two decimals; the bar columns (the only names
# containing a 'd') and the leading `country` column are left alone.
value_columns = [name for name in df2.columns[1:] if 'd' not in name]
for name in value_columns:
    df2[name] = df2[name].round(2)

# Everything becomes text for the LaTeX writer; missing values render blank.
df2 = df2.astype('str').replace('nan', '')

In [18]:
# Rebind `tt` to a TexTab wrapper around the share table; from here on `tt`
# no longer refers to the schooling table.
tt = TexTab(df2)

In [19]:
# Display the formatted (all-string) share table as a visual check before it
# is rendered to LaTeX.
df2

Unnamed: 0,country,l_c_1950,l_m_1950,l_t_1950,d50,l_c_1990,l_m_1990,l_t_1990,d90
0,Ghana,0.62,0.23,0.12,\colorbox{white}{\parbox{1.956342900120075em}{...,0.77,0.57,0.3,\colorbox{white}{\parbox{1.0310340358967003em}...
1,Cameroon,0.68,0.3,0.21,\colorbox{white}{\parbox{1.910782621457735em}{...,0.76,0.42,0.39,\colorbox{white}{\parbox{1.7283034257823948em}...
2,Nigeria,0.55,0.23,0.29,\colorbox{white}{\parbox{1.6164405705096698em}...,0.88,0.57,0.35,\colorbox{white}{\parbox{1.5743530513396053em}...
3,Togo,0.5,0.19,0.11,\colorbox{white}{\parbox{1.56316123957845em}{\...,0.73,0.57,0.42,\colorbox{white}{\parbox{0.8040710941562601em}...
4,Sierra Leone,0.43,0.15,0.04,\colorbox{white}{\parbox{1.4321882430889548em}...,0.49,0.29,0.25,\colorbox{white}{\parbox{0.9786977255686948em}...
5,Senegal,0.47,0.22,,\colorbox{white}{\parbox{1.274571008573655em}{...,0.8,0.45,,\colorbox{white}{\parbox{1.7685472332914953em}...
6,Benin,0.35,0.12,0.04,\colorbox{white}{\parbox{1.112256878034945em}{...,0.65,0.4,0.45,\colorbox{white}{\parbox{1.24936608924303em}{\...
7,Guinea,0.36,0.14,0.08,\colorbox{white}{\parbox{1.1033559194133151em}...,0.58,0.4,0.41,\colorbox{white}{\parbox{0.9080248350454351em}...
8,Egypt,0.47,0.31,,\colorbox{white}{\parbox{0.757042863115315em}{...,0.88,0.87,,\colorbox{white}{\parbox{0.06106418568567473em...
9,Malawi,0.25,0.11,0.16,\colorbox{white}{\parbox{0.72282804858193em}{\...,0.31,0.2,0.18,\colorbox{white}{\parbox{0.53299034678291em}{\...


In [20]:
# Options passed to TexTab.create_tab_onepanel for the share table.
# Column-number header: blank for the country column, then (1)-(8).
cns = ['', '(1)', '(2)', '(3)', '(4)', '(5)', '(6)', '(7)', '(8)']
# NOTE(review): `lcols` / `gaps` semantics come from TexTab; they look like
# "left-aligned columns" and "columns followed by a spacer column" -- confirm
# against the TexTab source.
lcols = ['country']
gaps = ['country', 'd50']
cap = "Country-group-level share primary completed"
lab = "tab:_lit_ctry_majrel"
lines = []
# Table notes.  Fixed: the parenthetical about Sierra Leone was never closed.
notes = '''This table reports share primary completed for individuals aged 14+ for the 1950s and 1990s cohorts
(because of census timing, we have to use the 1980s instead of the 1990s for Sierra Leone). Columns (1) - (3) 
show the shares for the 1950s, Columns (5) - (7) for the 1990s (1980s). Columns (4) and (8) show differences 
between Christians and Muslims. Red bars indicate a negative difference, green bars a positive difference.
'''

In [21]:
# Render the share table to a LaTeX string using the options set above.
table_options = dict(
    cns=cns,
    gaps=gaps,
    lcols=lcols,
    cap=cap,
    lab=lab,
    lines=lines,
    notes=notes,
    clines=False,
)
tab = tt.create_tab_onepanel(**table_options)

In [22]:
# Post-process the rendered table: add the two header rows that TexTab does
# not produce.  Plain str.replace is used with literal text, so no regex or
# replacement-template escaping is involved.

# 1) Fill the empty header row (ten consecutive '&') with the cohort spans.
group_header = r'&&\multicolumn{4}{c}{1950s}&&\multicolumn{4}{c}{1990s}'
tab = tab.replace('&' * 10, group_header)

# 2) Insert the column-title row right before the \hline that precedes the
#    first data row.  The anchor is taken from the first row of df2 instead of
#    a hard-coded 'Ghana', so the header survives a change in sort order.
#    Assumes TexTab writes the country string verbatim at the start of the
#    row, exactly as the previous 'Ghana' anchor did -- TODO confirm.
column_header = ('country&&Christian&Muslim&Traditional&$\\Delta(c-m)$'
                 '&&Christian&Muslim&Traditional&$\\Delta(c-m)$\\\\\n')
first_row_anchor = '\\hline\n' + df2['country'].iloc[0]
tab = tab.replace(first_row_anchor, column_header + first_row_anchor)

In [23]:
# Write the share table; the context manager closes the file even if the
# write fails, and os.path.join matches how the other paths are built.
with open(os.path.join(outdir, "_0_ctry_lit_majrel.tex"), "w") as fh:
    fh.write(tab)