In [1]:
import numpy as np
import pandas as pd
import re
from textab import TexTab
import os

In [2]:
# Locate the project directories relative to the folder the notebook started in.
# IPython exposes its directory history as `_dh`; its first entry is the start-up
# directory. When the code runs outside IPython (e.g. exported to a plain script)
# `_dh` does not exist, so fall back to the current working directory instead of
# failing with a KeyError.
current_folder = globals().get('_dh', [os.getcwd()])[0]
# The project root is two levels above the notebook folder.
rootdir = os.path.dirname(os.path.dirname(current_folder))
# Input data (intermediate files) and output location for the generated tables.
wdir = os.path.join(rootdir, '_2_intermediate', 'data')
outdir = os.path.join(rootdir, '_3_figures_tables', 'data')

## 1980s birth decade (1990s for Liberia, Mali, Nigeria and Togo)

In [3]:
# Build one row per (iso, district) with a column for every combination of
# measure, religion and birth decade.
#   u_* columns come from `immg_18`, d_* columns from `imdwmg_18`;
#   the _c / _m / _t suffixes are the Christian / Muslim / Traditional groups.
# (A disabled variant of this cell used `n_immg_18` and kept only cells with
#  n_immg_18 > 10; that filter is not applied here.)
df = pd.read_csv(os.path.join(wdir, '_F_dist_religion_bch10.csv'))

# Keep the 1980 and 1990 birth decades and the three major religions only.
keep_rows = (df['bch10'].isin([1980, 1990]) &
             df['major_religion'].isin(['Christian', 'Muslim', 'Traditional']))
df = df.loc[keep_rows, ['iso', 'district', 'bch10', 'major_religion',
                        'immg_18', 'imdwmg_18']].reset_index(drop=True)

# Step 1: spread the religions across columns. The positional renaming below
# relies on the columns coming out as immg_18 x (Christian, Muslim, Traditional)
# followed by imdwmg_18 x (Christian, Muslim, Traditional).
df = (df.set_index(['iso', 'district', 'bch10', 'major_religion'])
        .unstack()
        .reset_index(drop=False))
df.columns = df.columns.droplevel()
df.columns = ['iso', 'district', 'bd', 'u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']

# Step 2: spread the birth decades across columns, giving <measure>_<decade>.
df = (df.set_index(['iso', 'district', 'bd'])
        .unstack()
        .reset_index(drop=False))
df.columns = df.columns.droplevel()
df.columns = ['iso', 'district'] + [
    '{}_{}'.format(measure, decade)
    for measure in ['u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']
    for decade in [1980, 1990]
]

# For Liberia, Mali, Nigeria and Togo the 1990s values are used in place of the
# 1980s ones: every *_1980 column is overwritten with the matching *_1990 column
# for the rows of these four countries (assignment aligns on the row index).
use_1990 = df['iso'].isin(['LBR', 'MLI', 'NGA', 'TGO'])
for var in ['u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']:
    df.loc[use_1990, '{}_1980'.format(var)] = df['{}_1990'.format(var)]

# Keep only the (patched) 1980 columns; the 1990 columns are no longer needed.
df1980 = df.drop(['u_c_1990', 'u_m_1990', 'u_t_1990', 'd_c_1990', 'd_m_1990', 'd_t_1990'], axis=1)

## All birth decades

In [4]:
# Pool all birth decades: for every (iso, district, religion) compute the
# weighted mean of `immg_18` (weights `n_immg_18`) and of `imdwmg_18`
# (weights `n_imdwmg_18`), then spread the religions across columns.
df = pd.read_csv(os.path.join(wdir, '_F_dist_religion_bch10.csv'))
df = df[['iso', 'district', 'bch10', 'major_religion',
         'immg_18', 'n_immg_18', 'imdwmg_18', 'n_imdwmg_18']]

# Keep rows where at least one of the two measures is present, and only the
# three major religions.
has_measure = df['immg_18'].notna() | df['imdwmg_18'].notna()
df = df[has_measure & df['major_religion'].isin(['Christian', 'Muslim', 'Traditional'])]

# Weighted sums (value x weight); summing these and the weights per group and
# dividing gives the weighted mean across birth decades.
# NOTE(review): a row whose measure is missing but whose n_* weight is positive
# would still add to the denominator -- presumably n_* is 0/NaN in that case;
# confirm against the input file.
df = df.assign(xu=df['immg_18'] * df['n_immg_18'],
               xd=df['imdwmg_18'] * df['n_imdwmg_18'])
df = (df.groupby(['iso', 'district', 'major_religion'])[['n_immg_18', 'n_imdwmg_18', 'xu', 'xd']]
        .sum()
        .reset_index(drop=False))
# (A disabled variant kept only groups with n_immg_18 > 10; not applied here.)
df['u'] = df['xu'] / df['n_immg_18']
df['d'] = df['xd'] / df['n_imdwmg_18']

# One row per (iso, district); columns u/d x (Christian, Muslim, Traditional).
# The positional renaming relies on that column order.
df = (df[['iso', 'district', 'major_religion', 'u', 'd']]
        .set_index(['iso', 'district', 'major_religion'])
        .unstack()
        .reset_index(drop=False))
df.columns = df.columns.droplevel()
df.columns = ['iso', 'district', 'u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']

In [5]:
# Zero-initialised 6x6 holders for the two correlation matrices (one row/column
# per measure column); only the lower triangle and diagonal get filled later,
# so the upper triangle stays at 0.
corrmat_all = np.zeros((6, 6))
corrmat_1980 = np.zeros_like(corrmat_all)

In [6]:
def _fill_lower_corr(frame, out):
    """Fill the lower triangle (incl. diagonal) of `out` with pairwise correlations.

    The first two columns of `frame` are skipped; every remaining column is
    correlated with every column at or before it. Each pair uses only the rows
    where both columns are non-null. `out` is modified in place and must be at
    least as large as the number of correlated columns; entries above the
    diagonal are left untouched.
    """
    value_cols = frame.columns[2:]
    for i, row_col in enumerate(value_cols):
        for j, col_col in enumerate(value_cols[:i + 1]):
            # Pairwise-complete observations, computed once per pair.
            both = frame[row_col].notna() & frame[col_col].notna()
            out[i, j] = np.corrcoef(frame.loc[both, row_col].values,
                                    frame.loc[both, col_col].values)[0, 1]


# `df` must be the all-birth-decades frame and `df1980` the district-level 1980s
# frame at this point. `df1980` is rebound to a correlation *table* further down,
# so re-running this cell after that one would correlate the table itself.
_fill_lower_corr(df, corrmat_all)
_fill_lower_corr(df1980, corrmat_1980)

In [7]:
# Presentation table for the all-birth-decades correlations: values rounded to
# three decimals, preceded by a 'direction' column (labelled only on the middle
# row of each three-row group) and an 'index' column naming the religion.
dfall = pd.DataFrame(np.round(corrmat_all, 3),
                     columns=['uc', 'um', 'ut', 'dc', 'dm', 'dt'])
dfall.insert(0, 'index', ['Christian', 'Muslim', 'Traditional',
                          'Christian', 'Muslim', 'Traditional'])
dfall.insert(0, 'direction', ['', 'upward', '', '', 'downward', ''])
dfall

Unnamed: 0,direction,index,uc,um,ut,dc,dm,dt
0,,Christian,1.0,0.0,0.0,0.0,0.0,0.0
1,upward,Muslim,0.732,1.0,0.0,0.0,0.0,0.0
2,,Traditional,0.724,0.603,1.0,0.0,0.0,0.0
3,,Christian,-0.76,-0.606,-0.579,1.0,0.0,0.0
4,downward,Muslim,-0.625,-0.633,-0.444,0.615,1.0,0.0
5,,Traditional,-0.6,-0.431,-0.581,0.525,0.388,1.0


In [8]:
# Presentation table for the 1980s correlations: values rounded to three
# decimals, preceded by a 'direction' column (labelled only on the middle row of
# each three-row group) and an 'index' column naming the religion.
# NOTE: this rebinds `df1980` from the district-level frame to the correlation
# table, so the correlation cell must not be re-run after this one without
# rebuilding the district-level frame first.
df1980 = pd.DataFrame(np.round(corrmat_1980, 3),
                      columns=['uc', 'um', 'ut', 'dc', 'dm', 'dt'])
df1980.insert(0, 'index', ['Christian', 'Muslim', 'Traditional',
                           'Christian', 'Muslim', 'Traditional'])
df1980.insert(0, 'direction', ['', 'upward', '', '', 'downward', ''])
df1980

Unnamed: 0,direction,index,uc,um,ut,dc,dm,dt
0,,Christian,1.0,0.0,0.0,0.0,0.0,0.0
1,upward,Muslim,0.675,1.0,0.0,0.0,0.0,0.0
2,,Traditional,0.627,0.468,1.0,0.0,0.0,0.0
3,,Christian,-0.648,-0.509,-0.461,1.0,0.0,0.0
4,downward,Muslim,-0.558,-0.585,-0.389,0.543,1.0,0.0
5,,Traditional,-0.45,-0.311,-0.437,0.396,0.292,1.0


## writing .tex tables

### all birth decades

In [9]:
# Wrap the all-birth-decades correlation table in the project's TexTab helper,
# which is used below to render it as LaTeX.
tt = TexTab(dfall)

In [10]:
# Settings handed to TexTab.create_tab_onepanel for the all-birth-decades table.
# Header labels, one per table column: two blanks for the label columns, then
# the three religions once for each block of measure columns.
cns = ['', ''] + 2 * ['Christian', 'Muslim', 'Traditional']
# Columns of the table that hold row labels rather than numbers.
lcols = ['direction', 'index']
gaps = ['']
lines = []
# Caption, LaTeX label and notes text of the table.
cap = "Correlation matrix, district-level IM, ages 14-18, all birth decades"
lab = "tab:_corrmat_im_dist_majrel_1418_allbch"
notes = 'TABLES NOTES.\n'

In [11]:
# Render the table with TexTab, then patch the generated LaTeX source.
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)
# Replacement header row. It is used as an re.sub template, where a doubled
# backslash stands for one literal backslash, so the inserted text reads
# `&&\multicolumn{3}{c|}{upward}&\multicolumn{3}{c|}{downward}\\` + newline.
nl = '&&\\\\multicolumn{3}{c|}{upward}&\\\\multicolumn{3}{c|}{downward}\\\\\\\n'
# Swap the numbered header line ("&&(1)...") for the upward/downward header.
tab = re.sub(r'&&\(1\).+\n', nl, tab)
# Add vertical rules to the column specification.
tab = re.sub(r'llcccccc', r'|l|l|cccccc|', tab)
# Shrink the table to 80% of the column width.
tab = re.sub(r'\\columnwidth', r'0.8\\columnwidth', tab)
# Blank out the cells that are exactly 0.0 (the unfilled upper triangle).
# The dot is escaped and a following digit is excluded so that a value such as
# 0.05 is left intact instead of being turned into "5".
tab = re.sub(r'&0\.0(?!\d)', r'&', tab)
# Put a vertical rule after the last "upward" header cell.
tab = re.sub(r'Traditional(&Christian)', r'\\multicolumn{1}{c|}{Traditional}\1', tab)
# Insert a partial horizontal rule under the first two columns in front of a
# "&Christian" row that follows a row ending in three empty cells.
tab = re.sub(r'(&&&\\\\\n)(&Christian)', r'\1\\cline{1-2}\n\2', tab)

In [12]:
# Write the LaTeX source of the all-birth-decades table to the output folder.
# The context manager closes the file even if the write raises.
with open(os.path.join(outdir, "_2_corrmat_dist_im_majrel_1418_allbch.tex"), "w") as fh:
    fh.write(tab)

### 1980s

In [13]:
# Wrap the 1980s correlation table in the project's TexTab helper. At this point
# `df1980` is the rounded correlation table, not the district-level frame.
tt = TexTab(df1980)

In [14]:
# Settings handed to TexTab.create_tab_onepanel for the 1980s table.
# Header labels, one per table column: two blanks for the label columns, then
# the three religions once for each block of measure columns.
cns = ['', ''] + 2 * ['Christian', 'Muslim', 'Traditional']
# Columns of the table that hold row labels rather than numbers.
lcols = ['direction', 'index']
gaps = ['']
lines = []
# Caption, LaTeX label and notes text of the table.
cap = "Correlation matrix, district-level IM, ages 14-18, 1980s"
lab = "tab:_corrmat_im_dist_majrel_1418_1980bch"
notes = 'TABLES NOTES.\n'

In [15]:
# Render the table with TexTab, then patch the generated LaTeX source.
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)
# Replacement header row. It is used as an re.sub template, where a doubled
# backslash stands for one literal backslash, so the inserted text reads
# `&&\multicolumn{3}{c|}{upward}&\multicolumn{3}{c|}{downward}\\` + newline.
nl = '&&\\\\multicolumn{3}{c|}{upward}&\\\\multicolumn{3}{c|}{downward}\\\\\\\n'
# Swap the numbered header line ("&&(1)...") for the upward/downward header.
tab = re.sub(r'&&\(1\).+\n', nl, tab)
# Add vertical rules to the column specification.
tab = re.sub(r'llcccccc', r'|l|l|cccccc|', tab)
# Shrink the table to 80% of the column width.
tab = re.sub(r'\\columnwidth', r'0.8\\columnwidth', tab)
# Blank out the cells that are exactly 0.0 (the unfilled upper triangle).
# The dot is escaped and a following digit is excluded so that a value such as
# 0.05 is left intact instead of being turned into "5".
tab = re.sub(r'&0\.0(?!\d)', r'&', tab)
# Put a vertical rule after the last "upward" header cell.
tab = re.sub(r'Traditional(&Christian)', r'\\multicolumn{1}{c|}{Traditional}\1', tab)
# Insert a partial horizontal rule under the first two columns in front of a
# "&Christian" row that follows a row ending in three empty cells.
tab = re.sub(r'(&&&\\\\\n)(&Christian)', r'\1\\cline{1-2}\n\2', tab)

In [16]:
# Write the LaTeX source of the 1980s table to the output folder.
# The context manager closes the file even if the write raises.
with open(os.path.join(outdir, "_2_corrmat_dist_im_majrel_1418_1980bch.tex"), "w") as fh:
    fh.write(tab)