In [1]:
import numpy as np
import pandas as pd
import re
from textab import TexTab
import os

In [2]:
# Resolve the project folders relative to the notebook's start-up directory.
# `_dh` is read from the notebook globals (IPython's directory history); its
# first entry is taken as the folder this notebook lives in.
current_folder = globals()['_dh'][0]
# The project root sits two levels above the notebook folder.
rootdir = os.path.dirname(os.path.dirname(current_folder))
# wdir: intermediate data read by this notebook; outdir: where tables are written.
wdir, outdir = (os.path.join(rootdir, stage, 'data')
                for stage in ('_2_intermediate', '_3_figures_tables'))

## 1980s birth decade (1990s substituted for Liberia, Mali, Nigeria and Togo)

In [3]:
# Load the district x religion x birth-decade file and keep the two mobility
# measures (immg_18, imdwmg_18) for the 1980 and 1990 birth decades of the
# three major religions.
df = pd.read_csv(os.path.join(wdir, '_F_dist_religion_bch10.csv'))
df = df[['iso', 'district', 'bch10', 'major_religion', 'immg_18', 'imdwmg_18']]
df = df[df.bch10.isin([1980, 1990]) &
        df.major_religion.isin(['Christian', 'Muslim', 'Traditional'])].reset_index(drop=True)
# NOTE: a sample-size filter (n_immg_18 > 10) is currently disabled; enabling
# it would also require selecting the n_immg_18 column above.

# First pivot: spread the religions into columns, leaving one row per
# (iso, district, birth decade). The new names are assigned by position:
# u_* are the immg_18 columns and d_* the imdwmg_18 columns, with c/m/t for
# Christian/Muslim/Traditional (relies on unstack ordering the religions
# alphabetically and on all three being present).
df = (df.set_index(['iso', 'district', 'bch10', 'major_religion'])
        .unstack()
        .reset_index(drop=False))
df.columns = df.columns.droplevel()
df.columns = ['iso', 'district', 'bd', 'u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']

# Second pivot: spread the birth decade into columns, leaving one row per
# (iso, district) with a <measure>_<decade> column for each combination.
df = (df.set_index(['iso', 'district', 'bd'])
        .unstack()
        .reset_index(drop=False))
df.columns = df.columns.droplevel()
df.columns = ['iso', 'district'] + ['{}_{}'.format(measure, decade)
                                    for measure in ['u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']
                                    for decade in [1980, 1990]]

# For Liberia, Mali, Nigeria and Togo the 1990s values stand in for the 1980s.
use_1990 = df.iso.isin(['LBR', 'MLI', 'NGA', 'TGO'])
for measure in ['u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']:
    df.loc[use_1990, measure + '_1980'] = df[measure + '_1990']

# Only the (possibly substituted) 1980 columns are kept for the summary table.
df1980 = df.drop(columns=['u_c_1990', 'u_m_1990', 'u_t_1990',
                          'd_c_1990', 'd_m_1990', 'd_t_1990'])

## All birth decades

In [4]:
# Pool all birth decades: one IM value per (iso, district, religion), computed
# as the mean of the decade-level estimates weighted by the n_* columns
# (presumably the number of observations behind each estimate -- confirm).
df = pd.read_csv(os.path.join(wdir, '_F_dist_religion_bch10.csv'))
df = df[['iso', 'district', 'bch10', 'major_religion',
         'immg_18', 'n_immg_18', 'imdwmg_18', 'n_imdwmg_18']]
# Keep rows with at least one non-missing estimate, for the three major
# religions. `.copy()` detaches the filtered frame so the column assignments
# below do not operate on a slice (avoids SettingWithCopyWarning).
df = df[df.immg_18.notnull() | df.imdwmg_18.notnull()]
df = df[df.major_religion.isin(['Christian', 'Muslim', 'Traditional'])].copy()
# Count a row's weight only where its estimate is observed. The group sum
# below skips NaN, so without this a row with a missing estimate would drop
# out of the numerator while its weight stayed in the denominator.
df['n_immg_18'] = df.n_immg_18.where(df.immg_18.notnull())
df['n_imdwmg_18'] = df.n_imdwmg_18.where(df.imdwmg_18.notnull())
df['xu'] = df.immg_18 * df.n_immg_18
df['xd'] = df.imdwmg_18 * df.n_imdwmg_18
df = (df.groupby(['iso', 'district', 'major_religion'])
        [['n_immg_18', 'n_imdwmg_18', 'xu', 'xd']]
        .sum()
        .reset_index(drop=False))
# df = df[df.n_immg_18 > 10]
# Weighted means; a group with zero total weight yields NaN and is dropped
# later when the summary statistics are computed.
df['u'] = df.xu / df.n_immg_18
df['d'] = df.xd / df.n_imdwmg_18
# Spread the religions into columns, one row per (iso, district). Names are
# assigned by position: u_* from `u`, d_* from `d`, with c/m/t for
# Christian/Muslim/Traditional (relies on all three religions being present).
df = (df.set_index(['iso', 'district', 'major_religion'])[['u', 'd']]
        .unstack()
        .reset_index(drop=False))
df.columns = df.columns.droplevel()
df.columns = ['iso', 'district', 'u_c', 'u_m', 'u_t', 'd_c', 'd_m', 'd_t']

## creating sumstats

In [5]:
def percentile(n):
    """Return a one-argument function computing the n-th percentile.

    The returned function is named ``percentile_<n>`` so that it gets a
    distinguishable label when its ``__name__`` is used (e.g. as a column
    name in an aggregation).
    """
    label = 'percentile_%s' % n

    def percentile_(x):
        return np.percentile(x, n)

    percentile_.__name__ = label
    return percentile_

def sumstats(pandas_series, majrel, direction):
    """Build one summary-statistics row for a series.

    Parameters
    ----------
    pandas_series : pandas.Series
        Values to summarise; missing values are dropped first.
    majrel : str
        Religion label stored in the first position of the row.
    direction : str
        Direction label stored in the second position of the row.

    Returns
    -------
    list
        ``[majrel, direction, N, mean, min, p10, p50, p90, max]`` where N is
        the number of non-missing values and every statistic is rounded to
        three decimals. If no non-missing values remain, N is 0 and all
        statistics are NaN.
    """
    values = pandas_series.dropna()
    n_obs = values.shape[0]
    if n_obs == 0:
        # np.percentile raises on an empty input; report an empty row instead.
        return [majrel, direction, 0] + [np.nan] * 6

    p10, p50, p90 = np.percentile(values, [10, 50, 90])
    return [
        majrel,
        direction,
        n_obs,
        np.round(values.mean(), 3),
        # Rounded like every other statistic so the table is consistent.
        np.round(values.min(), 3),
        np.round(p10, 3),
        np.round(p50, 3),
        np.round(p90, 3),
        np.round(values.max(), 3),
    ]
    

def make_sumstats_df(df):
    """Summarise each value column of a wide district-level table.

    The first two columns of ``df`` are skipped (identifier columns); every
    remaining column is passed to ``sumstats``. Religion and direction labels
    are assigned by column position: the first three columns are labelled
    Christian/Muslim/Traditional with direction 'up', the next three the same
    religions with direction 'down'. At most six value columns are supported.

    Returns a DataFrame with one row per value column and the columns
    religion, direction, N, mean, min, p10, p50, p90, max.
    """
    rels = ['Christian', 'Muslim', 'Traditional'] * 2
    dirs = ['up'] * 3 + ['down'] * 3

    rows = [sumstats(df[col], rels[pos], dirs[pos])
            for pos, col in enumerate(df.columns[2:])]

    header = ['religion', 'direction', 'N', 'mean', 'min', 'p10', 'p50', 'p90', 'max']
    return pd.DataFrame(rows, columns=header)

In [6]:
# Collapse each district-level table into one summary row per religion and
# direction. `df` is the all-birth-decades table built in the preceding data
# cell.
# NOTE(review): `df1980` is rebound here from the district-level table to its
# summary, so re-running this cell without first re-running the cell that
# builds `df1980` would feed the summary table back into make_sumstats_df.
dfall = make_sumstats_df(df)
df1980 = make_sumstats_df(df1980)

## writing .tex tables

### all birth decades

In [7]:
# Wrap the all-birth-decades summary table in the TexTab helper (imported from
# the `textab` module) that is used below to render the LaTeX table.
tt = TexTab(dfall)

In [8]:
# Arguments passed to TexTab.create_tab_onepanel for the all-birth-decades
# table: columns to print, the label columns among them, caption and label.
lcols = ['religion', 'direction']
cns = lcols + ['N', 'mean', 'min', 'p10', 'p50', 'p90', 'max']
gaps = ['']
lines = list()
cap = "Summary statistics, district-level IM, ages 14-18, all birth decades"
lab = "tab:_sumstats_im_dist_majrel_1418_allbch"
# Placeholder table notes (single line followed by a newline).
notes = 'TABLES NOTES.\n'

In [9]:
# Render the one-panel LaTeX table from the parameters defined above.
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)
# Remove the line that starts with "&&(1)" and ends with "\\", together with
# the "\hline" on the line after it. Raw strings keep the regex escapes intact
# (the previous non-raw literals contained the invalid escapes "\(" and "\)").
tab = re.sub(r'\n&&\(1\).+\\\n\\hline', '', tab)
# Scale every \columnwidth occurrence to 0.6\columnwidth.
tab = re.sub(r'\\columnwidth', r'0.6\\columnwidth', tab)

In [10]:
# Write the rendered table to the output folder. The context manager closes
# the file even if the write fails.
with open(os.path.join(outdir, "_2_sumstats_dist_im_majrel_1418_allbch.tex"), "w") as fh:
    fh.write(tab)

### 1980s

In [11]:
# Wrap the 1980s summary table in the TexTab helper (imported from the
# `textab` module) that is used below to render the LaTeX table.
tt = TexTab(df1980)

In [12]:
# Arguments passed to TexTab.create_tab_onepanel for the 1980s table: columns
# to print, the label columns among them, caption and label.
lcols = ['religion', 'direction']
cns = lcols + ['N', 'mean', 'min', 'p10', 'p50', 'p90', 'max']
gaps = ['']
lines = list()
cap = "Summary statistics, district-level IM, ages 14-18, 1980s"
lab = "tab:_sumstats_im_dist_majrel_1418_1980bch"
# Placeholder table notes (single line followed by a newline).
notes = 'TABLES NOTES.\n'

In [13]:
# Render the one-panel LaTeX table from the parameters defined above.
tab = tt.create_tab_onepanel(cns=cns, gaps=gaps, lcols=lcols,
                             cap=cap, lab=lab, lines=lines, notes=notes, clines=False)
# Remove the line that starts with "&&(1)" and ends with "\\", together with
# the "\hline" on the line after it. Raw strings keep the regex escapes intact
# (the previous non-raw literals contained the invalid escapes "\(" and "\)").
tab = re.sub(r'\n&&\(1\).+\\\n\\hline', '', tab)
# Scale every \columnwidth occurrence to 0.6\columnwidth.
tab = re.sub(r'\\columnwidth', r'0.6\\columnwidth', tab)

In [14]:
# Write the rendered table to the output folder. The context manager closes
# the file even if the write fails.
with open(os.path.join(outdir, "_2_sumstats_dist_im_majrel_1418_1980bch.tex"), "w") as fh:
    fh.write(tab)