In [1]:
import pandas as pd
import numpy as np
import country_converter as coco
import re
import os

In [2]:
current_folder = globals()['_dh'][0]
rootdir = os.path.dirname(os.path.dirname(current_folder))
indir_prepro = os.path.join(rootdir, '_1_preprocessing', 'code')
indir_inter = os.path.join(rootdir, '_2_intermediate', 'data')
outdir = os.path.join(rootdir, '_3_figures_tables', 'data')

In [3]:
dff = pd.read_csv(os.path.join(indir_prepro, '_census_fractions.csv'))
df = pd.read_csv(os.path.join(indir_inter, '_nobstab_app_newdata.csv'))
df = pd.merge(df, dff, on=['iso', 'year'], how='left')
df['country'] =  coco.convert(names=list(df['iso']), to='name_short')
df = df[['country', 'year', 'fraction', 'ni',
         'ni_r', 'ni_a', 'ni_e',
         'ni100_e', 'ni25_e', 'ni18_e',
         'ni100_ec0', 'ni25_ec0', 'ni18_ec0']]
df.columns = ['country',
              'year',
              'full census fraction',
              'number of individuals in raw data',
              'number of individuals with religion observed',
              'number of individuals with religion + \
               own age observed',
              'number of individuals with religion + \
               own age + own education observed',
              'number of individuals, aged 14+, with \
               religion + own age + own education observed',
              'number of individuals, aged 14-25, with \
               religion + own age + own education observed',
              'number of individuals, aged 14-18, with \
               religion + own age + own education observed',
              'number of individuals, aged 14+, with \
               religion + own age + own education + \
               previous generation education observed',
              'number of individuals, aged 14-25, with \
               religion + own age + own education + \
               previous generation education observed',
              'number of individuals, aged 14-18, with \
               religion + own age + own education + \
               previous generation education observed'              
              ]
tot = pd.DataFrame(df.iloc[:,3:].sum(axis=0)).T
tcs = list(tot.columns)
tot['country'] = 'total'
tot['year'] = ''
tot['full census fraction'] = ''
tot = tot[['country', 'year', 'full census fraction'] + tcs]
df = pd.concat([df, tot], axis=0)
df

Unnamed: 0,country,year,full census fraction,number of individuals in raw data,number of individuals with religion observed,number of individuals with religion + own age observed,number of individuals with religion + own age + own education observed,"number of individuals, aged 14+, with religion + own age + own education observed","number of individuals, aged 14-25, with religion + own age + own education observed","number of individuals, aged 14-18, with religion + own age + own education observed","number of individuals, aged 14+, with religion + own age + own education + previous generation education observed","number of individuals, aged 14-25, with religion + own age + own education + previous generation education observed","number of individuals, aged 14-18, with religion + own age + own education + previous generation education observed"
0,Benin,1992.0,10.0,498419,495114,494900,433274,255736,100988,44455,108664,65040,34784
1,Benin,2002.0,10.0,685467,685467,685467,612658,373452,155832,69048,160458,104331,57364
2,Benin,2013.0,10.0,1009693,1009693,1009693,911604,559525,240049,108694,244182,170580,93329
3,Burkina Faso,1996.0,10.0,1081046,1025717,1021722,802832,552062,226254,114088,250828,157808,95669
4,Burkina Faso,2006.0,10.0,1417824,1417824,1410123,1244291,770161,321384,151393,327195,211275,123364
5,Botswana,2001.0,10.0,168676,118718,118211,117809,109509,44734,20578,42573,29119,16077
6,Botswana,2011.0,10.0,201752,147294,146827,146149,138094,48829,20638,47082,28817,14276
7,Cameroon,2005.0,10.0,1772359,1747716,1747716,1524571,1003327,431550,197049,426755,295388,162672
8,Egypt,1986.0,14.1,6799093,6797805,6793098,5417612,4261935,1609560,721960,1931312,1345068,693275
9,Egypt,1996.0,10.0,5902243,5902243,5901839,4453382,3810835,1471285,718874,1616808,1230963,695795


# Writing the table

## Column names

In [4]:
chlen = 16
incols = []
for col in df.columns:
    il = col.split(' ')
    # ilrev = il.copy()
    # ilrev.reverse()
    ol = []
    thislen = 0
    thisline = []
    for word in il:
        if len(word) + thislen <= chlen:
            thisline.append(word)
            thislen += len(word)
        else:
            ol.append(thisline)
            thislen = len(word)
            thisline = [word]
    ol.append(thisline)
    incols.append(ol)
maxlen = max([len(subl) for subl in incols])
for subl in incols:
    while len(subl) < maxlen:
        subl.append([])
outcols = []
for col in incols:
    outlines = []
    for line in col:
        outlines.append(" ".join(line))
    outcols.append(outlines)

## Header

In [5]:
l1 = "\singlespacing\n"
l2 = "\\begin{table}[ht!]\n"
l3 = "\\centering\n"
l4 = "\\caption{Sample construction}\n"
l5 = "\\label{app:tab:nobs}\n"
l6 = "\\resizebox{0.97\\columnwidth}{!}{\n"
l7 = "\\begin{tabular}{l*{" + '{}'.format(df.shape[1]) + "}l}\n"
l8 = "\\hline\n"
l9 = '&&&' + ''.join(['({}) & '.format(i+1)  if i < df.shape[1]-3 else
              '({})'.format(i+1) for i in range(df.shape[1]-3)]) + "\\\\\n"
l10 = "\\hline\n"
l11l = []
l11l.append([])
l11l.append([])
l11l.append([])
l11l.append([])
l11l.append([])
l11l.append([])
l11l.append([])
l11l.append([])
l11l.append([])
for idc, col in enumerate(outcols):
    for idl, line in enumerate(col):
        if idc == 0:
            l11l[idl].append(line)
        else:
            l11l[idl].append("&"+line)
            
l11 = ''
for l11i in l11l:
    l11+="".join(l11i)+"\\\\ \n"
l11+="\\hline\\hline \n"

In [6]:
header = l1+l2+l3+l4+l5+l6+l7+l8+l9+l10+l11
print(header)

\singlespacing
\begin{table}[ht!]
\centering
\caption{Sample construction}
\label{app:tab:nobs}
\resizebox{0.97\columnwidth}{!}{
\begin{tabular}{l*{13}l}
\hline
&&&(1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) & (10) & \\
\hline
country&year&full census&number of&number of&number of&number of&number of&number of&number of&number of&number of&number of\\ 
&&fraction&individuals in raw&individuals with&individuals with&individuals with&individuals, aged&individuals, aged&individuals, aged&individuals, aged&individuals, aged&individuals, aged\\ 
&&&data&religion observed&religion +                own age&religion +                own age +&14+, with                religion&14-25, with               &14-18, with               &14+, with                religion&14-25, with               &14-18, with               \\ 
&&&&&observed&own education&+ own age + own&religion + own age +&religion + own age +&+ own age + own&religion + own age +&religion + own age +\\ 
&&&&&&observed&educatio

## Body

In [7]:
body=''
rows = df.shape[0]
cols = df.shape[1]
for i in range(rows):
    row=''
    for j in range(cols):
        if j == 0:
            row+="{}".format(df.iloc[i,j])
        elif 'str' in str(type(df.iloc[i,j])) or 'float' in str(type(df.iloc[i,j])):
            row+="&{}".format(df.iloc[i,j])
        elif 'int' in str(type(df.iloc[i,j])):
            if j == 1:
                row+="&{}".format(df.iloc[i,j])
            else:
                row+="&{:,}".format(df.iloc[i,j])

    row += "\\\\ \n"
    body +=row
body+="\\hline\n"
body = re.sub(r'(\ntotal)', r'\n\\hline\1', body, flags=re.DOTALL)

In [8]:
l1 = "\\end{tabular}\n"
l2 = "}"
l3 = "\\captionsetup{size=scriptsize, justification=justified, width=\columnwidth}\n"
l4 = '''\\caption*{\\textbf{DETAILED TABLE NOTES}}\n'''
l5 = "\\end{table}\n"
l6 = "\onehalfspacing"

In [9]:
footer=l1+l2+l3+l4+l5+l6

## Final cleanup and writing

In [10]:
textab = header+body+footer

In [11]:
fh = open(outdir + "/_apptab_nobs.tex", "w")
fh.write(textab)
fh.close()