In [1]:
import pandas as pd
import numpy as np
import country_converter as coco
import os

In [2]:
# IPython stores its directory history in `_dh`; the first entry is taken as
# the folder this notebook was started from.
current_folder = globals()['_dh'][0]

# The project root sits two levels above the notebook folder.
notebook_parent = os.path.dirname(current_folder)
rootdir = os.path.dirname(notebook_parent)

# Input (intermediate data) and output (figure/table data) folders.
wdir, outdir = (
    os.path.join(rootdir, stage, 'data')
    for stage in ('_2_intermediate', '_3_figures_tables')
)

In [3]:
# Read the master table and add a short country name derived from the ISO code.
df = pd.read_csv(os.path.join(wdir, 'religions__mastertab.csv'))
df['country'] = coco.convert(names=df['iso'].tolist(), to='name_short')

# Keep rows with a major religion, ordered by country code and then by
# descending major-group share and descending detailed-group share.
df = (
    df.loc[df['major_religion'].notna()]
    .sort_values(by=['iso', 'shr', 'shrorg'], ascending=[True, False, False])
    .reset_index(drop=True)
    .copy(deep=True)
)

# Move `country` (currently last) to the front; the first column is dropped.
df = df[['country', *df.columns[1:-1]]]

# Source column -> label used in the printed table, in output order.
column_labels = {
    'country': 'country',
    'religiond_str': 'detailed religion name, IPUMS',
    'nrorg': 'number of observations in religion',
    'shrorg': 'group share, religion',
    'nrorge': 'number of observations in religion with education of the old observed',
    'shrorge': 'group share, religion with education of the old observed',
    'major_religion': 'major religion name',
    'nr': 'number of observations in major religion',
    'shr': 'group share, major religion',
    'nre': 'number of observations in major religion with education of the old observed',
    'shre': 'group share, major religion with education of the old observed',
}
df = df[list(column_labels.keys())]
df.columns = list(column_labels.values())

An annoying bit of cleanup: for several countries the residual category is labelled 'Other, Rwanda'. This is only a mislabelling; the underlying data are correct.

In [4]:
# Countries whose residual category carries the label 'Other, Rwanda';
# each one is relabelled to 'Other, <country>'.
label_col = 'detailed religion name, IPUMS'
for country_name in ('Benin', 'Botswana', 'Egypt', 'Sierra Leone',
                     'Uganda', 'South Africa', 'Zambia'):
    mislabelled = (df.country == country_name) & (df[label_col] == 'Other, Rwanda')
    df.loc[mislabelled, label_col] = 'Other, ' + country_name

# Writing the table

## Column names

In [5]:
def _wrap_words(text, width):
    """Greedily split `text` into lines of words.

    A word joins the current line while the summed length of the words on that
    line stays within `width`; the separating spaces are not counted.

    Returns a list of lines, each line being a list of words.
    """
    lines = []
    current = []
    current_len = 0
    for word in text.split(' '):
        if len(word) + current_len <= width:
            current.append(word)
            current_len += len(word)
        else:
            # Only flush a line that holds something: a word longer than
            # `width` at the start would otherwise emit an empty first line.
            if current:
                lines.append(current)
            current = [word]
            current_len = len(word)
    lines.append(current)
    return lines


# Maximum number of letters (spaces excluded) per header line.
chlen = 16

# Wrap every column name except `country` (the first column).
incols = [_wrap_words(col, chlen) for col in df.columns[1:]]

# Pad every column to the same number of lines so the header forms a grid.
maxlen = max([len(subl) for subl in incols])
for subl in incols:
    subl.extend([] for _ in range(maxlen - len(subl)))

# One list of header-line strings per column.
outcols = [[" ".join(line) for line in col] for col in incols]

## Header

In [6]:
# Pieces of the longtable preamble and column header.
l1 = "\\tiny\n"
l2 = "\\singlespacing\n"  # backslash escaped: "\s" is an invalid escape sequence
l3 = "\\begin{longtable}{l*{" + '{}'.format(df.shape[1]-1) + "}l}\n"
l4 = "\\caption{Religious groups by country and their sizes}\n"
l5 = "\\label{app:tab:groups}\\\\\n"
l6 = "\\hline\n"
# Column numbers "(1) & (2) & ..." for every column except `country`.
l7 = ' & '.join('({})'.format(i + 1) for i in range(df.shape[1] - 1)) + "\\\\\n"
l8 = "\\hline\n"

# One list of cells per header row. At least six rows are emitted (as before);
# more are added if a column name wraps onto more than six lines, instead of
# failing with an IndexError.
n_header_rows = max([6] + [len(col) for col in outcols])
l9l = [[] for _ in range(n_header_rows)]
for idc, col in enumerate(outcols):
    for idl, line in enumerate(col):
        # Every column after the first is preceded by the cell separator.
        l9l[idl].append(line if idc == 0 else "&" + line)

l9 = "".join("".join(cells) + "\\\\ \n" for cells in l9l)
l9 += "\\hline\\hline \n"

# Header repeated on continuation pages.
l10 = "\\endfirsthead\n"
l11 = "\\caption{Religious groups by country and their sizes, continued}\\\\\n"
l12 = l6
l13 = l7
l14 = l8
l15 = l9
l16 = "\\endhead\n"

In [7]:
# Assemble the table header. The footer cell further down rebinds l1-l7, so
# this cell must be run while those names still hold the header pieces.
header = "".join([
    l1, l2, l3, l4, l5, l6, l7, l8,
    l9, l10, l11, l12, l13, l14, l15, l16,
])

## Body

In [8]:
def _format_cell(value):
    """Format one table value as LaTeX cell text.

    Strings are emitted unchanged, floats with three decimals and integers
    with thousands separators. Any other type is emitted via `str.format`
    rather than dropped, so every row keeps the same number of cells.
    """
    if isinstance(value, str):
        return "{}".format(value)
    if isinstance(value, (float, np.floating)):
        return f"{value:.3f}"
    if isinstance(value, (int, np.integer)) and not isinstance(value, bool):
        return "{:,}".format(value)
    return "{}".format(value)


rows = df.shape[0]
cols = df.shape[1]
# Country headings span every printed column (all columns except `country`).
span = cols - 1

body_rows = []
country = None  # country of the previous row; None before the first row
for i in range(rows):
    countryi = df.iloc[i, 0]

    row = ''
    if countryi != country:
        if country is not None:
            # Close the previous country's block and leave one empty row.
            row += '\\hline\n\\multicolumn{' + str(span) + '}{l}{}\\\\\n'
        # The heading is taken from the data, so the first country need not
        # be hard-coded.
        row += '\\multicolumn{' + str(span) + '}{l}{\\textbf{' + countryi + '}}\\\\\n\\hline\n'
        country = countryi

    for j in range(1, cols):
        value = df.iloc[i, j]  # read each cell once
        if j == 1:
            # First printed column: emitted as-is, without a leading separator.
            row += "{}".format(value)
        else:
            row += "&" + _format_cell(value)

    row += "\\\\ \n"
    body_rows.append(row)

body = "".join(body_rows) + "\\hline\n"

## Footer

In [9]:
# Pieces of the table footer: close the longtable, then a float holding the
# table notes. Note that this rebinds l1-l7, which the header cell also uses.
l1 = "\\end{longtable}\n"
l2 = "\\vspace{-0.8cm}\n"
l3 = "\\begin{table}[H]\n"
# Backslash before "columnwidth" escaped: "\c" is an invalid escape sequence.
l4 = "\\captionsetup{size=scriptsize, justification=justified, width=\\columnwidth}\n"
l5 = '''\\caption*{\\textbf{DETAILED TABLE NOTES}}\n'''
l6 = "\\end{table}\n"
# Backslash escaped: "\o" is an invalid escape sequence.
l7 = "\\onehalfspacing"

In [10]:
# Assemble the table footer from the pieces defined in the previous cell.
footer = "".join((l1, l2, l3, l4, l5, l6, l7))

## Final cleanup and writing

In [11]:
# Full LaTeX source of the table: header, body, footer, in that order.
textab = "".join((header, body, footer))

In [12]:
# Replace a few long group names with shorter forms, applied in this order.
for long_name, short_name in (
    ('Full Gospel Church of God in Southern Africa', 'Full Gospel Church of God in SA'),
    ('International Fellowship of Christian Churches', "Int'l Fellowship of Chr. Churches"),
    ('Christian, Protestant Pentecostal', "Christian, Prot. Pentecostal"),
):
    textab = textab.replace(long_name, short_name)

In [13]:
# Write the assembled LaTeX table. The context manager closes the file even
# if the write raises.
with open(os.path.join(outdir, "_apptab_groups.tex"), "w") as fh:
    fh.write(textab)