In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import os

In [2]:
def define_x_countries_have_data(df, pop, timevar, mint, maxt, suff):
    """Compute population-weighted religion-transition shares and per-country data-availability tables.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns ``iso``, ``bd``, ``year``, ``major_religion0``,
        ``major_religion`` and the weight column ``w{suff}``; ``age`` is also
        required when ``timevar`` is not ``'bd'``.
    pop : DataFrame
        Must contain ``iso``, ``bd`` and ``popshr``. It is merged onto the shares
        by country and birth decade (``'bd'`` branch) or by country only, using
        the ``bd == 1980`` rows (age branch).
    timevar : str
        ``'bd'`` selects the birth-decade branch. Any other value takes the age
        branch, but ``timevar`` is later used as a column name, so the only other
        workable value is ``'age'``.
    mint, maxt : int
        Inclusive bounds applied to ``timevar``. The availability tables are
        collapsed over ``range(mint, maxt + 10, 10)`` for ``'bd'`` and
        ``range(mint, maxt + 1)`` for age.
    suff : str
        Suffix of the weight column; weights are read from ``w{suff}``.

    Returns
    -------
    x : DataFrame
        One row per (``timevar``, ``major_religion0``, ``major_religion``) with
        ``timevar`` inside ``[mint, maxt]``; ``wshr`` holds the normalized,
        population-weighted share summed over countries. (Assumes ``pop`` brings in
        no columns other than ``popshr`` -- any extra numeric column would be summed
        as well.)
    countries_have_data : dict
        Keys are 'Christian', 'Muslim', 'Traditional' (matched against
        ``major_religion0``). Each value is a DataFrame indexed by country with one
        column per time value: 1 if a positive share exists for at least one of the
        three ``major_religion`` values, otherwise 0.

    Notes
    -----
    The function reads the module-level name ``iso_df`` (it is not a parameter).
    ``iso_df`` must be defined before the call and must have an ``aux`` column; its
    index is outer-joined so that countries without any rows still show up.
    """
    
    religs = ['Christian', 'Muslim', 'Traditional']

    if timevar == 'bd':
        # Total weight per country x birth decade x census year x religion pair.
        x = (df[['iso', 'bd', 'year', 'major_religion0', 'major_religion', f'w{suff}']]
             .groupby(['iso', 'bd', 'year', 'major_religion0', 'major_religion'])
             .sum()
             .reset_index(drop=False))
        # Share of each major_religion within its major_religion0 group.
        x['tot'] = x.groupby(['iso', 'bd', 'year', 'major_religion0'])[f'w{suff}'].transform('sum')
        x['shr'] = x[f'w{suff}'] / x.tot
        x.drop(['year', f'w{suff}', 'tot'], axis=1, inplace=True)
        # With 'year' dropped, the mean averages the shares across census years.
        x = x.groupby(['iso', 'bd', 'major_religion0', 'major_religion']).mean().reset_index(drop=False)
        # y (restricted to [mint, maxt]) only feeds the availability tables; x stays unrestricted for now.
        y = x[(x.bd>=mint) & (x.bd<=maxt)].reset_index(drop=True).copy()
    else:
        # Same computation as above, additionally split by age.
        x = (df[['iso', 'bd', 'year', 'age', 'major_religion0', 'major_religion', f'w{suff}']]
             .groupby(['iso', 'bd', 'year', 'age', 'major_religion0', 'major_religion'])
             .sum()
             .reset_index(drop=False))
        x['tot'] = x.groupby(['iso', 'bd', 'year', 'age', 'major_religion0'])[f'w{suff}'].transform('sum')
        x['shr'] = x[f'w{suff}'] / x.tot
        x.drop(['year', f'w{suff}', 'tot'], axis=1, inplace=True)
        # The age view keeps only rows with bd == 1980.
        x = x[x.bd==1980].reset_index(drop=True)
        x.drop('bd', axis=1, inplace=True)
        x = x.groupby(['iso', 'age', 'major_religion0', 'major_religion']).mean().reset_index(drop=False)
        y = x[(x.age>=mint) & (x.age<=maxt)].reset_index(drop=True).copy()        

    countries_have_data = {}
    for relig_i in religs:
        for relig_j in religs:
            # Country x time table of shares for this (relig_i -> relig_j) pair.
            yij = (y[(y.major_religion0==relig_i) & 
                     (y.major_religion==relig_j)]
                   .reset_index(drop=True)
                   .drop(['major_religion0', 'major_religion'], axis=1)
                   .set_index(['iso', timevar], drop=True)
                   .unstack())
            # Turn the shares into a 0/1 flag: 1 wherever a positive share is present.
            yij = yij.fillna(0)
            yij[yij>0] = 1
            yij.columns = yij.columns.droplevel(0)
            # Tag each time column with the initial of relig_j, e.g. '1950_C'.
            yij.columns = [f'{c}_{relig_j[0]}' for c in yij.columns]
            # 'Christian' is the first entry of religs, so it starts the table; the others are joined on.
            if relig_j == 'Christian':
                countries_have_data[relig_i] = yij
            else:
                countries_have_data[relig_i] = countries_have_data[relig_i].join(yij, how='outer')
        # iso_df is a module-level global; the outer join adds its countries, which get 0 after fillna.
        countries_have_data[relig_i] = countries_have_data[relig_i].join(iso_df, how='outer').drop('aux', axis=1).fillna(0)
        # Collapse the three '<t>_C' / '<t>_M' / '<t>_T' flags into a single column named t.
        # NOTE(review): a time value with no rows for some religion pair leaves its column
        # missing, in which case the list selection below presumably raises KeyError -- confirm.
        if timevar == 'bd':
            for t in range(mint, maxt+10, 10):
                countries_have_data[relig_i][t] = countries_have_data[relig_i][[f'{t}_C', f'{t}_M', f'{t}_T']].max(axis=1)
                countries_have_data[relig_i].drop([f'{t}_C', f'{t}_M', f'{t}_T'], axis=1, inplace=True)
        else:
            for t in range(mint, maxt+1, 1):
                countries_have_data[relig_i][t] = countries_have_data[relig_i][[f'{t}_C', f'{t}_M', f'{t}_T']].max(axis=1)
                countries_have_data[relig_i].drop([f'{t}_C', f'{t}_M', f'{t}_T'], axis=1, inplace=True)
           
    if timevar == 'bd':
        # Clamp bd into [1950, 2000] only for the merge with pop, then restore the original bd.
        x['bdx'] = x.bd
        x.loc[x.bd<1950, 'bd'] = 1950
        x.loc[x.bd>2000, 'bd'] = 2000
        x = pd.merge(x, pop, on=['iso', 'bd'], how='left')
        x['bd'] = x.bdx
        x.drop('bdx', axis=1, inplace=True)
    else:
        # The age view uses the population shares stored under bd == 1980 for every age.
        x = pd.merge(x, pop[pop.bd==1980].drop('bd', axis=1), on=['iso'], how='left')

    # normalize the population share for each birth decade/age X parent-religion
    x['wshr'] = x.shr * x.popshr
    x['aux'] = x.groupby([timevar, 'major_religion0'])['wshr'].transform('sum')
    x['wshr'] = x.wshr / x.aux
    # Recomputed group sum of the normalized weights; only the commented-out check below uses it.
    x['aux'] = x.groupby([timevar, 'major_religion0'])['wshr'].transform('sum')
    # print(x[x.major_religion0.isin(religs)].aux.min(), x[x.major_religion0.isin(religs)].aux.max())
    x.drop(['iso', 'shr', 'popshr', 'aux'], axis=1, inplace=True)
    # Sum the weighted shares over countries, then apply the [mint, maxt] window.
    x = x.groupby([timevar, 'major_religion0', 'major_religion']).sum().reset_index(drop=False)
    x = x[(x[timevar]>=mint) & (x[timevar]<=maxt)].reset_index(drop=True)
    
    return x, countries_have_data


def make_plots(x, countries_have_data, timevar, mint, maxt):
    """Draw a 2x3 figure: transition-share lines (top row) and data-availability grids (bottom row).

    Parameters
    ----------
    x : DataFrame
        Needs the columns ``timevar``, ``major_religion0``, ``major_religion`` and
        ``wshr``.
    countries_have_data : dict
        Maps 'Christian', 'Muslim' and 'Traditional' to a 0/1 table (rows are
        countries, columns are time values) that is rendered with ``imshow``.
    timevar : str
        ``'bd'`` labels the x axis 'birth decade' and spaces ticks 10 apart; any
        other value is treated as age with ticks 1 apart.
    mint, maxt : int
        First and last tick label of the availability grids. They should match the
        columns of the availability tables.

    Returns
    -------
    matplotlib Figure
        The figure is closed before it is returned, so it is only displayed when
        the caller shows or saves it.
    """
    religs = ['Christian', 'Muslim', 'Traditional']
    linecolors = ['r', 'b', 'k']
    linestyles = ['-', '--', ':']

    # Everything that differs between the birth-decade view and the age view.
    if timevar == 'bd':
        xlabel, aspect, step = 'birth decade', 0.13, 10
    else:
        xlabel, aspect, step = 'age', 0.35, 1
    xtlabs = np.arange(mint, maxt + step, step)
    n_cols = len(xtlabs)
    availability_cmap = colors.ListedColormap(['grey', 'white'])

    f, ax = plt.subplots(2, 3, figsize=(25, 8))

    for i, parent in enumerate(religs):
        top, bottom = ax[0, i], ax[1, i]

        # Top row: one line per child religion for this parent religion.
        from_parent = x[x.major_religion0 == parent]
        for child, color, style in zip(religs, linecolors, linestyles):
            series = from_parent[from_parent.major_religion == child]
            top.plot(series[timevar],
                     series.wshr,
                     color=color,
                     linestyle=style,
                     label='young:' + child)
        top.grid(color='lightgray')
        top.set_title('old: ' + parent)
        top.legend()
        top.set_xlabel(xlabel)
        top.set_ylabel('kids religion share')
        top.set_ylim([-0.05, 1.05])

        # Bottom row: availability grid. The row count comes from the table itself
        # instead of a hard-coded 21, so any number of countries is laid out correctly.
        availability = countries_have_data[parent]
        n_rows = len(availability.index)
        # Pin the colour range to [0, 1]: without it a table that is all 1 (or all 0)
        # is autoscaled to a degenerate range and full availability is drawn grey.
        bottom.imshow(availability, cmap=availability_cmap, vmin=0, vmax=1)
        bottom.set_aspect(aspect)
        bottom.set_xticks(np.arange(n_cols))
        bottom.set_xticklabels(xtlabs)
        # Cell borders: the lines sit on the half-integer boundaries between cells.
        bottom.hlines(y=np.arange(n_rows) + 0.5, xmin=-0.5, xmax=n_cols - 0.5, color="k")
        bottom.vlines(x=np.arange(n_cols) + 0.5, ymin=-0.5, ymax=n_rows - 0.5, color="black")
        bottom.set_yticks(np.arange(n_rows))
        bottom.set_yticklabels(availability.index)
        bottom.set_title('data availability by country')

    f.tight_layout()
    plt.close(f)
    return f

def make_plot_simple(x, timevar, mint, maxt, majrel0):
    """Plot the child-religion shares for a single parent religion on one axis.

    ``x`` needs the columns ``timevar``, ``major_religion0``, ``major_religion``
    and ``wshr``. Rows with ``major_religion0 == majrel0`` are drawn as one line
    per child religion. ``mint`` and ``maxt`` are accepted but not used here.
    The figure is closed before it is returned.
    """
    # (child religion, line colour, line style)
    child_styles = (
        ('Christian', 'r', '-'),
        ('Muslim', 'b', '--'),
        ('Traditional', 'k', ':'),
    )
    xlabel = 'birth decade' if timevar == 'bd' else 'age'

    f, ax = plt.subplots(1, 1, figsize=(10, 6))

    from_parent = x[x.major_religion0 == majrel0]
    for child, color, style in child_styles:
        series = from_parent[from_parent.major_religion == child]
        ax.plot(series[timevar],
                series.wshr,
                color=color,
                linestyle=style,
                label='young:' + child)

    ax.grid(color='lightgray')
    ax.legend()
    ax.set_xlabel(xlabel)
    ax.set_ylabel('kids religion share')
    ax.set_ylim([-0.05, 1.05])

    f.tight_layout()
    plt.close(f)
    return f

In [3]:
# Project folders are resolved relative to the notebook location: the root sits
# two levels above `current_folder`.
# NOTE(review): `_dh` is presumably IPython's directory-history list, so this
# cell only works inside an IPython/Jupyter kernel -- confirm.
current_folder = globals()['_dh'][0]
rootdir = os.path.dirname(os.path.dirname(current_folder))
wdir, outdir = (os.path.join(rootdir, stage, 'data')
                for stage in ('_2_intermediate', '_3_figures_tables'))

In [4]:
# Countries included in the population weights.
iso_codes = [
    'BEN', 'BFA', 'BWA', 'CMR', 'EGY', 'ETH', 'GHA',
    'GIN', 'LBR', 'MLI', 'MOZ', 'MUS', 'MWI', 'NGA',
    'RWA', 'SEN', 'SLE', 'TGO', 'UGA', 'ZAF', 'ZMB',
]

# Each country's share of the sample's total population per year, keyed by
# bd = year - 18 and kept only where bd is a multiple of ten.
pop = (
    pd.read_csv(os.path.join(wdir, 'pop_world.csv'))
    .loc[lambda d: d.iso.isin(iso_codes)]
    .reset_index(drop=True)
    .assign(popshr=lambda d: d['pop'] / d.groupby('year')['pop'].transform('sum'),
            bd=lambda d: d.year - 18)
    .loc[lambda d: d.bd % 10 == 0, ['iso', 'bd', 'popshr']]
    .reset_index(drop=True)
)

# 1. All observations

In [5]:
# Transition data for all observations (used throughout section 1).
df = pd.read_csv(filepath_or_buffer=os.path.join(wdir, 'religion_transition_data_all.csv'))

In [6]:
# One row per country present in df (sorted). define_x_countries_have_data reads
# this global to add countries that have no rows to its availability tables.
iso_df = pd.DataFrame({'aux': 1}, index=sorted(set(df.iso)))

## 1.1 Not focusing on age

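1) Calculate the share of children ("young") of parents ("old") with religion x who themselves have religion y, by
    - country (iso)
    - birth decade
    - census year
2) Average these shares across census years, by
    - country (iso)
    - birth decade
3) Weight the country-level shares by each country's population share (normalized to sum to one within each birth decade × parent religion) and sum across countries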

In [7]:
# Birth-decade view of the full sample: cohorts 1950-2010, weights from column 'w0'.
x, countries_have_data = define_x_countries_have_data(df, pop, timevar='bd', mint=1950, maxt=2010, suff='0')
f = make_plots(x, countries_have_data, timevar='bd', mint=1950, maxt=2010)

# One stand-alone figure per parent religion, built first and saved afterwards.
fc, fm, ft = [make_plot_simple(x, 'bd', 1950, 2010, parent_relig)
              for parent_relig in ('Christian', 'Muslim', 'Traditional')]
fc.savefig(f'{outdir}/religion_transition_0_bd_christian.pdf', bbox_inches='tight')
fm.savefig(f'{outdir}/religion_transition_0_bd_muslim.pdf', bbox_inches='tight')
ft.savefig(f'{outdir}/religion_transition_0_bd_traditional.pdf', bbox_inches='tight')

In [8]:
# Save the combined panel figure (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_bd.pdf', bbox_inches='tight')

## 1.2 Focusing on age

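1) Calculate the share of children ("young") of parents ("old") with religion x who themselves have religion y, by
    - country (iso)
    - birth decade
    - census year
    - age
2) Focus on the 1980s birth decade
3) Average these shares across census years, by
    - country (iso)
    - age
4) Weight the country-level shares by each country's population share (normalized to sum to one within each age × parent religion) and sum across countries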

In [9]:
# Age view of the full sample: ages 14-30 (the function keeps only bd == 1980 here).
x, countries_have_data = define_x_countries_have_data(df, pop, timevar='age', mint=14, maxt=30, suff='0')
f = make_plots(x, countries_have_data, timevar='age', mint=14, maxt=30)

In [10]:
# Save the age-profile panel (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_age.pdf', bbox_inches='tight')

# 2. Among kids of uneducated parents, by educational status of kids

In [11]:
# Education-split transition data, restricted to rows with ec0 == 0
# (the "uneducated parents" sample named in the section heading).
df = (
    pd.read_csv(os.path.join(wdir, 'religion_transition_data_ed.csv'))
    .loc[lambda d: d.ec0 == 0]
    .reset_index(drop=True)
)

## 2.1 By birth decade

### 2.1.1 All kids

In [12]:
# Birth-decade view, all kids in the restricted sample: cohorts 1950-2000.
x, countries_have_data = define_x_countries_have_data(df, pop, timevar='bd', mint=1950, maxt=2000, suff='0')
f = make_plots(x, countries_have_data, timevar='bd', mint=1950, maxt=2000)

In [13]:
# Save the panel for all kids (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_bd_edall.pdf', bbox_inches='tight')

### 2.1.2 Educated kids

In [14]:
# Birth-decade view, educated kids only (eckid >= 1): cohorts 1950-2000.
educated_kids = df.loc[df.eckid >= 1].reset_index(drop=True)
x, countries_have_data = define_x_countries_have_data(educated_kids, pop, timevar='bd', mint=1950, maxt=2000, suff='0')
f = make_plots(x, countries_have_data, timevar='bd', mint=1950, maxt=2000)
del educated_kids  # keep the notebook namespace as it was

In [15]:
# Save the panel for educated kids (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_bd_ed1.pdf', bbox_inches='tight')

### 2.1.3 Uneducated kids

In [16]:
# Birth-decade view, uneducated kids only (eckid == 0): cohorts 1950-2000.
uneducated_kids = df.loc[df.eckid == 0].reset_index(drop=True)
x, countries_have_data = define_x_countries_have_data(uneducated_kids, pop, timevar='bd', mint=1950, maxt=2000, suff='0')
f = make_plots(x, countries_have_data, timevar='bd', mint=1950, maxt=2000)
del uneducated_kids  # keep the notebook namespace as it was

In [17]:
# Save the panel for uneducated kids (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_bd_ed0.pdf', bbox_inches='tight')

## 2.2 By age

### 2.2.1 All kids

In [18]:
# Age view, all kids in the restricted sample: ages 14-30.
x, countries_have_data = define_x_countries_have_data(df, pop, timevar='age', mint=14, maxt=30, suff='0')
f = make_plots(x, countries_have_data, timevar='age', mint=14, maxt=30)

In [19]:
# Save the age panel for all kids (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_age_edall.pdf', bbox_inches='tight')

### 2.2.2 Educated kids

In [20]:
# Age view, educated kids only (eckid >= 1): ages 14-30.
educated_kids = df.loc[df.eckid >= 1].reset_index(drop=True)
x, countries_have_data = define_x_countries_have_data(educated_kids, pop, timevar='age', mint=14, maxt=30, suff='0')
f = make_plots(x, countries_have_data, timevar='age', mint=14, maxt=30)
del educated_kids  # keep the notebook namespace as it was

In [21]:
# Save the age panel for educated kids (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_age_ed1.pdf', bbox_inches='tight')

### 2.2.3 Uneducated kids

In [22]:
# Age view, uneducated kids only (eckid == 0): ages 14-30.
uneducated_kids = df.loc[df.eckid == 0].reset_index(drop=True)
x, countries_have_data = define_x_countries_have_data(uneducated_kids, pop, timevar='age', mint=14, maxt=30, suff='0')
f = make_plots(x, countries_have_data, timevar='age', mint=14, maxt=30)
del uneducated_kids  # keep the notebook namespace as it was

In [23]:
# Save the age panel for uneducated kids (evaluate `f` on its own line to preview it inline).
f.savefig(f'{outdir}/religion_transition_0_age_ed0.pdf', bbox_inches='tight')