In [1]:
import numpy as np
import pandas as pd
import country_converter as coco
import matplotlib.pyplot as plt
import matplotlib as mpl
# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# later dropped the old names, so use whichever name this installation provides.
_seaborn_style = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
plt.style.use(_seaborn_style)
mpl.rcParams['font.family'] = 'serif'
import os

In [2]:
def create_step_arrays(x, y):
    """Turn paired (x, y) values into the vertex arrays of a staircase line.

    Every element of ``x`` and ``y`` is duplicated; the first duplicated x and
    the last duplicated y are then dropped and a 0 is prepended to each array.
    The result is two arrays of length ``2 * len(x)`` in which each y value is
    held until the next x value is reached, starting from the point (0, 0).

    Parameters
    ----------
    x, y : array-like
        Horizontal and vertical coordinates, expected to have equal length.

    Returns
    -------
    tuple of numpy.ndarray
        ``(step_x, step_y)`` ready to be passed to a line-plot call.
    """
    origin = np.array([0])
    doubled_x = np.repeat(x, 2)
    doubled_y = np.repeat(y, 2)
    step_x = np.concatenate([origin, doubled_x[1:]])
    step_y = np.concatenate([origin, doubled_y[:-1]])
    return step_x, step_y

In [3]:
# `_dh` is IPython's directory history and is only defined when this code runs
# under IPython/Jupyter. When the name is missing (e.g. the notebook was
# exported to a plain script) fall back to the current working directory
# instead of failing with a KeyError.
current_folder = globals().get('_dh', [os.getcwd()])[0]
# The project root is two levels above the notebook's folder.
rootdir = os.path.dirname(os.path.dirname(current_folder))
# Input data folder and output folder for the generated figures.
wdir = os.path.join(rootdir, '_2_intermediate', 'data')
outdir = os.path.join(rootdir, '_3_figures_tables', 'data')

In [4]:
# Read the CSV with the data for the CDF plots from the working data folder.
cdf_input_path = os.path.join(wdir, '_data_for_CDFs.csv')
df = pd.read_csv(cdf_input_path)

In [5]:
def plot_cdf(df, majrel, weighted, showfig, cohorts=(1950, 1960, 1970, 1980)):
    """Plot the cumulative share of observations by years of schooling.

    One step line is drawn per cohort for the rows of ``df`` whose
    ``major_religion`` equals ``majrel``. Within each ``bd`` group the counts
    are cumulated over increasing ``ysc`` and divided by the group's largest
    cumulative value, so each line ends at a share of 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``major_religion``, ``bd``, ``ysc`` and the
        count columns ``nwt`` and ``n``. The frame passed in is not modified.
        (``bd`` is presumably the birth decade -- confirm against the data.)
    majrel : str
        Value of ``major_religion`` to select; also used in the output file name.
    weighted : bool
        If true, cumulate the ``nwt`` column, otherwise the ``n`` column.
    showfig : bool
        If true, display the figure with ``plt.show()``; otherwise save it as a
        PDF into the module-level ``outdir`` folder and close it.
    cohorts : iterable, optional
        Values of ``bd`` to draw, one line each, in this order.

    Returns
    -------
    None
    """
    # Filtering, sorting and resetting the index all return new frames, so the
    # column assignments below never touch the caller's DataFrame.
    sub = (
        df[df.major_religion == majrel]
        .sort_values(['bd', 'ysc'])
        .reset_index(drop=True)
    )
    sub['nysc'] = sub['nwt'] if weighted else sub['n']
    sub['cusu'] = sub.groupby('bd')['nysc'].transform('cumsum')
    sub['maxbch'] = sub.groupby('bd')['cusu'].transform('max')
    sub['shr'] = sub['cusu'] / sub['maxbch']

    f, ax = plt.subplots(1, 1, figsize=(10, 6))
    for bch in cohorts:
        in_cohort = sub[sub.bd == bch]
        x, y = create_step_arrays(in_cohort.ysc, in_cohort.shr)
        ax.plot(x, y, label=bch)

    # Axis styling does not depend on the cohort, so apply it once after all
    # lines are drawn instead of on every loop iteration.
    ax.set_xticks(np.arange(19))
    ax.set_ylabel('cumulative share', size=15)
    ax.set_xlabel('years of schooling', size=15)
    ax.tick_params(axis='both', which='major', labelsize=15)
    ax.legend(prop={'size': 15})

    if showfig:
        plt.show()
    else:
        suffix = 'weighted' if weighted else 'unweighted'
        f.savefig('{}/_app_CDF_ysc_{}_{}.pdf'.format(outdir, majrel, suffix),
                  bbox_inches='tight')
        # Close explicitly so repeated calls do not accumulate open figures.
        plt.close(f)

In [6]:
# Save one weighted and one unweighted CDF figure for each major religion.
for religion in ('Christian', 'Muslim', 'Traditional'):
    for use_weights in (True, False):
        plot_cdf(df, religion, weighted=use_weights, showfig=False)