In [1]:
import pandas as pd
import os

In [2]:
def compute_migshares(df, timevar, origin_fe=None):
    """Compute weighted migrant shares per country, time unit, religion and `org`.

    For every (iso, timevar, year, major_religion, org) cell the function sums
    the weights `w` separately for `migrant == 0` and `migrant == 1` and takes
    the share of the `migrant == 1` weight in the cell total. Shares are then
    optionally demeaned within (iso, timevar, year, org) and finally averaged
    over `year`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'iso', `timevar`, 'year', 'major_religion',
        'org', 'migrant' and 'w'; additionally 'bd' when `timevar == 'age'`.
        `migrant` must take exactly two distinct values in the data used
        (the smaller one is treated as non-migrant, the larger as migrant).
    timevar : str
        Name of the time-like grouping column (e.g. 'bd' or 'age'). When it is
        'age', only rows with `bd == 1980` are used.
    origin_fe : {None, 'unweighted', 'weighted'}, optional
        None         -- raw shares.
        'unweighted' -- subtract the simple mean of the shares across the
                        rows of the same (iso, timevar, year, org) group.
        'weighted'   -- subtract the mean weighted by each row's share of the
                        group's total weight.
        Any other value behaves like None.

    Returns
    -------
    pandas.DataFrame
        Single column 'shr', indexed by (iso, timevar, major_religion, org),
        holding the across-year mean of the (possibly demeaned) shares.

    Raises
    ------
    ValueError
        If `migrant` does not take exactly two distinct values.
    """
    cell_keys = ['iso', timevar, 'year', 'major_religion', 'org']
    org_keys = ['iso', timevar, 'year', 'org']

    # The age profile is restricted to the 1980 `bd` group.
    sample = df[df.bd == 1980] if timevar == 'age' else df

    # Summed weights per cell, one column per `migrant` value; a value that is
    # absent from a cell counts as zero weight.
    counts = (sample.groupby(cell_keys + ['migrant'])['w']
              .sum()
              .unstack('migrant')
              .fillna(0))
    # Columns come out sorted, i.e. migrant == 0 first, migrant == 1 second.
    # Assigning two names raises ValueError unless there are exactly two.
    counts.columns = ['w0', 'w1']

    x = counts.reset_index(drop=False)
    x['tot'] = x.w0 + x.w1
    x['shr'] = x.w1 / x.tot  # NaN for cells whose total weight is zero
    x = x.drop(columns=['w0', 'w1'])

    if origin_fe == 'unweighted':
        org_mean = x.groupby(org_keys)['shr'].transform('mean')
        x['shr'] = x.shr - org_mean
    elif origin_fe == 'weighted':
        # Each row's weight is its share of the group's total weight.
        org_total = x.groupby(org_keys)['tot'].transform('sum')
        contribution = x.shr * (x.tot / org_total)
        org_mean = contribution.groupby([x[k] for k in org_keys]).transform('sum')
        x['shr'] = x.shr - org_mean

    # Average over years. Group on `timevar` (not a hard-coded 'bd') so that
    # timevar='age' works as well.
    return x.groupby(['iso', timevar, 'major_religion', 'org'])[['shr']].mean()

In [3]:
# `_dh` is IPython's directory history; its first entry is used as the
# notebook's directory. Outside IPython the name does not exist, so fall back
# to the process working directory (e.g. when run as an exported script).
cwd = globals().get('_dh', [os.getcwd()])[0]
# The input file is read from the `data` folder that sits beside `cwd`.
df = pd.read_csv(os.path.join(os.path.dirname(cwd), 'data', 'migrant_stock_data_all.csv'))

In [4]:
# Shares by `bd` under the three demeaning variants. Each result gets its own
# column name so the frames can later be joined on their common index.
n = compute_migshares(df, 'bd').rename(columns={'shr': 'shr_nofe'})
u = compute_migshares(df, 'bd', origin_fe='unweighted').rename(columns={'shr': 'shr_uwfe'})
w = compute_migshares(df, 'bd', origin_fe='weighted').rename(columns={'shr': 'shr_wfe'})

In [5]:
# Put the three share variants side by side (left-joined on the shared index)
# and turn the index levels back into ordinary columns.
out = (
    n.join(u)
     .join(w)
     .reset_index()
)

In [6]:
# Write the combined table to the same `data` folder the input was read from.
out_path = os.path.join(os.path.dirname(cwd), 'data', 'migrant_shares_per_origin_for_regs.csv')
out.to_csv(out_path, index=False)