In [1]:
# The aim of this code is to disaggregate policy scores by a single tool or a combination of several tools; the resulting
# scores will be used as covariates of policy reform in the hukou impact research project

In [None]:
import pandas as pd
import numpy as np
from functools import reduce

In [2]:
# 1. Obtain the score by multiplying direction (res_all) by magnitude (mag_all)

policy = pd.read_excel("PolicyData.xlsx", index_col=0)
policy['score'] = policy['res_all'] * policy['mag_all']

# Keep only rows whose score is below 5. Rows with a missing score are dropped
# as well, because a comparison against NaN evaluates to False.
# NOTE(review): the reason for the cut-off of 5 is not documented here - confirm.
# .copy() makes the filtered frame independent of the frame it was sliced from,
# so the column assignments further down do not raise SettingWithCopyWarning.
policy = policy[policy['score'] < 5].copy()


# 2. Assign a weight to each row and generate weighted scores
#    (may not be used here, but generated for future use)

# Each score is weighted by the population proportion of the row's migrant group.
# Keys are mig_group codes, values are the corresponding weights.
mig_weight = {
    10: 1, 20: 0.73, 21: 0.63, 22: 0.63, 23: 0.11,
    30: 0.56, 31: 0.45, 32: 0.11, 33: 0.11,
    40: 0.01, 41: 0.01, 42: 0.01,
    50: 1.00, 51: 0.73, 52: 0.17, 53: 0.1, 54: 0.17,
    61: 0.11, 62: 0.63, 63: 0.01, 64: 0.01, 65: 0.01,
}

# Look up the weight row by row; a mig_group code missing from mig_weight raises KeyError.
policy['weight'] = list(map(mig_weight.__getitem__, policy['mig_group']))
policy['wscore'] = policy['score'].mul(policy['weight'])
#policy.head()

# 3. Disaggregate scores by policy tools

# Maps each pol_tool code to a short label. The label of the first tool passed
# to scores() becomes the suffix of the output column names.
policytools = {
    11: 'access',
    12: 'ps',
    21: 'rp',
    22: 'collective hukou',
    23: 'unify',
    24: 'urbanize city villages',
    25: 'tax transfer',
    30: 'public service',
    31: 'education',
    32: 'social insur',
    33: 'medical insur',
    34: 'pension insur',
    35: 'unemp insur',
    36: 'housing',
    37: 'healthcare',
    38: 'adpat',
    39: 'welfare',
    54: 'emp scheme',
}

# 3.1 Function that returns the scores for one policy tool or a combination of two policy tools

def scores(t1, t2):
    """Return total and average scores for one policy tool or a pair of tools.

    Selects the rows of the module-level ``policy`` frame whose ``pol_tool``
    equals ``t1`` or ``t2`` (pass the same code twice for a single tool),
    groups their ``score`` by the first index level and aggregates each group.

    Parameters
    ----------
    t1, t2 : policy tool codes. ``t1`` must be a key of ``policytools``; its
        label names the output columns. The label of ``t2`` is not used.

    Returns
    -------
    DataFrame indexed by the first index level of ``policy`` with the columns
    ``ts_<label>`` (sum of scores) and ``as_<label>`` (mean score), in that order.

    Raises
    ------
    KeyError if ``t1`` is not a key of ``policytools``.
    """
    name = policytools[t1]
    selected = policy.loc[policy['pol_tool'].isin([t1, t2]), "score"]
    # Aggregations are passed as a list of names: a set has no defined iteration
    # order, so it made the column order vary between runs, and a list of string
    # names avoids handing numpy callables to groupby.agg.
    return (
        selected.groupby(level=0)
        .agg(["sum", "mean"])
        .rename(columns={"sum": 'ts_' + name, "mean": 'as_' + name})
    )
    
#scores(23,24)

# 3.2 Function that calculates and formats scores for combinations of more than two policy tools

def format(series, name):
    """Aggregate a score series per index value and label the result columns.

    Parameters
    ----------
    series : Series of scores; it is grouped by its first index level.
    name : str suffix used in the output column names.

    Returns
    -------
    DataFrame with the columns ``ts_<name>`` (sum of scores) and
    ``as_<name>`` (mean score), in that order.
    """
    # NOTE: this name shadows the builtin format(); it is kept because the
    # merge step further down calls the function by this name.
    # Aggregations are passed as a list of names: a set has no defined iteration
    # order, so it made the column order vary between runs, and a list of string
    # names avoids handing numpy callables to groupby.agg.
    return (
        series.groupby(level=0)
        .agg(["sum", "mean"])
        .rename(columns={"sum": 'ts_' + name, "mean": 'as_' + name})
    )

# Score selections for the multi-tool combinations (codes as listed in policytools).
pol_tool = policy['pol_tool']

# itg: tool codes 25 through 39, plus 54
itg = policy.loc[pol_tool.between(25, 39) | (pol_tool == 54), "score"]

# insur: tool codes 32 through 35
insur = policy.loc[pol_tool.between(32, 35), "score"]

# ser: tool codes 36 through 39, plus 30, 31 and 54.
# The previous expression ended in an unparenthesised `| policy['pol_tool'] == 54`.
# Because `|` binds tighter than `==`, the comparison was applied to the result of
# the whole OR-chain instead of to the column, so the mask did not match the
# intended set of codes. isin() states the intended membership test directly.
ser = policy.loc[pol_tool.between(36, 39) | pol_tool.isin([30, 31, 54]), "score"]

# 4. Merge the disaggregated scores in one table

# Per-tool score tables; their columns appear in the merged table in this order.
dfs = [
    scores(11, 11),
    scores(12, 12),
    scores(21, 22),
    scores(23, 24),
    format(itg, "itg"),
    format(insur, "insur"),
    format(ser, "ser"),
]

# Left-join each table onto the running result by index and fill the gaps
# created by the join with 0.
# NOTE(review): the joins are left joins anchored on scores(11, 11), so index
# values absent from that first table do not appear in df_tools - confirm this
# is intended (an outer join would keep them).
df_tools = dfs[0]
for right_table in dfs[1:]:
    df_tools = pd.merge(
        df_tools, right_table, how='left', left_index=True, right_index=True
    ).fillna(0)

# Descriptive statistics of the score distributions
summary = df_tools.describe().T
summary.to_excel('PolicyTools_summary.xlsx')

# Full table of disaggregated scores
df_tools.to_excel('PolicyTools.xlsx')