## Simulation 
We begin by specifying packages, reading in the dataset and generating some functions that we will use repeatedly to calculate poverty rates and gini coefficients as well as to generate simple percentage changes.

In the dataframe, we have spmu unit years as the level of the dataset. All values are averaged across 2018-2020.

In [1]:
import microdf as mdf
import pandas as pd
import numpy as np
import us
import plotly.express as px
import plotly.graph_objects as go
from ubicenter import format_fig

person_sim = pd.read_csv(
    "C:\\Users\\John Walker\\Desktop\\person_sim.csv.gz",
    # "jb/data/person_sim.csv.gz",
    compression="gzip")
person_sim.rename(columns={'scenario':'Scenario'}, inplace=True)

In [2]:
# Define a function to calculate poverty rates from the poverty flag
def pov(data, group):
    return pd.DataFrame(
        mdf.weighted_mean(data, "poverty_flag", "asecwt", groupby=group)
    )

def deep_pov(data, group):
    return pd.DataFrame(
        mdf.weighted_mean(data, "deep_poverty_flag", "asecwt", groupby=group)
)

# Define function to generate gini coefficients
def gin(data, group):
    return pd.DataFrame(
        data.groupby(group).apply(
            lambda x: mdf.gini(x, "spmtotres", "asecwt")
        )
    )

# Define percentage change function
def percent_change(new, old):
    return 100 * (new - old) / old

We generate poverty rates for the total population and by demographics of interest, namely sex, race, whether one is a child (under 6) and by state. We similarly generate gini coefficients for the total population and by state.

In [59]:
# Poverty rates by demographics of interest
poverty_rate = pov(person_sim, ["scenario", "ca"]) # Overall poverty rate
poverty_rate_sex = pov(person_sim, ["scenario", "ca", "sex"]) # Poverty rates by sex
poverty_rate_race_hispan = pov(person_sim, ["scenario", "ca", "race_hispan"])  # Poverty rates by race
poverty_rate_age = pov(person_sim, ["scenario", "ca", "age_cat"]) # Poverty rate by child age
poverty_rate_child = pov(person_sim, ["scenario", "ca", "child_6"]) # Poverty rate by child age

# State-based poverty rates
poverty_rate_state = pov(person_sim, ["scenario", "ca", "state"])

# Rename constructed poverty_rates
poverty_rates = [
    poverty_rate,
    poverty_rate_sex,
    poverty_rate_race_hispan,
    poverty_rate_state,
    poverty_rate_age,
]
for i in poverty_rates:
    i.rename({0: "poverty_rate"}, axis=1, inplace=True)

for j in poverty_rates:
    j = j.reset_index(inplace=True)

In [60]:
# Deep poverty rates by demographics of interest
deep_poverty_rate = deep_pov(person_sim, ["scenario", "ca"]) # Overall poverty rate
deep_poverty_rate_sex = deep_pov(person_sim, ["scenario", "ca", "sex"]) # Poverty rates by sex
deep_poverty_rate_race_hispan = deep_pov(person_sim, ["scenario", "ca", "race_hispan"])  # Poverty rates by race
deep_poverty_rate_age = deep_pov(person_sim, ["scenario", "ca", "age_cat"]) # Poverty rate by child age
deep_poverty_rate_child = pov(person_sim, ["scenario", "ca", "child_6"]) # Poverty rate by child age

# State-based deep poverty rates
deep_poverty_rate_state = deep_pov(person_sim, ["scenario", "ca", "state"])

# Rename constructed poverty_rates
deep_poverty_rates = [
    deep_poverty_rate,
    deep_poverty_rate_sex,
    deep_poverty_rate_race_hispan,
    deep_poverty_rate_state,
    deep_poverty_rate_age,
]
for i in deep_poverty_rates:
    i.rename({0: "deep_poverty_rate"}, axis=1, inplace=True)

for j in deep_poverty_rates:
    j = j.reset_index(inplace=True)

In [30]:
# Gini coefficients and state-based heterogenous gini coefficients
gini = gin(person_sim, ["scenario", "ca"])
gini_state = gin(person_sim, ["scenario", "ca", "state"])

# Rename constructed gini coefficients
ginis = [
    gini,
    gini_state,
]
for i in ginis:
    i.rename({0: "gini_coefficient"}, axis=1, inplace=True)
for j in ginis:
    j = j.reset_index(inplace=True)

In [69]:
poverty_rate.columns

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [14]:
# Poverty plot
# Use pivot table to construct difference / percentage change in pov measures 
pov_wide = poverty_rate[poverty_rate.scenario!="baseline"].pivot_table("poverty_rate","scenario","ca")
pov_wide["baseline"] = poverty_rate[poverty_rate.scenario == "baseline"].poverty_rate.values[0]
pov_wide = pov_wide.reset_index()
pov_wide.columns = ["scenario", "child care", "child allowance", "baseline"]
pov_wide["pdif_base_cc"] = percent_change(pov_wide["child care"],pov_wide["baseline"])
pov_wide["pdif_base_ca"] = percent_change(pov_wide["child allowance"],pov_wide["baseline"])
pov_wide["pdif_ca_cc"] = percent_change(pov_wide["child allowance"],pov_wide["child care"])

# Prepare dataframe for plot output
no_ca_rows = {"scenario":pov_wide["scenario"], "Transfer type":"Child care", "Percentage difference in poverty from baseline":pov_wide["pdif_base_cc"]}
no_ca_rows = pd.DataFrame(no_ca_rows)
row_1 = no_ca_rows[no_ca_rows.scenario == "cc_replacement"]
row_2 = no_ca_rows[no_ca_rows.scenario == "low_cc_full"]
row_3 = no_ca_rows[no_ca_rows.scenario == "high_cc_full"]

ca_rows = {"scenario":pov_wide["scenario"], "Transfer type":"Child allowance", "Percentage difference in poverty from baseline":pov_wide["pdif_base_ca"]}
ca_rows = pd.DataFrame(ca_rows)
row_4 = ca_rows[ca_rows.scenario == "cc_replacement"]
row_5 = ca_rows[ca_rows.scenario == "low_cc_full"]
row_6 = ca_rows[ca_rows.scenario == "high_cc_full"]

# Append the rows
row_2 = row_1.append(row_2)
row_3 = row_2.append(row_3)
row_4 = row_3.append(row_4)
row_5 = row_4.append(row_5)
row_6 = row_5.append(row_6)
pov_long = row_6

# Rename scenarios
pov_long.loc[pov_long.scenario == "cc_replacement", "scenario"] = "Expense replacement"
pov_long.loc[pov_long.scenario == "high_cc_full", "scenario"] = "High quality"
pov_long.loc[pov_long.scenario == "low_cc_full", "scenario"] = "Base quality" 

pov_plot = px.bar(pov_long, x="scenario", y="Percentage difference in poverty from baseline", color="Transfer type", barmode="group",color_discrete_map={"Child care":"steelblue","Child allowance":"grey"})
format_fig(pov_plot)


In [15]:
### Deep poverty
# Use pivot table to construct difference / percentage change in pov measures 
deep_pov_wide = deep_poverty_rate[deep_poverty_rate.scenario!="baseline"].pivot_table("deep_poverty_rate","scenario","ca")
deep_pov_wide["baseline"] = deep_poverty_rate[deep_poverty_rate.scenario == "baseline"].deep_poverty_rate.values[0]
deep_pov_wide = deep_pov_wide.reset_index()
deep_pov_wide.columns = ["scenario", "child care", "child allowance", "baseline"]
deep_pov_wide["pdif_base_cc"] = percent_change(deep_pov_wide["child care"],deep_pov_wide["baseline"])
deep_pov_wide["pdif_base_ca"] = percent_change(deep_pov_wide["child allowance"],deep_pov_wide["baseline"])
deep_pov_wide["pdif_ca_cc"] = percent_change(deep_pov_wide["child allowance"],deep_pov_wide["child care"])

# Prepare dataframe for plot output
no_ca_rows = {"scenario":deep_pov_wide["scenario"], "Transfer type":"Child care", "Percentage difference in poverty from baseline":deep_pov_wide["pdif_base_cc"]}
no_ca_rows = pd.DataFrame(no_ca_rows)
row_1 = no_ca_rows[no_ca_rows.scenario == "cc_replacement"]
row_2 = no_ca_rows[no_ca_rows.scenario == "low_cc_full"]
row_3 = no_ca_rows[no_ca_rows.scenario == "high_cc_full"]

ca_rows = {"scenario":deep_pov_wide["scenario"], "Transfer type":"Child allowance", "Percentage difference in poverty from baseline":deep_pov_wide["pdif_base_ca"]}
ca_rows = pd.DataFrame(ca_rows)
row_4 = ca_rows[ca_rows.scenario == "cc_replacement"]
row_5 = ca_rows[ca_rows.scenario == "low_cc_full"]
row_6 = ca_rows[ca_rows.scenario == "high_cc_full"]

# Append the rows
row_2 = row_1.append(row_2)
row_3 = row_2.append(row_3)
row_4 = row_3.append(row_4)
row_5 = row_4.append(row_5)
row_6 = row_5.append(row_6)
pov_long = row_6

# Rename scenarios
pov_long.loc[pov_long.scenario == "cc_replacement", "scenario"] = "Expense replacement"
pov_long.loc[pov_long.scenario == "high_cc_full", "scenario"] = "High quality"
pov_long.loc[pov_long.scenario == "low_cc_full", "scenario"] = "Base quality" 

pov_plot = px.bar(pov_long, x="scenario", y="Percentage difference in poverty from baseline", color="Transfer type", barmode="group",color_discrete_map={"Child care":"steelblue","Child allowance":"grey"})
format_fig(pov_plot)


In [62]:
poverty_rate_child

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
scenario,ca,child_6,Unnamed: 3_level_1
baseline,False,False,0.1261
baseline,False,True,0.149787
cc_replacement,False,False,0.124124
cc_replacement,False,True,0.139226
cc_replacement,True,False,0.120445
cc_replacement,True,True,0.115222
high_cc_full,False,False,0.106706
high_cc_full,False,True,0.013208
high_cc_full,True,False,0.106664
high_cc_full,True,True,0.012909


In [61]:
# Poverty plot age
# Use pivot table to construct difference / percentage change in pov measures 
child_pov_wide = poverty_rate_child[poverty_rate_child.scenario!="baseline"].pivot_table("poverty_rate","scenario","ca", "child_6")
child_pov_wide["baseline"] = poverty_rate_child[poverty_rate_child.scenario == "baseline"].poverty_rate.values[0]
child_pov_wide = child_pov_wide.reset_index()
child_pov_wide.columns = ["scenario", "child care", "child allowance", "baseline"]
child_pov_wide["pdif_base_cc"] = percent_change(child_pov_wide["child care"],child_pov_wide["baseline"])
child_pov_wide["pdif_base_ca"] = percent_change(child_pov_wide["child allowance"],child_pov_wide["baseline"])
child_pov_wide["pdif_ca_cc"] = percent_change(child_pov_wide["child allowance"],child_pov_wide["child care"])

# Prepare dataframe for plot output
no_ca_rows = {"scenario":child_pov_wide["scenario"], "Transfer type":"Child care", "Percentage difference in poverty from baseline":child_pov_wide["pdif_base_cc"]}
no_ca_rows = pd.DataFrame(no_ca_rows)
row_1 = no_ca_rows[no_ca_rows.scenario == "cc_replacement"]
row_2 = no_ca_rows[no_ca_rows.scenario == "low_cc_full"]
row_3 = no_ca_rows[no_ca_rows.scenario == "high_cc_full"]

ca_rows = {"scenario":child_pov_wide["scenario"], "Transfer type":"Child allowance", "Percentage difference in poverty from baseline":child_pov_wide["pdif_base_ca"]}
ca_rows = pd.DataFrame(ca_rows)
row_4 = ca_rows[ca_rows.scenario == "cc_replacement"]
row_5 = ca_rows[ca_rows.scenario == "low_cc_full"]
row_6 = ca_rows[ca_rows.scenario == "high_cc_full"]

# Append the rows
row_2 = row_1.append(row_2)
row_3 = row_2.append(row_3)
row_4 = row_3.append(row_4)
row_5 = row_4.append(row_5)
row_6 = row_5.append(row_6)
pov_long = row_6

# Rename scenarios
pov_long.loc[pov_long.scenario == "cc_replacement", "scenario"] = "Expense replacement"
pov_long.loc[pov_long.scenario == "high_cc_full", "scenario"] = "High quality"
pov_long.loc[pov_long.scenario == "low_cc_full", "scenario"] = "Base quality" 

pov_plot = px.bar(pov_long, x="scenario", y="Percentage difference in poverty from baseline", color="Transfer type", barmode="group",color_discrete_map={"Child care":"steelblue","Child allowance":"grey"})
format_fig(pov_plot)


AttributeError: 'DataFrame' object has no attribute 'scenario'

In [45]:
### Gini
# Use pivot table to construct difference / percentage change in pov measures 
gini_wide = gini[gini.scenario!="baseline"].pivot_table("gini_coefficient","scenario","ca")
gini_wide["baseline"] = gini[gini.scenario == "baseline"].gini_coefficient.values[0]
gini_wide = gini_wide.reset_index()
gini_wide.columns = ["scenario", "child care", "child allowance", "baseline"]
gini_wide["pdif_base_cc"] = percent_change(gini_wide["child care"],gini_wide["baseline"])
gini_wide["pdif_base_ca"] = percent_change(gini_wide["child allowance"],gini_wide["baseline"])
gini_wide["pdif_ca_cc"] = percent_change(gini_wide["child allowance"],gini_wide["child care"])

# Prepare dataframe for plot output
no_ca_rows = {"scenario":gini_wide["scenario"], "Transfer type":"Child care", "Percentage difference in Gini coefficient from baseline":gini_wide["pdif_base_cc"]}
no_ca_rows = pd.DataFrame(no_ca_rows)
row_1 = no_ca_rows[no_ca_rows.scenario == "cc_replacement"]
row_2 = no_ca_rows[no_ca_rows.scenario == "low_cc_full"]
row_3 = no_ca_rows[no_ca_rows.scenario == "high_cc_full"]

ca_rows = {"scenario":gini_wide["scenario"], "Transfer type":"Child allowance", "Percentage difference in Gini coefficient from baseline":gini_wide["pdif_base_ca"]}
ca_rows = pd.DataFrame(ca_rows)
row_4 = ca_rows[ca_rows.scenario == "cc_replacement"]
row_5 = ca_rows[ca_rows.scenario == "low_cc_full"]
row_6 = ca_rows[ca_rows.scenario == "high_cc_full"]

# Append the rows
row_2 = row_1.append(row_2)
row_3 = row_2.append(row_3)
row_4 = row_3.append(row_4)
row_5 = row_4.append(row_5)
row_6 = row_5.append(row_6)
pov_long = row_6

# Rename scenarios
pov_long.loc[pov_long.scenario == "cc_replacement", "scenario"] = "Expense replacement"
pov_long.loc[pov_long.scenario == "high_cc_full", "scenario"] = "High quality"
pov_long.loc[pov_long.scenario == "low_cc_full", "scenario"] = "Base quality" 

pov_plot = px.bar(pov_long, x="scenario", y="Percentage difference in Gini coefficient from baseline", color="Transfer type", barmode="group",color_discrete_map={"Child care":"steelblue","Child allowance":"grey"})
format_fig(pov_plot)


We then output state-based poverty rate and gini percentage changes to reflect the impact of the simulation. 

scenario,baseline,cc_replacement,cc_replacement,high_cc_full,high_cc_full,low_cc_full,low_cc_full
ca,False,False,True,False,True,False,True
state,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Alabama,0.127468,0.126259,0.120791,0.102628,0.100755,0.107343,0.105218
Alaska,0.132965,0.131078,0.125391,0.094711,0.096226,0.103367,0.106835
Arizona,0.119701,0.11608,0.10674,0.090913,0.090579,0.09693,0.094243
Arkansas,0.130987,0.128325,0.120045,0.103,0.102164,0.10848,0.106633
California,0.174174,0.172219,0.166576,0.136286,0.137809,0.144813,0.147043
Colorado,0.116944,0.114349,0.113578,0.093067,0.093067,0.09847,0.09847
Connecticut,0.113332,0.111758,0.110628,0.087151,0.087728,0.090861,0.093475
Delaware,0.105911,0.10277,0.097914,0.086105,0.086105,0.089422,0.089422
District of Columbia,0.169771,0.167536,0.16092,0.142059,0.143431,0.144321,0.146682
Florida,0.159601,0.156689,0.150109,0.129471,0.128967,0.137851,0.134594


In [18]:
# State analysis
# Create pivot table to interpret state-based poverty effects
state_pov = poverty_rate_state.pivot_table(
    values="poverty_rate", index="state", columns=["scenario","ca"]
)
state_pov = state_pov.reset_index()
deep_state_pov = deep_poverty_rate_state.pivot_table(
    values="poverty_rate", index="state", columns=["scenario","ca"]
)
# Create pivot table to interpret state-based gini effects
state_gini = gini_state.pivot_table(
    values="gini_coefficient", index="state", columns=["scenario", "ca"]
)

us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}
state_pov["state_code"]= state_pov.state.map(us_state_abbrev)
px.choropleth(locations=state_pov.state_code,locationmode="USA-states",color=state_pov.baseline,scope="usa")

KeyError: 'poverty_rate'

In [None]:
state_pov

In [26]:

"""
# Generate state-based poverty rate percentage changes
state_pov["poverty_change_cc"] =  state_pov.cc_replacement - state_pov.baseline
state_pov["poverty_change_flat"] = state_pov.child_allowance - state_pov.baseline
state_pov["poverty_change_pc_cc"] = state_pov.poverty_change_cc - state_pov.baseline
state_pov["poverty_change_pc_flat"] = (
    state_pov.poverty_change_flat - state_pov.baseline
)

# Construct state-based gini coefficient percentage changes
state_gini["gini_change_cc"] = state_gini.cc_replacement - state_gini.baseline
state_gini["gini_change_flat"] = state_gini.child_allowance - state_gini.baseline
state_gini["gini_change_pc_cc"] = percent_change(
    state_gini.gini_change_cc, state_gini.baseline
)
state_gini["gini_change_pc_flat"] = percent_change(
    state_gini.gini_change_flat, state_gini.baseline
)

# Re-arrange and present pivot tables, descending by % change
# in poverty rate
state_pov.sort_values(by="poverty_change_pc_flat", ascending=True)
state_gini.sort_values(by="gini_change_pc_flat", ascending=True)
"""

'\n# Generate state-based poverty rate percentage changes\nstate_pov["poverty_change_cc"] =  state_pov.cc_replacement - state_pov.baseline\nstate_pov["poverty_change_flat"] = state_pov.child_allowance - state_pov.baseline\nstate_pov["poverty_change_pc_cc"] = state_pov.poverty_change_cc - state_pov.baseline\nstate_pov["poverty_change_pc_flat"] = (\n    state_pov.poverty_change_flat - state_pov.baseline\n)\n\n# Construct state-based gini coefficient percentage changes\nstate_gini["gini_change_cc"] = state_gini.cc_replacement - state_gini.baseline\nstate_gini["gini_change_flat"] = state_gini.child_allowance - state_gini.baseline\nstate_gini["gini_change_pc_cc"] = percent_change(\n    state_gini.gini_change_cc, state_gini.baseline\n)\nstate_gini["gini_change_pc_flat"] = percent_change(\n    state_gini.gini_change_flat, state_gini.baseline\n)\n\n# Re-arrange and present pivot tables, descending by % change\n# in poverty rate\nstate_pov.sort_values(by="poverty_change_pc_flat", ascending=True

## Visualizations


In [None]:
# Poverty rate by sim
pov_plot = px.bar(poverty_rate,x="sim",y="poverty_rate")
format_fig(pov_plot)

In [None]:
# Deep poverty by sim
deep_pov_plot = px.bar(deep_poverty_rate,x="sim",y="poverty_rate")
format_fig(deep_pov_plot)

In [13]:
# Child poverty by sim
child_pov_plot = px.bar(poverty_rate_child,x="sim",y="poverty_rate")
format_fig(child_pov_plot)

In [29]:
# Deep child poverty by sim 
deep_pov_child_plot = px.bar(deep_poverty_rate_child,x="sim",y="poverty_rate")
format_fig(deep_pov_child_plot)

Unnamed: 0_level_0,poverty_rate
sim,Unnamed: 1_level_1
baseline,0.12783
high_ca,0.0
high_xpns,0.0
low_ca,0.0
low_xpns,0.0
replace_ca,0.120064
replace_xpns,0.125228
