In [1]:
import pandas as pd
import numpy as np
import microdf as mdf
import plotly.express as px
import ubicenter

# Load the IPUMS ASEC 2019 extract and lower-case every column name in one
# step, so the rest of the notebook can refer to columns by lower-case name.
p = pd.read_csv("../data/asec_2019_ipums.csv.gz").rename(columns=str.lower)

# Replace NIU codes (presumably IPUMS "not in universe" sentinels) with zero.
# Maps each affected column to the sentinel value that column uses.
NIU_CODES = {
    "adjginc": 99999999,
    "offtotval": 9999999999,
    # Secondary individuals under 15 have NIU for cutoff.
    # Setting it to zero gives them no UBI of their own
    # (they're still part of SPM units with others who get UBI).
    "cutoff": 999999,
    "incwelfr": 999999,
    "incunemp": 999999,
}
for column, niu in NIU_CODES.items():
    # Assign the result back rather than calling replace(inplace=True) on
    # p[column]: the in-place form operates on an intermediate Series, and
    # under pandas Copy-on-Write that leaves p itself untouched.
    p[column] = p[column].replace(niu, 0)

# Family key: marbasecidh and famid rendered as strings and joined with "-".
p["fam"] = p["marbasecidh"].astype(str) + "-" + p["famid"].astype(str)

# Age flags: anyone under 18 is a child, everyone else an adult.
p["child"] = p["age"].lt(18)
p["adult"] = ~p["child"]

# Adjusted gross income net of incunemp.
p["adjginc_noui"] = p["adjginc"].sub(p["incunemp"])

# Versions of both income measures floored at zero.
for income_col in ("adjginc", "adjginc_noui"):
    p[income_col + "_pos"] = p[income_col].clip(lower=0)

# Create aggregates.

def agg(p, groupby, sums, zeros, prefix, id):
    """Sum columns within groups and add zero-floored copies of chosen columns.

    Args:
        p: DataFrame to aggregate.
        groupby: Column name(s) defining a group; they are returned as
            ordinary columns of the result.
        sums: List of column names to total within each group.
        zeros: Names of columns in the result (summed columns already carry
            `prefix`) for which a "<name>_pos" column floored at zero is added.
        prefix: String prepended to every name in `sums` in the result.
        id: Column expected to identify each result row uniquely.

    Returns:
        DataFrame with one row per group.

    Raises:
        AssertionError: If the largest number of rows sharing one `id`
            value is not exactly one.
    """
    renamed = {col: prefix + col for col in sums}
    totals = p.groupby(groupby)[sums].sum()
    res = totals.reset_index().rename(columns=renamed)
    for col in zeros:
        res[col + "_pos"] = res[col].clip(lower=0)
    # A repeated id means the grouping keys were not constant within that id.
    rows_per_id = res.groupby(id).size()
    assert rows_per_id.max() == 1
    return res

# Family level. Families are assigned to only one SPM unit, so spmfamunit can
# sit among the grouping keys.
F_KEYS = ["fam", "cutoff", "offtotval", "asecfwt", "spmfamunit"]
F_AGGS = ["adjginc", "adjginc_noui"]
F_FLOORED = ["offtotval", "famadjginc", "famadjginc_noui"]
f = agg(p, groupby=F_KEYS, sums=F_AGGS, zeros=F_FLOORED, prefix="fam", id="fam")

# SPM-unit level, carrying every SPM characteristic along as a grouping key.
S_KEYS = [
    "spmlunch", "spmcaphous", "spmwt", "spmeitc", "spmwic",
    "spmheat", "spmsnap", "spmtotres", "spmthresh", "spmfamunit",
]
S_AGGS = ["incwelfr", "incunemp", "adult", "child", "adjginc", "adjginc_noui"]
S_FLOORED = ["spmadjginc", "spmadjginc_noui"]
s = agg(p, groupby=S_KEYS, sums=S_AGGS, zeros=S_FLOORED, prefix="spm", id="spmfamunit")

In [2]:
# Family-based official poverty gap under two resource measures, scaled to
# billions. Both are measured against the same family cutoff and weight.
GAP_RESOURCES = {"OPM": "offtotval_pos", "AGI": "famadjginc_pos"}
pd.Series({
    label: mdf.poverty_gap(f, resource_col, "cutoff", "asecfwt")
    for label, resource_col in GAP_RESOURCES.items()
}) / 1e9

OPM    159.481811
AGI    476.854555
dtype: float64

In [3]:
def phase_out(amount, rate, respect_to):
    """Benefit left after phasing `amount` out against `respect_to`.

    Computes ``max(0, amount - respect_to * rate)`` element-wise.

    Args:
        amount: Benefit before phase-out (scalar, array or pandas object).
        rate: Benefit withdrawn per unit of `respect_to`. ``np.inf`` withdraws
            the benefit entirely, including where `respect_to` is zero.
        respect_to: Base the phase-out is applied to.

    Returns:
        The phased-out benefit, floored at zero, with the same kind of
        container that ``amount - respect_to * rate`` produces.
    """
    with np.errstate(invalid="ignore"):
        reduction = respect_to * rate
    # 0 * inf is NaN, and np.maximum propagates NaN, so every unit with a zero
    # base would otherwise get a NaN benefit under an infinite rate. The
    # simulation grid reads the rate == np.inf scenario as the current-policy
    # baseline, so the reduction is forced to infinity there and the benefit
    # floors at zero like it does for every positive base.
    indeterminate = (respect_to == 0) & np.isinf(rate)
    if isinstance(reduction, (pd.Series, pd.DataFrame)):
        # .mask keeps the pandas index/columns intact.
        reduction = reduction.mask(indeterminate, np.inf)
    else:
        reduction = np.where(indeterminate, np.inf, reduction)
    return np.maximum(0, amount - reduction)


def ubi_fpg(p, f, s,
    phase_out_rate=0, repeal_bens=None, cutoff_ratio=1,
    adult_amount=0, child_amount=0
):
    """Simulate a phased-out transfer and summarise its effect on SPM resources.

    The transfer is either a per-person amount computed on SPM units (when
    `adult_amount` or `child_amount` is positive) or a multiple of each
    family's `cutoff`, summed to the family's SPM unit. None of the input
    frames is modified.

    Args:
        p: Person-level frame with `spmfamunit`, `spmtotres`, `spmthresh`
            and `asecwt`.
        f: Family-level frame with `spmfamunit`, `cutoff` and the
            `famadjginc` / `famadjginc_noui` columns.
        s: SPM-unit frame with `spmfamunit`, `spmwt`, `spmadult`, `spmchild`,
            the `spmadjginc` / `spmadjginc_noui` columns and any columns
            named in `repeal_bens`.
        phase_out_rate: Rate passed to `phase_out`, applied to income floored
            at zero.
        repeal_bens: List of `s` columns whose sum is subtracted from each
            unit's change. `None` or `[""]` repeals nothing. When benefits
            are repealed, the phase-out uses the `_noui` income column.
        cutoff_ratio: Multiplier on `f.cutoff` for the family-level amount.
        adult_amount: Amount per adult in the SPM unit.
        child_amount: Amount per child in the SPM unit.

    Returns:
        pd.Series with `poverty` (microdf poverty rate of the adjusted
        `spmtotres` against `spmthresh`), `cost` (weighted sum of the
        unit-level change) and `pct_loser` (weighted share of people whose
        unit's change is below -1).
    """
    repealing = repeal_bens is not None and repeal_bens != [""]
    # Assign variable to phase out with respect to.
    respect_to = "adjginc_noui" if repealing else "adjginc"
    # Branch the logic if provided at individual level. A child-only amount
    # is an individual-level design too, so either amount selects this branch.
    if adult_amount > 0 or child_amount > 0:
        gross = s["spmadult"] * adult_amount + s["spmchild"] * child_amount
        base = np.maximum(0, s["spm" + respect_to])
        # assign() returns a new frame, leaving the caller's `s` untouched.
        s = s.assign(chg=phase_out(gross, phase_out_rate, base))
    else:
        base = np.maximum(0, f["fam" + respect_to])
        fam_chg = phase_out(cutoff_ratio * f["cutoff"], phase_out_rate, base)
        # Sum family amounts to their SPM unit on a temporary frame, so no
        # "chg" column is written onto the caller's `f`.
        unit_chg = (
            f.assign(chg=fam_chg).groupby("spmfamunit")["chg"].sum().reset_index()
        )
        s = s.merge(unit_chg, on="spmfamunit")
    if repealing:
        # `s` is a fresh frame by now (from assign or merge), so writing to
        # it does not touch the caller's data.
        s["chg"] = s["chg"] - s[repeal_bens].sum(axis=1)
    p = p.merge(s[["chg", "spmfamunit"]], on="spmfamunit")
    p["spmtotres"] = p["spmtotres"] + p["chg"]
    # A unit loses if its resources fall by more than 1.
    p["loser"] = p["chg"] < -1
    pov = mdf.poverty_rate(p, "spmtotres", "spmthresh", "asecwt")
    cost = mdf.weighted_sum(s, "chg", "spmwt")
    pct_loser = mdf.weighted_mean(p, "loser", "asecwt")
    return pd.Series({"poverty": pov, "cost": cost, "pct_loser": pct_loser})

In [4]:
# Weighted total of a cutoff-sized benefit phased out at a 50% rate against
# zero-floored OPM resources, scaled to billions.
opm_benefit = phase_out(f["cutoff"], 0.5, f["offtotval_pos"])
opm_benefit.mul(f["asecfwt"]).sum() / 1e9

346.39312299617006

In [5]:
# Same weighted total, but phased out against zero-floored family AGI.
agi_benefit = phase_out(f["cutoff"], 0.5, f["famadjginc_pos"])
agi_benefit.mul(f["asecfwt"]).sum() / 1e9

664.604013494945

In [6]:
# Weighted share of families whose AGI is below their OPM resources.
agi_below_opm = f["famadjginc"] < f["offtotval"]
f.loc[agi_below_opm, "asecfwt"].sum() / f["asecfwt"].sum()

0.4998911882101844

In [7]:
# Scatter OPM resources against AGI, restricted to families where both
# values lie between 0 and 50,000 (inclusive).
in_range = f["offtotval"].between(0, 50e3) & f["famadjginc"].between(0, 50e3)
fig = px.scatter(
    f[in_range],
    x="offtotval",
    y="famadjginc",
    opacity=0.1,
    title="Families' Official Poverty Measure resources vs. adjusted gross income (2019 CPS ASEC)",
    labels={
        "offtotval": "Official Poverty Measure resources",
        "famadjginc": "Adjusted gross income",
    },
)
ubicenter.format_fig(fig)

In [8]:
# Comma-separated SPM-unit columns that the "repeal" scenarios subtract.
REPEAL_BENS = "spmeitc,spmcaphous,spmsnap,spmincwelfr,spmincunemp,spmlunch"


def _simulate_family(row):
    """Run one scenario whose amount is a multiple of the family cutoff."""
    return ubi_fpg(
        p, f, s,
        phase_out_rate=row.phase_out,
        cutoff_ratio=row.cutoff_ratio,
        repeal_bens=row.repeal_bens.split(","),
    )


def _simulate_individual(row):
    """Run one scenario with fixed per-adult and per-child amounts."""
    return ubi_fpg(
        p, f, s,
        phase_out_rate=row.phase_out,
        adult_amount=row.adult_amount,
        child_amount=row.child_amount,
        repeal_bens=row.repeal_bens.split(","),
    )


# Family level: every combination of phase-out rate, repeal option and
# cutoff multiple, with the simulation results appended as extra columns.
fam_grid = mdf.cartesian_product({
    "phase_out": [np.inf, 0.5, 0.25, 0],
    "repeal_bens": ["", REPEAL_BENS],
    "cutoff_ratio": [1, 1.3],
})
sim_fam = pd.concat([fam_grid, fam_grid.apply(_simulate_family, axis=1)], axis=1)

# Individual level.
ind_grid = mdf.cartesian_product({
    "phase_out": [np.inf, 0.5, 0.25, 0],
    "adult_amount": [13300],
    "child_amount": [4516],
    "repeal_bens": ["", REPEAL_BENS],
})
sim_ind = pd.concat([ind_grid, ind_grid.apply(_simulate_individual, axis=1)], axis=1)

sim = pd.concat([sim_fam, sim_ind])

# Current poverty: the first scenario with an infinite phase-out rate and
# nothing repealed.
is_baseline = (sim["phase_out"] == np.inf) & (sim["repeal_bens"] == "")
cur_pov = sim.loc[is_baseline, "poverty"].values[0]

def pct_chg(base, new):
    """Relative change from `base` to `new`, as a fraction of `base`.

    Args:
        base: Reference value(s).
        new: Value(s) being compared against `base`.

    Returns:
        ``(new - base) / base``; negative when `new` is below a positive `base`.
    """
    delta = new - base
    return delta / base

# Each scenario's poverty rate as a relative change from current poverty.
sim["pov_chg"] = pct_chg(base=cur_pov, new=sim["poverty"])

In [9]:
# Write the full scenario table, index included, to the working directory.
SIM_OUTPUT_PATH = "sim.csv"
sim.to_csv(SIM_OUTPUT_PATH)