In [24]:
import pandas as pd
import numpy as np
import microdf as mdf

p = pd.read_csv("../data/asec_2019_ipums.csv.gz")

# Preprocess
p.columns = p.columns.str.lower()

# Replace NIU codes.
NIU_CODES = {
    "adjginc": 99999999,
    "offtotval": 9999999999,
    # Secondary individuals under 15 have NIU for cutoff.
    # We set them to zero as to give them no UBI
    # (they're still part of SPM units with others who get UBI).
    "cutoff": 999999,
    "incwelfr": 999999,
    "incunemp": 999999,
}
for column, niu in NIU_CODES.items():
    p[column].replace({niu: 0}, inplace=True)

p["fam"] = p.marbasecidh.astype(str) + "-" + p.famid.astype(str)

# Add age flags.
p["child"] = p.age < 18
p["adult"] = ~p.child

# Create aggregates.

# Family: note families are assigned only to one SPM unit.
f = p.groupby(["fam", "cutoff", "offtotval", "spmfamunit"]).adjginc.sum().reset_index().rename(columns={"adjginc": "fam_adjginc"})

# SPM unit, with all SPM characteristics.

S_AGGS = ["incwelfr", "incunemp", "adult", "child"]
s = p.groupby(['spmlunch', 'spmcaphous', 'spmwt', 'spmeitc',
       'spmwic', 'spmheat', 'spmsnap', 'spmtotres', 'spmthresh', 'spmfamunit'])[
           S_AGGS].sum().reset_index().rename(columns=dict(zip(S_AGGS, ["spm" + i for i in S_AGGS])))
s

Unnamed: 0,spmlunch,spmcaphous,spmwt,spmeitc,spmwic,spmheat,spmsnap,spmtotres,spmthresh,spmfamunit,spmincwelfr,spmincunemp,spmadult,spmchild
0,0,0.0,94.35,0,0.0,0.0,0,164712.0,10830.0,70986003,0,0,1,0
1,0,0.0,120.17,0,0.0,0.0,0,33575.0,15260.0,71089001,0,0,2,0
2,0,0.0,124.35,0,0.0,0.0,0,-60.0,20530.0,34805001,0,0,2,0
3,0,0.0,125.80,0,0.0,0.0,0,12870.0,20530.0,35533001,0,0,2,0
4,0,0.0,125.81,0,0.0,0.0,0,18348.0,15260.0,70945001,0,0,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63087,4500,0.0,1487.31,4760,0.0,0.0,0,50711.0,41120.0,61841001,0,0,2,7
63088,4500,0.0,2102.49,0,0.0,0.0,5040,26441.0,41750.0,45229001,0,0,3,7
63089,4500,0.0,2868.53,3375,0.0,0.0,0,23489.0,60160.0,48466001,0,0,4,8
63090,5142,26070.0,1996.90,6034,490.0,300.0,10500,76546.0,56900.0,8384001,0,0,2,8


In [28]:
def phase_out(amount, rate, respect_to):
    return np.maximum(0, amount - respect_to * rate)


def ubi_fpg(p, f, s,
    phase_out_rate=0, respect_to="offtotval", repeal_bens=None, cutoff_ratio=1,
    adult_amount=0, child_amount=0
):
    # Branch the logic if provided at individual level.
    if adult_amount > 0:
        s = s.copy()
        s["chg"] = phase_out(s.spmadult * adult_amount + s.spmchild * child_amount, phase_out_rate, np.maximum(0, s[respect_to]))
    else: 
        f["chg"] = phase_out(cutoff_ratio * f.cutoff, phase_out_rate, np.maximum(0, f[respect_to]))
        s = s.merge(f.groupby("spmfamunit").chg.sum().reset_index(), on="spmfamunit")
    if repeal_bens is not None and repeal_bens != [""]:
        s.chg -= s[repeal_bens].sum(axis=1)
    p = p.merge(s[["chg", "spmfamunit"]], on="spmfamunit")
    p.spmtotres += p.chg
    p["loser"] = p.chg < -1
    pov = mdf.poverty_rate(p, "spmtotres", "spmthresh", "asecwt")
    cost = mdf.weighted_sum(s, "chg", "spmwt")
    pct_loser = mdf.weighted_mean(p, "loser", "asecwt")
    return pd.Series({"poverty": pov, "cost": cost, "pct_loser": pct_loser})

In [29]:
REPEAL_BENS = "spmeitc,spmcaphous,spmsnap,spmincwelfr,spmincunemp,spmlunch"
sim_fam = mdf.cartesian_product({
    "phase_out": [np.inf, 0.5, 0.25, 0],
    "repeal_bens": ["", REPEAL_BENS],
    "cutoff_ratio": [1, 1.3],
    })
sim_fam = pd.concat([
    sim_fam,
    sim_fam.apply(lambda x: ubi_fpg(p, f, s, phase_out_rate=x.phase_out,
    cutoff_ratio=x.cutoff_ratio, repeal_bens=x.repeal_bens.split(",")), axis=1)
    ], axis=1)

# Individual level.
sim_ind = mdf.cartesian_product({
    "phase_out": [np.inf, 0.5, 0.25, 0],
    "adult_amount": [13300],
    "child_amount": [4516],
    "repeal_bens": ["", REPEAL_BENS]
    })
sim_ind = pd.concat([
    sim_ind,
    sim_ind.apply(lambda x: ubi_fpg(p, f, s, phase_out_rate=x.phase_out,
    adult_amount=x.adult_amount, child_amount=x.child_amount, repeal_bens=x.repeal_bens.split(",")), axis=1)
    ], axis=1)

sim = pd.concat([sim_fam, sim_ind])

cur_pov = sim[(sim.phase_out == np.inf) & (sim.repeal_bens == "")].poverty.values[0]

def pct_chg(base, new):
    return (new - base) / base

sim["pov_chg"] = pct_chg(cur_pov, sim.poverty)
sim

KeyError: 'offtotval'