In [1]:
import pandas as pd
import numpy as np
import microdf as mdf
import plotly.express as px
import statsmodels.api as sm
from ubicenter import format_fig

In [26]:
# Read data per person per year.
py = pd.read_csv("data/nys_cps.csv.gz")
py.columns = py.columns.str.lower()
py.inctot.replace({999999999: 0}, inplace=True)
py.adjginc.replace({99999999: 0}, inplace=True)
# 2014 was double-sampled.
py.loc[py.year == 2014, ["asecwt", "spmwt"]] /= 2

In [27]:
NYC_COUNTY = pd.DataFrame(columns=["fips", "county", "pop_m"])
NYS_FIPS = 36
# Per https://en.wikipedia.org/wiki/List_of_counties_in_New_York
NYC_COUNTY.loc["Manhattan"] = pd.Series({"fips": 61, "county": "New York", "pop_m": 1.632})
NYC_COUNTY.loc["Brooklyn"] = pd.Series({"fips": 47, "county": "Kings", "pop_m": 2.59})
NYC_COUNTY.loc["The Bronx"] = pd.Series({"fips": 5, "county": "Bronx", "pop_m": 1.435})
NYC_COUNTY.loc["Staten Island"] = pd.Series({"fips": 85, "county": "Richmond", "pop_m": 0.475})
NYC_COUNTY.loc["Queens"] = pd.Series({"fips": 81, "county": "Queens", "pop_m": 2.287})
NYC_COUNTY["full_fips"] = NYS_FIPS * 1000 + NYC_COUNTY.fips

py = py[py.county.isin(NYC_COUNTY.full_fips)][
    ["county", "asecwt", 'age', 'sex', 'race', 'hispan', 'inctot', 'spmwt', 'spmtotres', 'spmthresh',
       'spmfamunit', 'adjginc', 'ftotval', 'spmftotval', 'year', 'spmgeoadj']]
py["adult"] = py.age > 17
py["child"] = py.age <= 17
py["spmratio"] = py.spmtotres / py.spmthresh
py["poor"] = py.spmratio < 1
py["deep_poor"] = py.spmratio < 0.5
# py[]
NYC_COUNTY["asec_pop_m"] = NYC_COUNTY.full_fips.apply(
    lambda x: py[(py.county == x) & (py.year == py.year.max())].asecwt.sum() / 1e6
)
NYC_COUNTY

Unnamed: 0,fips,county,pop_m,full_fips,asec_pop_m
Manhattan,61,New York,1.632,36061,1.634743
Brooklyn,47,Kings,2.59,36047,2.634859
The Bronx,5,Bronx,1.435,36005,1.58628
Staten Island,85,Richmond,0.475,36085,0.497162
Queens,81,Queens,2.287,36081,1.869408


In [28]:
mdf.weighted_mean(py, "spmgeoadj", "asecwt", "year")

year
2010    1.171758
2011    1.171034
2012    1.177843
2013    1.185227
2014    1.197308
2015    1.205353
2016    1.212111
2017    1.216385
2018    1.214323
2019    1.209977
2020    1.207567
dtype: float64

In [29]:
mdf.weighted_mean(py, "deep_poor", "asecwt", "year")

year
2010    0.077847
2011    0.083146
2012    0.091158
2013    0.085348
2014    0.080284
2015    0.061335
2016    0.055558
2017    0.048316
2018    0.047428
2019    0.053130
2020    0.048592
dtype: float64

In [31]:
py.groupby("year").asecwt.sum()

year
2010    8018515.38
2011    8142355.16
2012    8304166.90
2013    8266401.85
2014    8317762.07
2015    8481922.52
2016    8570608.64
2017    8479062.17
2018    8601995.46
2019    8375445.35
2020    8222452.07
Name: asecwt, dtype: float64

In [21]:
py.groupby("year").size()

year
2010    4011
2011    3892
2012    3888
2013    3741
2014    3761
2015    3714
2016    3327
2017    3288
2018    3017
2019    3274
2020    2712
dtype: int64

In [9]:
py.groupby("year").

6428

In [29]:
# Check number of adults in deep poverty (Yang claims 500,000).
RATIO_THRESH = 0.62
p[p.adult & (p.spmratio < RATIO_THRESH)].asecwt.sum()

503897.7699999999

In [64]:
BUDGET = 1e9

SPMU_COLS = ["spmfamunit", "spmtotres", "spmthresh", "spmratio", "spmwt", "spmftotval"]
SPMU_AGGS = ["adult", "child"]

s = p.groupby(SPMU_COLS)[SPMU_AGGS].sum().reset_index()

In [65]:
# This should also approximate 500,000.
mdf.weighted_sum(s[s.spmratio < RATIO_THRESH], "adult", "spmwt")

323290.41

In [105]:
def phase_out(amount, rate, respect_to):
    return np.maximum(0, amount - respect_to * rate)

# Phase-out rate.
ps = 0.3

RATIO_THRESH = 0.25

s["max_transfer"] = np.maximum(0, s.spmthresh * RATIO_THRESH)
s["transfer"] = phase_out(s.max_transfer, ps, np.maximum(s.spmtotres, 0))
mdf.weighted_sum(s, "transfer", "spmwt") / 1e9
mdf.weighted_sum(s[s.transfer > 0], "adult", "spmwt")
# s.sort_values("transfer")

784332.8600000001

In [81]:
# Determine individualized thresholds.
adult_thresh = s[(s.adult == 1) & (s.child == 0)].spmthresh.max()
child_thresh = ((s.spmthresh - s.adult * adult_thresh) / s.child).max()
print(adult_thresh, child_thresh)
s["spmthresh_ind"] = s.adult * adult_thresh + s.child * child_thresh
s[s.spmthresh == s.spmthresh_ind]

16470.0 8380.0


Unnamed: 0,spmfamunit,spmtotres,spmthresh,spmratio,spmwt,spmftotval,adult,child,max_transfer,transfer,spmthresh_ind
41,7689001,83471.0,24850.0,3.358994,3104.71,111800,1,1,12425.0,0.0,24850.0
61,7711001,50790.0,16470.0,3.083789,4015.77,73120,1,0,8235.0,0.0,16470.0
238,8035001,24600.0,16470.0,1.493625,2786.97,27653,1,0,8235.0,0.0,16470.0
305,8109001,70425.0,16470.0,4.275956,5872.86,101965,1,0,8235.0,0.0,16470.0
322,8131001,75005.0,16470.0,4.554038,3219.07,110011,1,0,8235.0,0.0,16470.0
328,8142001,59300.0,16470.0,3.600486,4410.93,95210,1,0,8235.0,0.0,16470.0
385,8199001,16700.0,16470.0,1.013965,4949.22,19476,1,0,8235.0,0.0,16470.0
418,8233001,143123.0,16470.0,8.689921,3257.74,229201,1,0,8235.0,0.0,16470.0
449,8301005,21686.0,16470.0,1.316697,2152.04,31002,1,0,8235.0,0.0,16470.0
497,8404001,80899.0,16470.0,4.9119,2254.65,103627,1,0,8235.0,0.0,16470.0


In [87]:
SPMTHRESH_PREDICTORS = ["child", "adult"]
spmthresh_reg = sm.WLS(s.spmthresh, s[SPMTHRESH_PREDICTORS], s.spmwt).fit()
print(spmthresh_reg.params)
s["spmthresh_pred"] = spmthresh_reg.predict(s[SPMTHRESH_PREDICTORS])
s[SPMTHRESH_PREDICTORS + ["spmthresh", "spmthresh_ind", "spmthresh_pred"]]

child     5221.311942
adult    11467.075786
dtype: float64


Unnamed: 0,child,adult,spmthresh,spmthresh_ind,spmthresh_pred
0,0,3,35030.0,49410.0,34401.227359
1,0,2,22890.0,32940.0,22934.151573
2,0,3,35030.0,49410.0,34401.227359
3,0,1,16240.0,16470.0,11467.075786
4,1,1,24500.0,24850.0,16688.387729
...,...,...,...,...,...
1184,4,2,43460.0,66460.0,43819.399342
1185,2,2,35530.0,49700.0,33376.775457
1186,1,3,32780.0,57790.0,39622.539301
1187,2,2,35530.0,49700.0,33376.775457


In [6]:
# Add cumulative adult population to identify bottom 500,000 adults.
pna = pn[pn.age >= 18].copy()
pna.sort_values("spmftotval", inplace=True)
pna["pop_cum"] = pna.asecwt.cumsum()

spmtotres_thresh = pna[pna.pop_cum < 500000].spmtotres.max()

pna[pna.pop_cum < 500000]

Unnamed: 0,county,asecwt,age,sex,race,hispan,inctot,spmwt,spmtotres,spmthresh,spmfamunit,adjginc,ftotval,spmftotval,adult,pop_cum
2587,36047,2344.91,76,2,200,0,0,2344.91,0.0,33380.0,8485001,0,0,0,True,2344.91
3459,36061,1986.83,37,1,100,0,0,1986.83,17876.0,30830.0,9057001,0,0,0,True,4331.74
3458,36061,1986.83,37,2,100,0,0,1986.83,17876.0,30830.0,9057001,0,0,0,True,6318.57
1149,36005,5823.38,67,1,200,400,0,5823.38,0.0,16240.0,7668003,0,0,0,True,12141.95
3365,36061,2272.33,62,2,200,0,0,2272.33,-300.0,22890.0,8940001,0,0,0,True,14414.28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4122,36081,4959.12,80,1,100,0,12000,4959.12,11858.0,13640.0,9399001,0,12000,12000,True,483357.89
1844,36047,3225.03,55,1,651,0,12000,3225.03,22748.0,39020.0,8059001,12000,12000,12000,True,486582.92
3075,36061,4949.22,72,1,100,0,12000,4949.22,18504.0,16240.0,8726001,0,12000,12000,True,491532.14
1152,36005,3090.58,32,1,200,0,12000,3090.58,30979.0,29080.0,7673001,11152,12000,12000,True,494622.72


In [7]:
sn = pn[["spmfamunit", "spmwt", "spmftotval", "spmtotres", "spmthresh"]].drop_duplicates().sort_values("spmftotval")
sn["spmwt_cum"] = sn.spmwt.cumsum()
spmftotval_thresh = sn[sn.spmwt_cum < 500000].spmftotval.max()
sn[sn.spmftotval <= spmftotval_thresh].spmwt.sum()
sn["ubi"] = np.where(sn.spmftotval <= spmftotval_thresh, 2000, 0)
sn["spmtotres_ubi"] = sn.spmtotres + sn.ubi

In [8]:
mdf.weighted_sum(sn, "ubi", "spmwt")

1004590420.0

In [9]:
pn = pn.merge(sn[["spmfamunit", "spmtotres_ubi"]], on="spmfamunit")


In [10]:
mdf.poverty_gap(sn, "spmtotres_ubi", "spmthresh", "spmwt") / 1e9

5.53524380132

In [11]:
mdf.poverty_gap(sn, "spmtotres", "spmthresh", "spmwt") / 1e9

6.256897865129999

In [12]:
mdf.deep_poverty_rate(pn, "spmtotres", "spmthresh", "asecwt")

0.04859213974110767

In [13]:
mdf.deep_poverty_rate(pn, "spmtotres_ubi", "spmthresh", "asecwt")

0.04356069904096139

In [14]:
mdf.poverty_rate(pn, "spmtotres_ubi", "spmthresh", "asecwt")

0.18354143352276175

In [15]:
mdf.poverty_rate(pn, "spmtotres", "spmthresh", "asecwt")

0.19196885996563795

In [16]:
def pct_chg(base, reform):
    return (reform - base) / base

pct_chg(mdf.poverty_rate(pn, "spmtotres", "spmthresh", "asecwt"),
mdf.poverty_rate(pn, "spmtotres_ubi", "spmthresh", "asecwt"))

-0.04389996609025385

In [17]:
pct_chg(mdf.deep_poverty_rate(pn, "spmtotres", "spmthresh", "asecwt"),
mdf.deep_poverty_rate(pn, "spmtotres_ubi", "spmthresh", "asecwt"))

-0.10354433303314309

In [18]:
pct_chg(mdf.gini(sn, "spmtotres", "spmwt"),
mdf.gini(sn, "spmtotres_ubi", "spmwt"))

-0.01087395327199434

In [19]:
# NYS population: 19.45M official as of 2019.
p[p.statefip == NYS_FIPS].asecwt.sum() / 1e6

19.103431190000002

In [20]:
p[(p.statefip == NYS_FIPS) & p.county.isin(NYC_FIPS)].asecwt.sum() / 1e6

NameError: name 'NYC_FIPS' is not defined

In [22]:
p[p.county.isin([NYS_FIPS * 1000 + i for i in NYC_FIPS])].groupby("county").asecwt.sum()

county
36005    1586280.10
36047    2634859.16
36061    1634742.52
36081    1869407.84
36085     497162.45
Name: asecwt, dtype: float64

In [20]:
p[p.county == 36061].asecwt.sum()

1634742.52