In [1]:
import numpy as np
import pandas as pd
import taxcalc as tc
import microdf as mdf

In [2]:
# Execute the helper script via IPython's `run` automagic (same as `%run`), so
# the names it defines are available to the cells below.
# NOTE(review): presumably provides prep_ipum and convert_asec_person_taxcalc,
# which are used later without being defined in this notebook - confirm.
run 'convert_asec_taxcalc.py'

In [3]:
# Execute the helper script via IPython's `run` automagic (same as `%run`), so
# the names it defines are available to the cells below.
# NOTE(review): presumably provides tax_unit_id and create_tax_unit, which are
# used later without being defined in this notebook - confirm.
run 'make_tax_units.py'

In [4]:
# Load the person-level records from the gzipped asec_2019_ipums CSV stored
# under the user's home directory.
person = pd.read_csv(
    filepath_or_buffer='~/UBICenter/covid_ui/asec_2019_ipums.csv.gz')

## Preprocessing

In [5]:
# Clean the raw frame: lowercase the column names and recode values to 0 or
# null as appropriate. The return value is unused, so this presumably mutates
# `person` in place.
prep_ipum(person)
# Attach taxid and the related tax-unit fields (return value likewise unused).
tax_unit_id(person)
# Derive the remaining person-level columns in taxcalc form.
person = convert_asec_person_taxcalc(person)
# wksunem1 uses 99 as its missing code; recode it to zero weeks.
# (Missing codes for the features used in taxcalc are recoded separately, in
# convert_asec_taxcalc.py.)
person.loc[person['wksunem1'].eq(99), 'wksunem1'] = 0

## Add UI to person records

In [6]:
# FPUC parameters.
FPUC_WEEKLY_BEN = 600  # Weekly benefit amount.
FPUC_MAX_WEEKS = 17  # April to July.
FPUC2_MAX_WEEKS = FPUC_MAX_WEEKS + 22  # Adding August to December.
# Benefit = weekly amount times weeks unemployed (wksunem1), with the weeks
# capped at each scenario's maximum.
person['fpuc'] = FPUC_WEEKLY_BEN * person.wksunem1.clip(upper=FPUC_MAX_WEEKS)
person['fpuc2'] = FPUC_WEEKLY_BEN * person.wksunem1.clip(upper=FPUC2_MAX_WEEKS)

In [7]:
# Keep the original unemployment benefits (e02300) in a separate column so
# they can be restored after the simulations below overwrite e02300.
person['e02300_orig'] = person['e02300']

## Create tax units and calculate tax liability

In [8]:
# Baseline: collapse persons into tax units and compute each unit's tax.
tu = create_tax_unit(person)
# RECID mirrors the tax unit id before the frame is handed to tc.Records.
tu['RECID'] = tu['taxid']
# NOTE(review): tc.Records is built with its default arguments here; confirm
# that calc_df(year=2018) applies no unintended aging or reweighting to `tu`.
# The tax column is attached positionally (.values), not by index.
tu['tax'] = mdf.calc_df(
    records=tc.Records(tu), year=2018).tax.values

Simulate FPUC.

In [9]:
# FPUC scenario: add FPUC on top of the original unemployment benefits,
# rebuild the tax units, and recompute tax.
person['e02300'] = person['e02300_orig'] + person['fpuc']
tu_fpuc = create_tax_unit(person)
tu_fpuc['RECID'] = tu_fpuc['taxid']
# Store the scenario's tax-unit e02300 (aligned on index) and tax (attached
# positionally) on the baseline frame; both rely on `tu` and `tu_fpuc`
# having the same rows in the same order.
tu['e02300_fpuc'] = tu_fpuc['e02300']
tu['tax_fpuc'] = mdf.calc_df(
    records=tc.Records(tu_fpuc), year=2018).tax.values
# The scenario frame is no longer needed.
del tu_fpuc

Simulate extended FPUC.

In [10]:
# Extended-FPUC scenario: add the extended FPUC on top of the original
# unemployment benefits, rebuild the tax units, and recompute tax.
person['e02300'] = person['e02300_orig'] + person['fpuc2']
tu_fpuc2 = create_tax_unit(person)
tu_fpuc2['RECID'] = tu_fpuc2['taxid']
# Store the scenario's tax-unit e02300 (aligned on index) and tax (attached
# positionally) on the baseline frame; both rely on `tu` and `tu_fpuc2`
# having the same rows in the same order.
tu['e02300_fpuc2'] = tu_fpuc2['e02300']
tu['tax_fpuc2'] = mdf.calc_df(
    records=tc.Records(tu_fpuc2), year=2018).tax.values
# The scenario frame is no longer needed.
del tu_fpuc2

In [11]:
# Undo the scenario overwrites: put the original e02300 back on `person`.
person['e02300'] = person['e02300_orig']

## Merge back to the person level

Allocate each tax unit's change in tax liability to its members in proportion to each person's share of the unit's FPUC.

In [12]:
# Tax-unit change in e02300 (the unit's total FPUC) under each scenario,
# relative to baseline.
tu['fpuc_total'] = tu['e02300_fpuc'] - tu['e02300']
tu['fpuc2_total'] = tu['e02300_fpuc2'] - tu['e02300']
# Tax-unit change in tax under each scenario, relative to baseline.
tu['fpuc_tax_total'] = tu['tax_fpuc'] - tu['tax']
tu['fpuc2_tax_total'] = tu['tax_fpuc2'] - tu['tax']
# Index by RECID so persons can be joined to their tax unit.
# Note: done in place, so RECID stops being a regular column after this cell.
tu.set_index(keys='RECID', inplace=True)

In [13]:
# Attach each person's tax-unit totals by matching the person's taxid to the
# RECID index of `tu`. This is an inner join, so a person whose taxid has no
# match in `tu` would be dropped.
person2 = pd.merge(
    person,
    tu[['fpuc_total', 'fpuc2_total', 'fpuc_tax_total', 'fpuc2_tax_total']],
    how='inner', left_on='taxid', right_index=True)

In [14]:
for scenario in ('fpuc', 'fpuc2'):
    own_fpuc = person2[scenario]
    unit_fpuc = person2[scenario + '_total']
    unit_tax_chg = person2[scenario + '_tax_total']
    # The person's share of the unit's tax change is pro rata to their own
    # FPUC; it is zero when the unit's FPUC total is zero, which also avoids
    # keeping the result of a division by zero.
    person2[scenario + '_tax'] = np.where(
        unit_fpuc == 0, 0, unit_tax_chg * own_fpuc / unit_fpuc)
    # FPUC net of the tax attributed to it.
    person2[scenario + '_net'] = own_fpuc - person2[scenario + '_tax']

Check that the totals match whether summed over persons or over tax units.

In [15]:
# Sanity check: each FPUC amount and the tax allocated to it must have the
# same (unweighted) total whether summed over tax units or over persons.
# np.testing.assert_allclose reports the two values on failure, which a bare
# `assert np.allclose(...)` does not. The tolerances are np.allclose's
# defaults, so the pass/fail criterion is unchanged.
for check_col in ['fpuc', 'fpuc2', 'fpuc_tax', 'fpuc2_tax']:
    np.testing.assert_allclose(
        tu[check_col + '_total'].sum(), person2[check_col].sum(),
        rtol=1e-5, atol=1e-8, equal_nan=False,
        err_msg=check_col + ' total differs between tax units and persons')

## Calculate budget-neutral UBIs and payroll taxes

In [16]:
# asecwt-weighted totals: net FPUC under each scenario, and FICA.
fpuc_budget = mdf.weighted_sum(person2, 'fpuc_net', 'asecwt')
fpuc2_budget = mdf.weighted_sum(person2, 'fpuc2_net', 'asecwt')
total_fica = mdf.weighted_sum(person2, 'fica', 'asecwt')
# Weighted head counts: everyone, and persons older than 17.
pop = person2['asecwt'].sum()
adult_pop = person2.loc[person2['age'] > 17, 'asecwt'].sum()

In [17]:
# One-row summary of the aggregates, scaled to millions and rounded.
print("All figures in millions")
pd.DataFrame([{
    'FPUC budget': fpuc_budget,
    'Expanded FPUC budget': fpuc2_budget,
    'Population': pop,
    'Adult population': adult_pop,
    'Total FICA': total_fica,
}]).div(1e6).round(0)

All figures in millions


Unnamed: 0,FPUC budget,Expanded FPUC budget,Population,Adult population,Total FICA
0,71332.0,103159.0,324.0,251.0,661276.0


In [18]:
# Spread each scenario's budget three ways: per person, per adult, and as a
# share of total FICA.
fpuc_ubi, fpuc_adult_ubi, fpuc_fica_pct_cut = (
    fpuc_budget / base for base in (pop, adult_pop, total_fica))

fpuc2_ubi, fpuc2_adult_ubi, fpuc2_fica_pct_cut = (
    fpuc2_budget / base for base in (pop, adult_pop, total_fica))

In [19]:
# One row per scenario; the FICA cut is shown as a percentage.
pd.DataFrame(
    [[fpuc_ubi, fpuc_adult_ubi, fpuc_fica_pct_cut * 100],
     [fpuc2_ubi, fpuc2_adult_ubi, fpuc2_fica_pct_cut * 100]],
    index=['FPUC', 'Expanded FPUC'],
    columns=['Universal one-time payment', 'Adult one-time payment',
             'FICA % cut'],
).round(0)

Unnamed: 0,Universal one-time payment,Adult one-time payment,FICA % cut
FPUC,220.0,285.0,11.0
Expanded FPUC,318.0,412.0,16.0


In [20]:
# Per-person amounts under each budget-neutral alternative:
#   *_ubi        the same payment for every person,
#   *_adult_ubi  the same payment for persons older than 17, zero otherwise,
#   *_fica_cut   the person's fica scaled by the proportional cut.
person2['fpuc_ubi'] = fpuc_ubi
person2['fpuc_adult_ubi'] = np.where(
    person2['age'].gt(17), fpuc_adult_ubi, 0)
person2['fpuc_fica_cut'] = person2['fica'].mul(fpuc_fica_pct_cut)

person2['fpuc2_ubi'] = fpuc2_ubi
person2['fpuc2_adult_ubi'] = np.where(
    person2['age'].gt(17), fpuc2_adult_ubi, 0)
person2['fpuc2_fica_cut'] = person2['fica'].mul(fpuc2_fica_pct_cut)

## Aggregate to SPM units

In [21]:
# Columns that identify an SPM unit and carry its resources, threshold and
# weight.
SPM_COLS = ['spmfamunit', 'spmtotres', 'spmthresh', 'spmwt']
# Resource changes to evaluate: every scenario crossed with every policy.
CHG_COLS = [scenario + policy
            for scenario in ('fpuc', 'fpuc2')
            for policy in ('_net', '_ubi', '_adult_ubi', '_fica_cut')]
# Sum each person-level change within its SPM unit.
spmu = person2.groupby(SPM_COLS)[CHG_COLS].sum().reset_index()
# Resources under each change = baseline resources plus the unit's total.
for chg_col in CHG_COLS:
    spmu['spmtotres_' + chg_col] = spmu['spmtotres'] + spmu[chg_col]

## Map back to persons

In [22]:
spm_resource_cols = ['spmtotres_' + chg_col for chg_col in CHG_COLS]
# Bring the unit-level resources under each change back to the person rows.
# NOTE(review): the join key is spmfamunit alone, while spmu was grouped on
# all of SPM_COLS; confirm spmfamunit is unique in spmu so rows don't repeat.
person3 = pd.merge(
    person2, spmu[['spmfamunit'] + spm_resource_cols],
    how='inner', on='spmfamunit')
# Poverty flags: resources under each change fall below the SPM threshold.
for chg_col in CHG_COLS:
    person3['spmpoor_' + chg_col] = (
        person3['spmtotres_' + chg_col].lt(person3['spmthresh']))
# Baseline poverty flag, from unadjusted resources.
person3['spmpoor'] = person3['spmtotres'].lt(person3['spmthresh'])

## Export

Continue in analysis notebook.

SPM-unit data is needed for the poverty gap, which requires weight, thresholds, and resources under each condition.

In [23]:
# Write the SPM-unit identifiers plus resources under each change to a
# gzipped CSV, without the index.
SPM_OUTCOLS = [*SPM_COLS, *spm_resource_cols]
spmu.loc[:, SPM_OUTCOLS].to_csv(
    'spmu.csv.gz', index=False, compression='gzip')

Person-level data is needed for the main analysis: poverty rates, inequality, race breakdown, etc.

In [24]:
# Person-level export: weight and demographics, baseline poverty flag, the
# change columns, resources under each change, SPM-unit columns, and the
# poverty flag under each change. Written as gzipped CSV without the index.
PERSON_OUTCOLS = [
    'asecwt', 'age', 'race', 'sex', 'spmpoor',
    *CHG_COLS,
    *spm_resource_cols,
    *SPM_COLS,
    *['spmpoor_' + chg_col for chg_col in CHG_COLS],
]
person3.loc[:, PERSON_OUTCOLS].to_csv(
    'person.csv.gz', index=False, compression='gzip')