In [1]:
import numpy as np
import pandas as pd
import taxcalc as tc
import microdf as mdf

# For setting a random period of unemployment given a person's duration.
np.random.seed(0)

In [2]:
run 'convert_asec_taxcalc.py'

In [3]:
run 'make_tax_units.py'

In [4]:
person = pd.read_csv('/home/mghenis/asec_2000_2019.csv.gz')

## Preprocessing

In [5]:
# Set columns to lowercase and to 0 or null as appropriate.
prep_ipum(person)
# Add taxid and related fields.
tax_unit_id(person)
# Add other person-level columns in taxcalc form.
person = convert_asec_person_taxcalc(person)
# 99 is the missing code for wksunem1.
# Note: Missing codes for features used in taxcalc are recoded in
# convert_asec_taxcalc.py.
person.loc[person.wksunem1 == 99, 'wksunem1'] = 0
# The 2014 file was released in two ways, so weights must be halved.
person.asecwt *= np.where(person.year == 2014, 0.5, 1)

## Add UI to person records

Assume that unemployment blocks are contiguous and randomly distributed.

In [6]:
person['ui_start'] = np.random.randint(1, 53 - person.wksunem1,
                                       person.shape[0])
person['ui_end'] = person.ui_start + person.wksunem1

In [7]:
FPUC_START = 13  # April was the 13th week.
FPUC_MAX_WEEKS = 17  # April to July.
FPUC2_START = FPUC_START + FPUC_MAX_WEEKS
FPUC2_MAX_WEEKS = 22  # August to December.
FPUC_WEEKLY_BEN = 600
person['fpuc_weeks'] = np.fmax(
    0, np.fmin(person.ui_end - FPUC_START,
               np.fmin(person.wksunem1, FPUC_MAX_WEEKS)))
person['fpuc2_weeks'] = np.fmax(
    0, np.fmin(person.ui_end - FPUC2_START,
               np.fmin(person.wksunem1, FPUC2_MAX_WEEKS)))
person['fpuc'] = FPUC_WEEKLY_BEN * person.fpuc_weeks
person['fpuc2'] = person.fpuc + FPUC_WEEKLY_BEN * person.fpuc2_weeks

# Checks
assert person.fpuc_weeks.max() == FPUC_MAX_WEEKS
assert person.fpuc2_weeks.max() == FPUC2_MAX_WEEKS
assert person.fpuc_weeks.min() == person.fpuc2_weeks.min() == 0

In [8]:
# Store original unemployment benefits.
person['e02300_orig'] = person.e02300

## Create tax units and calculate tax liability

In [9]:
person['FLPDYR'] = person.year - 1
person['RECID'] = person.FLPDYR * 1e9 + person.taxid

In [10]:
def get_taxes(tu):
    """ Calculates taxes by running taxcalc on a tax unit DataFrame.
    
    Args:
        tu: Tax unit DataFrame.
    
    Returns:
        Series with tax liability for each tax unit.
    """
    return mdf.calc_df(records=tc.Records(tu, weights=None, gfactors=None),
                       year=2018).tax.values

In [11]:
# Create tax unit dataframe.
tu = create_tax_unit(person)
tu['tax'] = get_taxes(tu)

Simulate FPUC.

In [12]:
# Create tax unit dataframe.
person.e02300 = person.e02300_orig + person.fpuc
tu_fpuc = create_tax_unit(person)
tu['e02300_fpuc'] = tu_fpuc.e02300
tu['tax_fpuc'] = get_taxes(tu_fpuc)
del tu_fpuc

Simulate extended FPUC.

In [13]:
# Create tax unit dataframe.
person.e02300 = person.e02300_orig + person.fpuc2
tu_fpuc2 = create_tax_unit(person)
tu['e02300_fpuc2'] = tu_fpuc2.e02300
tu['tax_fpuc2'] = get_taxes(tu_fpuc2)
del tu_fpuc2

In [14]:
# Change person e02300 back.
person.e02300 = person.e02300_orig

## Merge back to the person level

Have each person pay the share of tax differences in proportion with their FPUC.

In [15]:
tu['fpuc_total'] = tu.e02300_fpuc - tu.e02300
tu['fpuc2_total'] = tu.e02300_fpuc2 - tu.e02300
tu['fpuc_tax_total'] = tu.tax_fpuc - tu.tax
tu['fpuc2_tax_total'] = tu.tax_fpuc2 - tu.tax

In [16]:
person2 = person.merge(tu[['RECID', 'fpuc_total', 'fpuc2_total',
                           'fpuc_tax_total', 'fpuc2_tax_total']],
                       on='RECID')
del person

In [17]:
for i in ['fpuc', 'fpuc2']:
    person2[i + '_tax'] = np.where(person2[i + '_total'] == 0, 0,
        person2[i + '_tax_total'] * person2[i] / person2[i + '_total'])
    person2[i + '_net'] = person2[i] - person2[i + '_tax']

Checks that the totals match by person and tax unit, then garbage-collect.

In [18]:
assert np.allclose(tu.fpuc_total.sum(), person2.fpuc.sum())
assert np.allclose(tu.fpuc2_total.sum(), person2.fpuc2.sum())
assert np.allclose(tu.fpuc_tax_total.sum(), person2.fpuc_tax.sum())
assert np.allclose(tu.fpuc2_tax_total.sum(), person2.fpuc2_tax.sum())
del tu

## Calculate budget-neutral UBIs and payroll taxes

In [19]:
def single_year_summary(year):
    fpuc_budget = mdf.weighted_sum(person2[person2.FLPDYR == year],
                                   'fpuc_net', 'asecwt')
    fpuc1_2_budget = mdf.weighted_sum(person2[person2.FLPDYR == year],
                                      'fpuc2_net', 'asecwt')
    fpuc2_budget = fpuc1_2_budget - fpuc_budget
    pop = person2[person2.FLPDYR == year].asecwt.sum()
    adult_pop = person2[person2.FLPDYR == year][person2.age > 17].asecwt.sum()
    total_fica = mdf.weighted_sum(person2[person2.FLPDYR == year],
                                  'fica', 'asecwt')
    fpuc_ubi = fpuc_budget / pop
    fpuc_adult_ubi = fpuc_budget / adult_pop
    fpuc_fica_pct_cut = 100 * fpuc_budget / total_fica
    # Note: FPUC2 includes FPUC1.
    fpuc2_ubi = fpuc2_budget / pop
    fpuc2_adult_ubi = fpuc2_budget / adult_pop
    fpuc2_fica_pct_cut = 100 * fpuc2_budget / total_fica
    return pd.Series([fpuc_budget, fpuc2_budget, pop, adult_pop, total_fica,
                      fpuc_ubi, fpuc_adult_ubi, fpuc_fica_pct_cut,
                      fpuc2_ubi, fpuc2_adult_ubi, fpuc2_fica_pct_cut])

In [20]:
OVERALL_YEARLY_METRICS = ['fpuc_budget', 'fpuc2_budget', 'pop', 'adult_pop',
                          'total_fica']
FPUC_YEARLY_METRICS = ['fpuc_ubi', 'fpuc_adult_ubi', 'fpuc_fica_pct_cut']
FPUC2_YEARLY_METRICS = ['fpuc2_ubi', 'fpuc2_adult_ubi', 'fpuc2_fica_pct_cut']
all_metrics = (
    OVERALL_YEARLY_METRICS + FPUC_YEARLY_METRICS + FPUC2_YEARLY_METRICS)
DISPLAY_METRICS = {
    'fpuc_budget': 'Cost of FPUC',
    'fpuc2_budget': 'Cost of expanding FPUC',
    'pop': 'Population',
    'adult_pop': 'Adult population',
    'total_fica': 'Total FICA',
    'fpuc_ubi': 'Universal one-time payment (FPUC)',
    'fpuc_adult_ubi': 'Adult one-time payment (FPUC)',
    'fpuc_fica_pct_cut': 'FICA % cut (FPUC)',
    'fpuc2_ubi': 'Universal one-time payment (FPUC2)',
    'fpuc2_adult_ubi': 'Adult one-time payment (FPUC2)',
    'fpuc2_fica_pct_cut': 'FICA % cut (FPUC2)'
}
    
year_summary = pd.DataFrame({'FLPDYR': person2.FLPDYR.unique()})
year_summary[all_metrics] = year_summary.FLPDYR.apply(single_year_summary)

# Print overall summary
print("All figures in millions.")
(year_summary.set_index('FLPDYR')[OVERALL_YEARLY_METRICS].rename(
    columns=DISPLAY_METRICS) / 1e6).round(1)

  


All figures in millions.


Unnamed: 0_level_0,Cost of FPUC,Cost of expanding FPUC,Population,Adult population,Total FICA
FLPDYR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1999,63185.9,38742.2,274.1,201.8,324914.5
2000,61487.6,37303.2,279.5,207.2,338245.5
2001,78398.0,46719.1,282.1,209.5,350052.0
2002,85767.7,53896.0,285.9,212.6,359996.5
2003,75490.9,49906.5,288.3,214.7,382337.9
2004,66785.1,42802.8,291.2,217.4,396703.4
2005,64676.1,40756.3,293.8,219.8,419022.7
2006,63404.9,39986.2,296.8,222.7,454031.6
2007,65897.6,41181.1,299.1,224.7,470603.1
2008,105884.7,67197.8,301.5,227.0,473660.8


In [21]:
# Print reform parameter summary.
(year_summary.set_index('FLPDYR')[
    FPUC_YEARLY_METRICS + FPUC2_YEARLY_METRICS].rename(
    columns=DISPLAY_METRICS)).round(2)

Unnamed: 0_level_0,Universal one-time payment (FPUC),Adult one-time payment (FPUC),FICA % cut (FPUC),Universal one-time payment (FPUC2),Adult one-time payment (FPUC2),FICA % cut (FPUC2)
FLPDYR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999,230.53,313.17,19.45,141.35,192.02,11.92
2000,219.98,296.75,18.18,133.46,180.03,11.03
2001,277.93,374.3,22.4,165.62,223.05,13.35
2002,299.96,403.38,23.82,188.49,253.48,14.97
2003,261.87,351.61,19.74,173.12,232.45,13.05
2004,229.37,307.23,16.84,147.0,196.91,10.79
2005,220.11,294.18,15.43,138.71,185.38,9.73
2006,213.61,284.68,13.96,134.71,179.53,8.81
2007,220.32,293.27,14.0,137.68,183.27,8.75
2008,351.21,466.51,22.35,222.89,296.06,14.19


Merge to `person2`.

In [22]:
person3 = person2.merge(
    year_summary[['FLPDYR'] + FPUC_YEARLY_METRICS + FPUC2_YEARLY_METRICS],
    on='FLPDYR')
del person2

Run calculations on all fields (except `fpuc_ubi` which already works).

In [23]:
# Zero out adult UBIs for children.
person3.loc[person3.age < 18, 'fpuc_adult_ubi'] = 0
# Calculate total FICA cut by multiplying FICA by % cut.
person3['fpuc_fica_cut'] = person3.fica * person3.fpuc_fica_pct_cut
# Similar process for FPUC2, but also adding fpuc_net since this is on top
# of the existing FPUC.
person3['fpuc2_ubi'] = person3.fpuc_net + person3.fpuc2_ubi
person3['fpuc2_adult_ubi'] = (person3.fpuc_net + 
                              np.where(person3.age > 17,
                                       person3.fpuc2_adult_ubi, 0))
person3['fpuc2_fica_cut'] = (person3.fpuc_net +
                             person3.fica * person3.fpuc2_fica_pct_cut)

## Aggregate to SPM units

In [24]:
SPM_COLS = ['FLPDYR', 'spmfamunit', 'spmtotres', 'spmthresh', 'spmwt']
CHG_COLS = ['fpuc_net', 'fpuc_ubi', 'fpuc_adult_ubi', 'fpuc_fica_cut',
            'fpuc2_net', 'fpuc2_ubi', 'fpuc2_adult_ubi', 'fpuc2_fica_cut']
spmu = person3.groupby(SPM_COLS)[CHG_COLS].sum().reset_index()
for i in CHG_COLS:
    spmu['spmtotres_' + i] = spmu.spmtotres + spmu[i]

## Map back to persons

In [25]:
# Shrink the data.
person3 = person3[['asecwt', 'age', 'race', 'sex'] + CHG_COLS + SPM_COLS]

In [26]:
spm_resource_cols = ['spmtotres_' + i for i in CHG_COLS]
person4 = person3.merge(spmu[['spmfamunit'] + spm_resource_cols],
                        on='spmfamunit')
del person3
# Poverty flags.
for i in CHG_COLS:
    person4['spmpoor_' + i ] = person4['spmtotres_' + i] < person4.spmthresh
# Also calculate baseline.
person4['spmpoor'] = person4.spmtotres < person4.spmthresh

## Export

Continue in analysis notebook.

SPM-unit data is needed for the poverty gap, which requires weight, thresholds, and resources under each condition.

In [27]:
SPM_OUTCOLS = SPM_COLS + spm_resource_cols
spmu[SPM_OUTCOLS].to_csv('spmu.csv.gz', compression='gzip', index=False)
del spmu

Person-level data is needed for the main analysis: poverty rates, inequality, race breakdown, etc.

In [28]:
PERSON_OUTCOLS = (['asecwt', 'age', 'race', 'sex', 'spmpoor'] + 
                  CHG_COLS + spm_resource_cols + SPM_COLS +
                  ['spmpoor_' + i for i in CHG_COLS])
person4[PERSON_OUTCOLS].to_csv('person.csv.gz', compression='gzip',
                               index=False)