### CPS ASEC - Replicating Poverty Estimates

Brian Dew

Updated September 15, 2020 for 2019 data

----


Replicate the official poverty measure (OPM) rate and the number of people in poverty, then replicate the same for the Supplemental Poverty Measure (SPM).

Next, replicate Matt Bruenig's analysis: grouping the population into categories and computing market income.

In [1]:
import re
import pandas as pd
import numpy as np

In [4]:
# Extract person records for selected variables
variables = ['PRECORD', 'A_AGE', 'MARSUPWT', 'PERLIS', 
             'SPM_Resources', 'SPM_PovThreshold', 'WKSWORK',
             'PRDISFLG', 'RSNNOTW', 'PYRSN', 'LKWEEKS',
             'SPM_Weight', 'SPM_ID', 'SPM_CapWkCCXpns',
             'SPM_Totval', 'SPM_MedXpns', 'SPM_ChildSupPd',
             'SPM_Poor']

benefits = ['SS_VAL', 'SSI_VAL', 'UC_VAL', 'VET_VAL', 
            'WC_VAL', 'PAW_VAL']

# NOTE(review): machine-specific absolute path; adjust for your environment.
path = '/home/brian/Documents/ASEC/'

# Read the text data dictionary, closing the file handle when done.
dictfile = f'{path}/data/persfmt.txt'
with open(dictfile) as dict_fh:
    dd = dict_fh.read()

# Raw f-string so \s, \d and \b reach the regex engine unmodified.
# The leading \b keeps a requested name from matching as the tail of a
# longer variable name in the dictionary.
# Assumes each dictionary entry reads "<name> <length> <start>" -- TODO confirm
# against the persfmt.txt layout.
p = rf'\b({"|".join(variables + benefits)})\s+(\d+)\s+(\d+)\s'

# Convert the 1-based start and field length into the 0-based, half-open
# (from, to) intervals that read_fwf expects.
cols = {name: (int(start) - 1, int(start) - 1 + int(length)) 
        for name, length, start in re.findall(p, dd)}

# Manually adjust because 2020 text dict missing
# Each entry is (1-based start position, field length).
manual_positions = {
    'SPM_ID': (1419, 8),
    'SPM_CapWkCCXpns': (1437, 6),
    'SPM_ChildSupPd': (1449, 6),
    'SPM_MedXpns': (1500, 7),
    'SPM_Poor': (1513, 1),
    'SPM_PovThreshold': (1514, 5),
    'SPM_Resources': (1519, 7),
    'SPM_Totval': (1542, 7),
    'SPM_Weight': (1550, 7),
    'PERLIS': (914, 2),
    'PYRSN': (322, 1),
    'WKSWORK': (337, 2),
    'LKWEEKS': (304, 2),
    'RSNNOTW': (323, 1),
    'SS_VAL': (623, 5),
    'SSI_VAL': (629, 5),
    'UC_VAL': (661, 5),
    'VET_VAL': (673, 6),
    'WC_VAL': (681, 5),
    'PAW_VAL': (690, 5),
}
cols.update({name: (start - 1, start - 1 + length)
             for name, (start, length) in manual_positions.items()})

# Read the fixed-width file and keep only rows with PRECORD == 3.
# .copy() makes df an independent frame, so adding columns to it later does
# not raise SettingWithCopyWarning.
datafile = f'{path}/data/asec2020_pubuse.dat'
df = (pd.read_fwf(datafile, 
                  colspecs=list(cols.values()), 
                  header=None, 
                  names=list(cols.keys()))
        .query('PRECORD == 3')
        .copy())

In [5]:
# Categorize population
# Rules are listed in priority order: np.select assigns the label of the
# first condition that holds for a row, and 'All Other' when none do.
category_rules = [
    ('Children', df.A_AGE < 18),
    ('Elderly', df.A_AGE > 64),
    ('Disabled', (df.PRDISFLG == 1) | (df.PYRSN == 1) | (df.RSNNOTW == 1)),
    ('Student', (df.PYRSN == 3) | (df.RSNNOTW == 4)),
    ('Carers', (df.PYRSN == 2) | (df.RSNNOTW == 3)),
    ('Unemployed', (df.PYRSN == 5) | (df.RSNNOTW == 5) | (df.LKWEEKS > 0)),
    ('Early Retired', (df.PYRSN == 4) | (df.RSNNOTW == 2)),
    ('Fully Employed', df.WKSWORK > 49),
]
df['Category'] = np.select(
    [mask.to_numpy() for _, mask in category_rules],
    [label for label, _ in category_rules],
    default='All Other')

# Poverty flags (1 = poor, 0 = not poor)
# SPM: resources fall below the SPM threshold; OPM: PERLIS equals 1.
df['SPM'] = (df['SPM_Resources'] < df['SPM_PovThreshold']).astype(int)
df['OPM'] = (df['PERLIS'] == 1).astype(int)

# Market income: SPM_Totval less the three expense columns, less the listed
# benefit columns summed per person and then totaled within each SPM_ID group.
unit_expenses = df[['SPM_CapWkCCXpns', 'SPM_MedXpns', 'SPM_ChildSupPd']].sum(axis=1)
person_benefits = df[benefits].sum(axis=1)
unit_benefits = person_benefits.groupby(df['SPM_ID']).transform('sum')
df['MARKET_INCOME'] = df['SPM_Totval'] - unit_expenses - unit_benefits

# Flag rows whose market income alone falls below the SPM threshold.
df['SPM_MI'] = (df['MARKET_INCOME'] < df['SPM_PovThreshold']).astype(int)

In [6]:
results = pd.DataFrame()

# Group share of poor people
# For each poverty flag, take the rows flagged poor and express each
# category's weighted count as a percentage of the weighted total.
# The first column assigned (SPM) fixes the row order of the table.
share_specs = [('SPM', 'SPM_Weight'),
               ('OPM', 'MARSUPWT'),
               ('SPM_MI', 'SPM_Weight')]

for measure, weight in share_specs:
    poor = df[df[measure] == 1]
    share = poor.groupby('Category')[weight].sum() / poor[weight].sum() * 100
    results[measure] = share.sort_values(ascending=False)

print(results)

                      SPM        OPM     SPM_MI
Category                                       
Children        23.552475  30.795875  21.062952
Elderly         19.033379  14.295022  33.935611
Fully Employed  15.779747  10.050124  12.264199
Disabled        13.183932  15.317319  13.416647
Carers           8.629066  10.026422   5.671404
Student          8.063140   8.087707   4.775935
Unemployed       5.738762   5.819194   3.933209
Early Retired    3.626288   3.407859   3.468531
All Other        2.393210   2.200479   1.471512


In [7]:
results2 = pd.DataFrame()

# Poverty rate of each group
# Weighted mean of each 0/1 poverty flag within a category, as a percentage.
# Only the flag and weight columns are selected before apply, so the function
# never operates on the grouping column (deprecated in recent pandas).
rate_specs = [('SPM', 'SPM_Weight'),
              ('OPM', 'MARSUPWT'),
              ('SPM_MI', 'SPM_Weight')]

for measure, weight in rate_specs:
    rate = (df.groupby('Category')[[measure, weight]]
              .apply(lambda grp: np.average(grp[measure], weights=grp[weight]))
              * 100)
    # Reorder rows to match the category order of the shares table.
    results2[measure] = rate.loc[results.index]

print(results2)

                      SPM        OPM     SPM_MI
Category                                       
Children        12.406551  14.307068  22.143021
Elderly         12.751231   8.890741  45.372737
Fully Employed   4.857504   2.780689   7.534517
Disabled        23.521702  24.229192  47.771721
Carers          21.369661  22.580328  28.030288
Student         22.411029  18.548014  26.492266
Unemployed      20.887578  18.172532  28.570644
Early Retired   19.312494  16.703581  36.865899
All Other       16.977166  13.751578  20.832978
