### CPS ASEC - Replicating Poverty Estimates

Brian Dew

December 28, 2019

----


Replicate the official poverty rate and the number of people in poverty, then do the same for the Supplemental Poverty Measure (SPM).

Next, replicate Matt Bruenig's analysis, which groups the population into categories and measures poverty using market income.

In [1]:
import re
import pandas as pd
import numpy as np

In [2]:
# Extract person records for selected variables
variables = ['PRECORD', 'A_AGE', 'MARSUPWT', 'PERLIS',
             'SPM_Resources', 'SPM_PovThreshold', 'WKSWORK',
             'PRDISFLG', 'RSNNOTW', 'PYRSN', 'LKWEEKS',
             'SPM_Weight', 'SPM_ID', 'SPM_CapWkCCXpns',
             'SPM_Totval', 'SPM_MedXpns', 'SPM_ChildSupPd',
             'SPM_Poor']

# Benefit amounts that are later summed and removed from total income
benefits = ['SS_VAL', 'SSI_VAL', 'UC_VAL', 'VET_VAL',
            'WC_VAL', 'PAW_VAL']

path = '/home/brian/Documents/ASEC/'

# Read the data dictionary; the context manager guarantees the handle is
# closed instead of leaving it to the garbage collector.
dictfile = f'{path}/data/persfmt.txt'
with open(dictfile) as dict_fh:
    dd = dict_fh.read()

# Build the lookup pattern as a *raw* f-string so that \s, \d and \b reach
# the regex engine unchanged (in a plain string they are invalid escape
# sequences, which newer Python versions warn about).
# The leading \b stops a requested name from matching as the tail of a
# longer variable name, which would silently overwrite the right position.
names_alt = '|'.join(re.escape(name) for name in variables + benefits)
p = rf'\b({names_alt})\s+(\d+)\s+(\d+)\s'

# Each match is read as (name, length, start). The start is shifted down by
# one (presumably 1-based in the dictionary -- confirm against persfmt.txt)
# to give the half-open (from, to) intervals that read_fwf expects.
cols = {name: (int(start) - 1, int(start) - 1 + int(length))
        for name, length, start in re.findall(p, dd)}

# Load only the selected columns from the fixed-width file, then keep the
# rows whose record type is 3.
datafile = f'{path}/data/asec2019_pubuse.dat'
df = (pd.read_fwf(datafile,
                  colspecs=list(cols.values()),
                  header=None,
                  names=list(cols.keys()))
        .query('PRECORD == 3'))

In [3]:
# Categorize population
# Ordered (label, mask) rules: a person receives the label of the FIRST rule
# whose mask is true, so earlier rules take precedence over later ones.
# Anyone matching no rule falls through to 'All Other'.
category_rules = [
    ('Children', df.A_AGE < 18),
    ('Elderly', df.A_AGE > 64),
    ('Disabled', (df.PRDISFLG == 1) | (df.PYRSN == 1) | (df.RSNNOTW == 1)),
    ('Student', (df.PYRSN == 3) | (df.RSNNOTW == 4)),
    ('Carers', (df.PYRSN == 2) | (df.RSNNOTW == 3)),
    ('Unemployed', (df.PYRSN == 5) | (df.RSNNOTW == 5) | (df.LKWEEKS > 0)),
    ('Early Retired', (df.PYRSN == 4) | (df.RSNNOTW == 2)),
    ('Fully Employed', df.WKSWORK > 49),
]
category_labels = [label for label, _ in category_rules]
category_masks = [mask for _, mask in category_rules]
df['Category'] = np.select(category_masks, category_labels, default='All Other')

# Poverty indicators (1 = poor, 0 = not poor)
below_spm_line = df['SPM_Resources'] < df['SPM_PovThreshold']
df['SPM'] = np.where(below_spm_line, 1, 0)

is_opm_poor = df['PERLIS'] == 1
df['OPM'] = np.where(is_opm_poor, 1, 0)

# Market income: total value less the three expense columns, less the
# benefit amounts of everyone sharing the same SPM_ID.
expense_cols = ['SPM_CapWkCCXpns', 'SPM_MedXpns', 'SPM_ChildSupPd']
unit_expenses = df[expense_cols].sum(axis=1)
person_benefits = df[benefits].sum(axis=1)
unit_benefits = person_benefits.groupby(df['SPM_ID']).transform('sum')
df['MARKET_INCOME'] = df['SPM_Totval'] - unit_expenses - unit_benefits

below_line_on_market_income = df['MARKET_INCOME'] < df['SPM_PovThreshold']
df['SPM_MI'] = np.where(below_line_on_market_income, 1, 0)

In [4]:
def poor_share_by_category(flag, weight):
    """Return each Category's weighted share (in percent) of the people
    flagged as poor by `flag`, sorted from largest to smallest share.

    flag   -- name of a 0/1 poverty indicator column in df
    weight -- name of the weight column to sum
    """
    poor = df[df[flag] == 1]
    by_category = poor.groupby('Category')[weight].sum()
    share = by_category / poor[weight].sum() * 100
    return share.sort_values(ascending=False)


# Group share of poor people
# The first column assigned fixes the row order of the table; the later
# columns are aligned to that index.
results = pd.DataFrame()
for flag, weight in [('SPM', 'SPM_Weight'),
                     ('OPM', 'MARSUPWT'),
                     ('SPM_MI', 'SPM_Weight')]:
    results[flag] = poor_share_by_category(flag, weight)

print(results)

                      SPM        OPM     SPM_MI
Category                                       
Children        23.935014  32.022004  21.567392
Elderly         17.859979  13.313400  33.406293
Fully Employed  15.942622   9.827651  11.044724
Disabled        13.713975  15.609597  13.612266
Carers           7.432734   8.254535   5.135020
Student          6.251977   6.206497   3.809745
All Other        5.947823   5.878308   4.195099
Unemployed       5.438619   5.699014   3.761860
Early Retired    3.477258   3.188996   3.467599


In [5]:
def poverty_rate_by_category(flag, weight):
    """Return the weighted mean of the 0/1 column `flag` within each
    Category, in percent, with rows ordered like `results`.

    flag   -- name of a 0/1 poverty indicator column in df
    weight -- name of the weight column used for the average
    """
    weighted_mean = df.groupby('Category').apply(
        lambda group: np.average(group[flag], weights=group[weight]))
    return (weighted_mean * 100).loc[results.index]


# Poverty rate of each group
results2 = pd.DataFrame()
for flag, weight in [('SPM', 'SPM_Weight'),
                     ('OPM', 'MARSUPWT'),
                     ('SPM_MI', 'SPM_Weight')]:
    results2[flag] = poverty_rate_by_category(flag, weight)

print(results2)

                      SPM        OPM     SPM_MI
Category                                       
Children        13.420792  16.773802  22.909574
Elderly         13.506284   9.748823  47.858398
Fully Employed   5.346813   3.108906   7.017220
Disabled        27.272641  28.850774  51.282502
Carers          24.596071  26.063815  32.190979
Student         29.753568  26.014633  34.347294
All Other       14.977139  13.656449  20.011887
Unemployed      21.936629  20.970621  28.744774
Early Retired   23.043398  20.329840  43.532487
