### Who are the poor?

An attempt to replicate an excellent chart by Matt Bruenig.

----

Source: CPS ASEC

Variables:

- Person record:       PRECORD
- Age:                 A_AGE
- Person weight:       MARSUPWT 
- Poverty status:      PERLIS
- Why not full-year:   PYRSN
- Why not employed:    RSNNOTW
- Disability:          PRDISFLG
- Weeks unemployed:    LKWEEKS
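- Full-year worker:    WKSWORK (more than 49 weeks worked counts as fully employed)
- SPM resources:       SPM_Resources
- SPM threshold:       SPM_PovThreshold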

[Data dictionary](https://thedataweb.rm.census.gov/pub/cps/march/06_ASEC_2019-Data_Dictionary_Full.pdf)

In [1]:
import sys
sys.path.append('../src')

import requests

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

In [2]:
# Extract person records for selected variables
variables = ['PRECORD', 'A_AGE', 'MARSUPWT', 'PERLIS',
             'SPM_Resources', 'SPM_PovThreshold', 'WKSWORK',
             'PRDISFLG', 'RSNNOTW', 'PYRSN', 'LKWEEKS']

# NOTE(review): machine-specific absolute path; consider moving it to a
# shared config value so the notebook runs on other machines.
path = '/home/brian/Documents/ASEC/'

# Read the record-layout file; the context manager guarantees the handle
# is closed even if reading fails.
dictfile = f'{path}/data/persfmt.txt'
with open(dictfile) as layout:
    dd = layout.read()

# Raw f-string: \s and \d must reach the regex engine untouched (in a
# non-raw string they are invalid escape sequences and trigger warnings).
# Each match yields (variable name, first number, second number).
names_alt = '|'.join(variables)
p = rf'({names_alt})\s+(\d+)\s+(\d+)\s'

# Map each variable to a half-open (from, to) column span for read_fwf.
# The first number is used as the field length and the second as the
# start position, which is shifted down by one to be zero-based.
cols = {name: (int(start) - 1, int(start) - 1 + int(length))
        for name, length, start in re.findall(p, dd)}

# Fail early, with a clear message, if the layout file lacks any variable;
# otherwise the error would only surface later as a missing column.
missing = [name for name in variables if name not in cols]
if missing:
    raise ValueError(f'Variables not found in {dictfile}: {missing}')

datafile = f'{path}/data/asec2019_pubuse.dat'
# Keep rows with PRECORD == 3 (the person records this cell extracts) and
# a positive weight. The trailing .copy() makes df an independent frame so
# that adding columns to it later does not raise SettingWithCopyWarning.
df = (pd.read_fwf(datafile,
                  colspecs=list(cols.values()),
                  header=None,
                  names=list(cols.keys()))
        .query('PRECORD == 3 and MARSUPWT > 0')
        .copy())

In [3]:
# Categorize population
# Ordered (label, test) rules: each person receives the label of the first
# rule whose test is true, so earlier rules take precedence over later ones.
category_rules = [
    ('Children', df.A_AGE < 18),
    ('Elderly', df.A_AGE > 64),
    ('Disabled', (df.PRDISFLG == 1) | (df.PYRSN == 1) | (df.RSNNOTW == 1)),
    ('Student', (df.PYRSN == 3) | (df.RSNNOTW == 4)),
    ('Carers', (df.PYRSN == 2) | (df.RSNNOTW == 3)),
    ('Unemployed', (df.PYRSN == 5) | (df.RSNNOTW == 5) | (df.LKWEEKS > 0)),
    ('Early Retired', (df.PYRSN == 4) | (df.RSNNOTW == 2)),
    ('Fully Employed', df.WKSWORK > 49),
]
category_labels = [label for label, _ in category_rules]
category_tests = [test for _, test in category_rules]

# Anyone matching none of the rules falls into 'All Other'.
df['Category'] = np.select(category_tests, category_labels, default='All Other')

# 1/0 poverty flags: SPM when resources fall below the SPM threshold,
# OPM when PERLIS equals 1.
below_spm_threshold = df['SPM_Resources'] < df['SPM_PovThreshold']
df['SPM'] = below_spm_threshold.astype(int)
df['OPM'] = (df['PERLIS'] == 1).astype(int)

In [4]:
# Only below poverty line
data = df.query('SPM == 1')
data2 = df.query('OPM == 1')

# Group share of total: each category's weighted share (in percent) of the
# people below the poverty line, computed once per poverty measure.
shares = {
    measure: poor.groupby('Category').MARSUPWT.sum() / poor.MARSUPWT.sum() * 100
    for measure, poor in (('SPM', data), ('OPM', data2))
}

# Rows are ordered by the SPM share, largest first; the OPM column is
# aligned to that same row order by its Category index.
results = shares['SPM'].sort_values(ascending=False).to_frame('SPM')
results['OPM'] = shares['OPM']

results.to_csv(data_dir / 'poor.csv', index_label='name')

print(results)

                      SPM        OPM
Category                            
Children        24.387633  32.022004
Elderly         17.264041  13.313400
Fully Employed  15.870200   9.827651
Disabled        13.796373  15.609597
Carers           7.330832   8.254535
Student          6.484313   6.206497
All Other        5.932181   5.878308
Unemployed       5.537148   5.699014
Early Retired    3.397279   3.188996


In [5]:
results2 = pd.DataFrame()

# Poverty rate of each group: the weighted mean of the 1/0 poverty flag
# within each category, in percent, for both poverty measures.
for measure in ('SPM', 'OPM'):
    # Select only the flag and weight columns before apply, so the lambda
    # never operates on the grouping column (that pattern is deprecated in
    # recent pandas and emits a warning).
    group_rate = (df.groupby('Category')[[measure, 'MARSUPWT']]
                    .apply(lambda g, m=measure:
                           np.average(g[m], weights=g['MARSUPWT']))
                  * 100)
    # Present the rows in the same category order as the `results` table.
    results2[measure] = group_rate.loc[results.index]

results2.to_csv(data_dir / 'poor2.csv', index_label='name')

print(results2)

                      SPM        OPM
Category                            
Children        13.649897  16.773802
Elderly         13.507730   9.748823
Fully Employed   5.364348   3.108906
Disabled        27.246292  28.850774
Carers          24.732919  26.063815
Student         29.041020  26.014633
All Other       14.725721  13.656449
Unemployed      21.770801  20.970621
Early Retired   23.141311  20.329840
