### Who are the poor?

An attempt to replicate an excellent chart by Matt Bruenig.

----

Source: CPS ASEC

Variables:

- Person record:       PRECORD
- Age:                 A_AGE
- Person weight:       MARSUPWT 
- Poverty status:      PERLIS
- Why not full-year:   PYRSN
- Why not employed:    RSNNOTW
- Disability:          PRDISFLG
- Weeks unemployed:    WEUEMP
- Full-year worker:    WEUEMP

[Data dictionary](https://thedataweb.rm.census.gov/pub/cps/march/06_ASEC_2019-Data_Dictionary_Full.pdf)

In [1]:
import sys
sys.path.append('../src')

import requests

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

In [2]:
# Extract person records for selected variables
variables = ['PRECORD', 'A_AGE', 'MARSUPWT', 'A_LFSR', 'PERLIS', 
             'PRDISFLG', 'WEUEMP', 'RSNNOTW', 'PYRSN']

# NOTE(review): absolute, machine-specific location of the ASEC files;
# adjust this when running the notebook elsewhere.
path = '/home/brian/Documents/ASEC/'

# Read the layout file and close the handle as soon as the text is in memory.
dictfile = f'{path}/data/persfmt.txt'
with open(dictfile) as dict_fh:
    dd = dict_fh.read()

# Raw f-string: `\s` and `\d` must reach the regex engine untouched rather
# than being treated as (invalid) string escapes.
# Each match captures a variable name followed by two integers, which are
# unpacked below as (length, start).
p = rf'({"|".join(variables)})\s+(\d+)\s+(\d+)\s'

# Convert each 1-based start and length into a 0-based, half-open
# (start, end) column range, keyed by variable name.
cols = {name: (int(start) - 1, int(start) - 1 + int(length)) 
        for name, length, start in re.findall(p, dd)}

# Read only the selected fixed-width columns, then keep rows where
# PRECORD == 3 and the weight MARSUPWT is positive.
datafile = f'{path}/data/asec2019_pubuse.dat'
df = (pd.read_fwf(datafile, 
                  colspecs=list(cols.values()), 
                  header=None, 
                  names=list(cols))
        .query('PRECORD == 3 and MARSUPWT > 0'))

In [3]:
# Categorize population
# Every person gets exactly one label. Rules are listed in priority order:
# np.select uses the first rule that matches a row, and rows matching no
# rule fall through to 'All Other'.
age = df['A_AGE']
part_year_reason = df['PYRSN']
not_working_reason = df['RSNNOTW']
weeks_unemployed = df['WEUEMP']

category_rules = [
    (age < 18, 'Children'),
    (age > 64, 'Elderly'),
    ((df['PRDISFLG'] == 1) | (part_year_reason == 1) | (not_working_reason == 1),
     'Disabled'),
    ((part_year_reason == 3) | (not_working_reason == 4), 'Student'),
    ((part_year_reason == 2) | (not_working_reason == 3), 'Carers'),
    ((part_year_reason == 4) | (not_working_reason == 2), 'Early Retired'),
    ((part_year_reason == 5) | (not_working_reason == 5)
     | weeks_unemployed.isin([2, 3, 4, 5, 6, 7]), 'Unemployed'),
    (weeks_unemployed == 8, 'Fully Employed'),
]
rule_masks = [mask for mask, _ in category_rules]
rule_labels = [label for _, label in category_rules]

df['CAT'] = np.select(rule_masks, rule_labels, default='All Other')

# 1 where PERLIS equals 1, otherwise 0
df['POVERTY'] = (df['PERLIS'] == 1).astype(int)

In [4]:
# Keep only the rows flagged as below the poverty line
data = df.loc[df['POVERTY'] == 1]

# Weighted size of each group among the poor, and of the poor as a whole
poor_weight_by_cat = data.groupby('CAT')['MARSUPWT'].sum()
poor_weight_total = data['MARSUPWT'].sum()

# Each group's share of the total, in percent, largest share first
result = (poor_weight_by_cat / poor_weight_total * 100).sort_values(ascending=False)
result.to_csv(data_dir / 'poor.csv', index_label='name', header=True)

print(result)

CAT
Children          32.022004
Disabled          15.609597
Elderly           13.313400
Fully Employed     9.827651
Carers             8.254535
Student            6.206497
All Other          5.878308
Unemployed         5.699014
Early Retired      3.188996
Name: MARSUPWT, dtype: float64


In [5]:
# Poverty rate of each group
def weighted_poverty_rate(group):
    """Return the MARSUPWT-weighted mean of POVERTY for one group."""
    return np.average(group['POVERTY'], weights=group['MARSUPWT'])


# Select the two columns the calculation needs before calling .apply, so the
# grouping column is not passed into the applied function (newer pandas
# versions warn when it is). Rows are then reordered to match `result`.
result2 = (df.groupby('CAT')[['POVERTY', 'MARSUPWT']]
             .apply(weighted_poverty_rate)
           * 100).loc[result.index]

result2.to_csv(data_dir / 'poor2.csv', index_label='name', header=True)

print(result2)

CAT
Children          16.773802
Disabled          28.850774
Elderly            9.748823
Fully Employed     3.108906
Carers            26.063815
Student           26.014633
All Other         13.656449
Unemployed        20.970621
Early Retired     20.329840
dtype: float64
