### Income Measures from Census

Typically annual data, such as from the American Community Survey (ACS) and the CPS Annual Social and Economic Supplement (CPS ASEC).

In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

In [174]:
# Fetch the R-CPI-U-RS all-items workbook from bls.gov and store the 'AVG'
# column as a dated series in cpi_u_rs.csv
url = 'https://www.bls.gov/cpi/research-series/r-cpi-u-rs-allitems.xlsx'
df = pd.read_excel(url, header=5, index_col=0)
cpi = df['AVG'].dropna().rename('value')
# Index labels are parsed as dates and moved forward six months
# (assumes the labels are years, giving July 1 of each year)
shifted = pd.to_datetime(cpi.index.astype('str')) + pd.DateOffset(months=6)
cpi.index = shifted.rename('date')
cpi.to_csv(data_dir / 'cpi_u_rs.csv', index_label='date')

In [179]:
# Download tables PINC-08 and PINC-01 (file 1_1_1) for each year and
# pickle the results together
base = 'https://www2.census.gov/programs-surveys/cps/tables/'
pinc8 = 'pinc08_1_1_1.xlsx'
pinc1 = 'pinc01_1_1_1.xlsx'
cols = ['Total with income', 'Median income', 'Mean income']
d, d2 = {}, {}
for year in range(2019, 2022):
    # Files sit in the directory for year + 1; results are keyed by year
    url1 = f'{base}pinc-08/{year+1}/{pinc8}'
    df = pd.read_excel(url1, header=9, index_col=0)
    # Skip the first row below the header, then drop incomplete rows
    d[year] = df[cols].iloc[1:].dropna()
    url2 = f'{base}pinc-01/{year+1}/{pinc1}'
    # nrows=0 reads only the header row; the label found in column B is kept
    # (presumably the population total, given the 'Pop' key -- verify)
    pop = pd.read_excel(url2, 'pinc01', index_col=None,
                        usecols='B', header=11, nrows=0)
    d2[year] = pop.columns.values[0]
di = {'Pop': d2, 'Inc': d}
pd.to_pickle(di, data_dir / 'pinc_temp.pickle')

In [96]:
# Build the two-year PINC-08 comparison table: yr1 is the latest year and
# yr2 the comparison year, with yr2 dollar amounts restated in yr1 prices.
yr1 = 2021
yr2 = 2019
cpi = pd.read_csv(data_dir / 'cpi_u_rs.csv', index_col='date', parse_dates=True)
# Ratio of the yr1 to the yr2 price index value
pch = (cpi.loc[f'{yr1}-07-01'] / cpi.loc[f'{yr2}-07-01']).value

di = pd.read_pickle(data_dir / 'pinc_temp.pickle')
# One column group per year: level 0 is the year, level 1 the measure
res = pd.concat(di['Inc'].values(), axis=1, keys=di['Inc'].keys())
# Relabel the dotted total row first so the '..' filter below keeps it
res.index = res.index.str.replace('....Total', '\\textbf{Total}', regex=False)
res = res[res.index.str.contains('..', regex=False)==False]
res = res.rename({'SSI (Supplemental Security Income)': 'Supplemental Security Income', 
            'Financial assistance from outside the household': 'Outside financial assistance',
            'Other income, N.E.C.': 'Other'})
res = res.loc[:,res.columns.get_level_values(0).isin([yr1, yr2])]
res = res.reindex(columns=[yr1, yr2], level=0)
# Adjust for inflation
prvals = [(yr2, 'Median income'), (yr2, 'Mean income')]
res[prvals] = res[prvals] * pch
# Integer copy kept for the contribution calculations in the next cell
final = res.astype('int')

# Save version for download
res = res.applymap('{:,.0f}'.format)
res.index.name = ''
res.to_csv(data_dir / 'pinc08.csv', index_label='category')

# Store years separately for tex table. The labels follow the actual order
# of the year column groups rather than a fixed column position, so they
# stay correct if the number of measures per year changes.
year1, year2 = [f'{y}' for y in res.columns.get_level_values(0).unique()]
write_txt(text_dir / 'pinc_year1.txt', year1)
write_txt(text_dir / 'pinc_year2.txt', year2)

# Save version for tex table
# Rows end with two backslashes and a space (LaTeX row break); written as
# '\\\\ ' because '\\\ ' relies on the invalid escape sequence '\ '.
# NOTE: pandas 1.5 renamed this keyword to `lineterminator` and pandas 2.0
# removed the old spelling; switch the keyword when upgrading pandas.
res.droplevel(0, axis=1).to_csv(data_dir / 'pinc08.tex', sep='&', 
             line_terminator='\\\\ ', quotechar=' ')

In [102]:
# Decompose the change in aggregate income (mean income x number with
# income) between yr2 and yr1, for each income source
tot = '\\textbf{Total}'
df = pd.DataFrame()
for yr in (yr1, yr2):
    df[f'Mean {yr}'] = final[(yr, 'Mean income')]
    # Counts are scaled by 1,000 before multiplying by the mean
    df[f'Number {yr}'] = final[(yr, 'Total with income')] * 1000
    df[f'Total {yr}'] = df[f'Mean {yr}'] * df[f'Number {yr}']

prev_total = df[f'Total {yr2}']
df['PctCh'] = ((df[f'Total {yr1}'] / prev_total) - 1) * 100
df['LvlCh'] = df[f'Total {yr1}'] - prev_total
# Level change if only the number of recipients (P) or only the mean (M)
# had moved to its yr1 value
df['LvlChP'] = (df[f'Number {yr1}'] * df[f'Mean {yr2}']) - prev_total
df['LvlChM'] = (df[f'Number {yr2}'] * df[f'Mean {yr1}']) - prev_total

# Each level change as a share of the total level change, scaled by the
# total percent change
tot_lvl = df.loc[tot, 'LvlCh']
tot_pct = df.loc[tot, 'PctCh']
for suffix in ('P', 'M', ''):
    df[f'Contrib{suffix}'] = (df[f'LvlCh{suffix}'] / tot_lvl) * tot_pct

# Determine six most prominent sources of change
df2 = df.drop([tot, 'Other'])
stacked = pd.concat([df2[c] for c in ('ContribP', 'ContribM', 'Contrib')]).abs()
ranked = stacked.sort_values(ascending=False).index
cats = ranked.drop_duplicates(keep='first')[:6]

# Top six used for bar chart
top_six = df.loc[cats, ['ContribP', 'ContribM']]
top_six.to_csv(data_dir / 'pinc.csv', index_label='Name')

# Save full version for download
contrib_all = df[['Contrib', 'ContribP', 'ContribM']]
contrib_all.to_csv(data_dir / 'pinc_ch_contrib.csv', index_label='category')

In [135]:
# Manual description for now
val = df.loc['\\textbf{Total}', 'PctCh']
valtext = value_text(val, 'increase_by', digits=2, adj='total')
# Only the years and the total percent change are filled in from data;
# the remaining sentences are fixed text
text = ''.join([
    f'From {yr2} to {yr1}, aggregate pre-tax personal income {valtext}, ',
    f'after adjusting for changes in prices. Compared to {yr2}, fewer ',
    'people received earnings from work, while mean earnings increased. ',
    'Real income from pensions and retirement plans fell over the period. ',
    'Unemployment compensation and property income contributed to the ',
    'overall growth in real income. ',
])
write_txt(text_dir / 'pi_ch_ann.txt', text)
print(text)

From 2019 to 2021, aggregate pre-tax personal income increased by a total of 0.55 percent, after adjusting for changes in prices. Compared to 2019, fewer people received earnings from work, while mean earnings increased. Real income from pensions and retirement plans fell over the period. Unemployment compensation and property income contributed to the overall growth in real income. 


In [None]:
# Table made manually from combining PINC-09 from 2021 and 2020
# NOTE(review): this cell writes pinc.csv and pi_ch_ann.txt, the same output
# files written by the PINC-08 cells above
df = pd.read_csv('../data/pinc-09-2020-2019.csv', index_col='Name')
df = df.drop(['Wages and salary', 'Nonfarm self-employment'])
# Inflation adjustment with hard-coded CPI-U-RS values
# (presumably the 2020 and 2019 index levels -- verify)
df['Mean2019'] = df['Mean2019'] * (381.2/376.5)
years = [2020, 2019]
for year in years:
    df[f'Total{year}'] = df[f'Number{year}'] * df[f'Mean{year}']

prev_total = df['Total2019']
df['PctCh'] = ((df['Total2020'] / prev_total) - 1) * 100
df['LvlCh'] = df['Total2020'] - prev_total
# Level change if only the number (P) or only the mean (M) had changed
df['LvlChP'] = (df['Number2020'] * df['Mean2019']) - prev_total
df['LvlChM'] = (df['Number2019'] * df['Mean2020']) - prev_total

# Each level change as a share of the total level change, scaled by the
# total percent change
tot_lvl = df.loc['Total', 'LvlCh']
tot_pct = df.loc['Total', 'PctCh']
for suffix in ('P', 'M', ''):
    df[f'Contrib{suffix}'] = (df[f'LvlCh{suffix}'] / tot_lvl) * tot_pct

cats = ['Earnings', 'Unemployment compensation',
        'Property income', 'Retirement income']
chart = df.loc[cats, ['ContribP', 'ContribM']]
chart.to_csv(data_dir / 'pinc.csv', index_label='Name')

val = df.loc['Total', 'PctCh']
valtext = value_text(val, 'increase_by')
text = f'In {years[0]}, aggregate pre-tax personal income {valtext}'
write_txt(text_dir / 'pi_ch_ann.txt', text)
print(text)

In [None]:
# PINC-08 table 1_1_1 workbook locations; the urlNN suffix is one less than
# the directory year, and the 2018 directory uses a 'bridge' subfolder
_pinc08_dir = 'https://www2.census.gov/programs-surveys/cps/tables/pinc-08/'
url15 = f'{_pinc08_dir}2016/pinc08_1_1_1.xls'
url16 = f'{_pinc08_dir}2017/pinc08_1_1_1.xls'
url17 = f'{_pinc08_dir}2018/bridge/pinc08_1_1_1.xls'
url18 = f'{_pinc08_dir}2019/pinc08_1_1_1.xls'
url19 = f'{_pinc08_dir}2020/pinc08_1_1_1.xlsx'
url20 = f'{_pinc08_dir}2021/pinc08_1_1_1.xlsx'
url21 = f'{_pinc08_dir}2022/pinc08_1_1_1.xlsx'

In [139]:
# Download table PINC-08 1_1_1 for each year
# File extension, header row, and the capitalization of the count column
# differ across years, so each is chosen per year below
base = 'https://www2.census.gov/programs-surveys/cps/tables/pinc-08/'
file = 'pinc08_1_1_1.xls'
cols1 = ['Total with income', 'Median income', 'Mean income']
cols2 = ['Total with Income', 'Median income', 'Mean income']
hdr1 = 9
hdr2 = 10
urls = {}
d = {}
for year in range(2018, 2022):
    after_2018 = year > 2018
    # The 2017 file sits in a 'bridge/' subfolder
    bridge = 'bridge/' if year == 2017 else ''
    x = 'x' if after_2018 else ''
    cols = cols1 if after_2018 else cols2
    hdr = hdr2 if year in [2017, 2018] else hdr1
    url = f'{base}{year+1}/{bridge}{file}{x}'
    df = pd.read_excel(url, header=hdr, index_col=0)
    urls[year] = url
    df = df[cols].iloc[1:]
    if year < 2017:
        # Keep rows only through the 'Combination of Income Types:' label
        df = df.loc[:'Combination of Income Types:']
    # Normalize row labels: remove dots and lowercase
    df.index = df.index.str.replace('.', '', regex=False).str.lower()
    # Column labels are left as downloaded ('Total with Income' is not
    # renamed to match the later files)
    d[year] = df.dropna()