### Handle Charts Driven by Multiple Sources

This notebook contains the codeblocks that pull from multiple sources. For example, if a chart uses both CES and CPS data, it would be run twice if located in either of those notebooks. 

Run this notebook after running other jobs day notebooks.

In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

In [2]:
ref = lambda x: '\\bibitem{{{name}}} {author}. \\textit{{{title}}}. Available at: \\url{{{url}}}.'.format(**x)
df = pd.read_csv('/home/brian/Documents/uschartbook/chartbook/references.csv')
df = df.assign(REF = df.apply(ref, 1)).sort_values(['author', 'title']).REF
group_size = 15
for i in range(0, len(df), group_size):
    group = '\n'.join(df.iloc[i:i+group_size])
    fname = f'reference_group_{(i/group_size)+1:.0f}.txt'
    write_txt(text_dir / fname, group)

### Wage Growth Summary Table

In [3]:
# Average Hourly Earnings
s = {'CES0500000003': 'AHE', 'CES0500000008': 'PNS', 
     'CES0600000008': 'Goods', 'CES0800000008': 'Serv'}
# AHE series
df = pd.read_csv(data_dir / 'ces_raw.csv', index_col='date', 
                 parse_dates=True)[s.keys()].rename(s, axis=1)
dfg = df['Goods']
ch = df.pct_change(12) * 100

# Median wage from CPS
ch['UWE_P50'] = pd.read_csv(data_dir / 'uwe_cps.csv', index_col='date', 
                            parse_dates=True)['p50_gr']
# Wage Growth Tracker
ch['WGT'] = pd.read_csv(data_dir / 'atl_wgt.csv', 
                        index_col='date', parse_dates=True)['bd_cps']

# NIPA Wages and Salaries / NFP
df = pd.read_csv(data_dir / 'pi_raw.csv', index_col='date', 
                 parse_dates=True)
# Nonfarm payrolls
nfp = pd.read_csv(data_dir / 'ces_raw.csv', index_col='date', 
                 parse_dates=True)['CES0000000001']
ch['NIPA'] = (df['A034RC'] / nfp).dropna().pct_change(12) * 100

# 3 Month Moving average for volatile series
for i in ['UWE_P50', 'WGT', 'NIPA']:
    ch[f'{i}_3m'] = ch[i].rolling(3).mean()
    
ch.loc['1989':].to_csv(data_dir / 'wages_yy_monthly.csv', 
                      index_label='date')

In [4]:
# Rename for table
d = {'AHE': 'Average Hourly Earnings (AHE), Private',
     'PNS': '\hspace{2mm} Production \& Nonsupervisory',
     'Goods': '\hspace{4mm} Goods-Producing Industries',
     'Serv': '\hspace{4mm} Service-Providing Industries',
     'UWE_P50': 'Usual Weekly Earnings, Median',
     'UWE_P50_3m': 'Usual Weekly Earnings, Median (3M Avg)',
     'WGT': 'Wage Growth Tracker, Median',
     'WGT_3m': 'Wage Growth Tracker, Median (3M Avg)',
     'NIPA': 'Wages \& Salaries, Average (NIPA)',
     'NIPA_3m': 'Wages \& Salaries, Average (3M Avg)'}

dfm = pd.read_csv(data_dir / 'wages_yy_monthly.csv', index_col='date', 
                 parse_dates=True)[d.keys()].rename(d, axis=1)

# Latest 6 months
res = dfm.iloc[-6:].iloc[::-1].T
# Same month, prior two years
for i in [-13, -25]:
    res[dfm.index[i]] = dfm.iloc[i]
    
res.columns = [dtxt(dt)['mon6'] for dt in res.columns]
res = res.applymap('{:.1f}'.format).replace('nan', '--')
(res.to_csv(data_dir / 'wages_yy_monthly.tex', sep='&', lineterminator='\\\ ', 
           quotechar=' '))

In [5]:
# Quarterly measures of wages 
# Median wage from BLS 
d = {'IndexWS': 'Wages \& Salaries (ECI)',
     'P50_tot': 'Usual Weekly Earnings, Median',
     'nfbus_ulc': 'Unit Labor Cost'}
df = (pd.read_csv(data_dir / 'uwe_main.csv', index_col='date', 
                            parse_dates=True)) 

# Wages and Salaries from Employment Cost Index
eci = pd.read_csv(data_dir / 'eci.csv', index_col='date', 
                 parse_dates=True)[['IndexWS','IndexWSGoods']]

# Unit Labor Costs
ulc = pd.read_csv(data_dir / 'lprod.csv', index_col='date', 
                 parse_dates=True)[['business_ulc', 'manuf_ulc', 'nfbus_ulc']]

df = pd.concat([df, eci, ulc], axis=1)

# Average Hourly Earnings PNS, Goods
df['AHEGoods'] = dfg.resample('QS').mean()

dfy = df.pct_change(4, fill_method=None) * 100 
dfy.loc['1989':].to_csv(data_dir / 'wages_yy_quarterly.csv', 
                      index_label='date')
dfq = df.pct_change(fill_method=None) * 400 

# Gender Wage Gap
gwg = (df['P50_women'] / df['P50_men']).rolling(4).mean()
gwg.name = 'GWG'
gwg.loc['1989':].multiply(100).to_csv(data_dir / 'gwg.csv', 
                      index_label='date')
node = end_node(gwg * 100, 'red', date='qs', offset=0.1,
                anchor='south', align='center', colon=False)
write_txt(text_dir / 'gwg_node.txt', node)

# Sumary Table
# Latest 5 quarters
res = dfy.iloc[-5:].iloc[::-1].T
# Same quarter, prior two years
for i in [-9, -13, -17]:
    res[dfy.index[i]] = dfy.iloc[i]
    
res = res.loc[d.keys()].rename(d)
res.columns = [dtxt(dt)['qtr4'] for dt in res.columns]
res = res.applymap('{:.1f}'.format).replace('nan', '--')
(res.to_csv(data_dir / 'wages_yy_quarterly.tex', sep='&', lineterminator='\\\ ', 
           quotechar=' '))

In [6]:
# Gender Wage Gap Text
ltdt = dtxt(gwg.index[-1])['qtr1']
gap = gwg.iloc[-1] * 100
gap2 = 100 - gap
cmp = '2019-10-01'
cmpdt = dtxt(cmp)['qtr1']
gapcmp = (1.0 - gwg.loc['2019-10-01']) * 100
dt1 = '1989-01-01'
dt2 = '2006-01-01'
dt3 = '2006-01-01'
dt4 = cmp

# Annualize rates (divide by 59 quarters)
hgap1 = value_text((gwg.loc[dt2] - gwg.loc[dt1]) * (100 / (59 / 4)), 
                   'plain', 'pp', digits=2)
hgap2 = value_text((gwg.loc[dt4] - gwg.loc[dt3]) * (100 / (59 / 4)), 
                   'plain', 'pp', digits=2)

text = ('In 1989, the gender wage gap was 30 percent; women were paid '+
        '70 cents for each dollar men were paid. From 1989 to 2006, '+
        f'the gap closed at a rate of {hgap1} per year. '+
        'From 2006 to 2019 Q4, the gap closed at a '+
        f'rate of only {hgap2} per year.\n\n'+
        f'Over the year ending {ltdt}, the gender wage gap is {gap2:.1f} '+
        f'percent; women are paid {gap:.1f} cents on the dollar. '+
        f'Pre-pandemic, in 2019 Q4, the gap was {gapcmp:.1f} percent.')
write_txt(text_dir / 'gwg.txt', text)
print(text)

In 1989, the gender wage gap was 30 percent; women were paid 70 cents for each dollar men were paid. From 1989 to 2006, the gap closed at a rate of 0.74 percentage point per year. From 2006 to 2019 Q4, the gap closed at a rate of only 0.03 percentage point per year.

Over the year ending 2023 Q3, the gender wage gap is 16.4 percent; women are paid 83.6 cents on the dollar. Pre-pandemic, in 2019 Q4, the gap was 18.4 percent.


In [7]:
# Summary text 
d, d2 = {}, {} # One-year change and change since 2019
# Monthly series
for s in ['AHE', 'PNS', 'Goods', 'WGT', 'WGT_3m']:
    d[s] = ch[s].dropna().diff(12).iloc[-1]
    d2[s] = ch[s].dropna().iloc[-1] - ch.loc['2019', s].mean()
    
# Quarterly Series    
for s in ['IndexWS', 'IndexWSGoods', 'p50uwe']:
    d[s] = dfy[s].dropna().diff(4).iloc[-1]
    d2[s] = dfy[s].dropna().iloc[-1] - dfy.loc['2019', s].mean()
    
chlt = pd.DataFrame({'One-year': d, 'Since 2019': d2})
keych = chlt.loc[['IndexWS', 'WGT_3m']].mean()
chlt.loc['keych'] = keych
chdf = pd.concat([pd.cut(chlt[c], [-50, -0.49, 0.49, 50], 
                         labels=['below', 'same', 'above']) 
           for c in chlt.columns], axis=1)

ref = chdf.drop('keych')
keym = chdf.loc['keych']

# What percent of the series match the key series?
pct = pd.DataFrame(map(lambda k: ref[k]==keym[k], ref)).all().mean()

keydir1 = ('falling' if keych['One-year'] <= -0.49 else 'rising' 
           if keych['One-year'] >=0.49 else 'stable')
keydir2 = ('below' if keych['Since 2019'] <= -0.49 else 'above' 
           if keych['Since 2019'] >=0.49 else 'in line with')

keyt = 'key'
if pct > 0.8:
    keyt = 'nearly all'
elif pct > 0.5:
    keyt = 'most'
    
but = 'but still' if ((keydir1 == 'falling') & (keydir2 == 'above') | 
                (keydir1 == 'rising') & (keydir2 == 'below')) else 'and'    

text = (f'{keyt} measures show one-year nominal wage growth that is {keydir1} '+
        f'{but} {keydir2} the pre-pandemic rate.')
write_txt(text_dir / 'wage_rec_summary.txt', text)
print(text)

nearly all measures show one-year nominal wage growth that is falling but still above the pre-pandemic rate.


### Recessions Table / Sahm Rule

In [8]:
# Retrieve recession info from FRED
#rec = fred_df('USREC')
#rec.to_csv(data_dir / 'recessions_raw.csv', index_label='date')

In [9]:
rec = pd.read_csv(data_dir / 'recessions_raw.csv', 
                  index_col='date', parse_dates=True)
first = rec[(rec.VALUE==1) & (rec.VALUE.shift(1) == 0)]
post = rec[(rec.VALUE==0) & (rec.VALUE.shift(1) == 1)]
names = [' \ Early `90s Recession', ' \ Early `00s Recession', 
         ' \ Great Recession', ' \ COVID-19 Recession']
recs = (pd.Series(data=first.index, index=names)
        .rename('First').to_frame())
recs['Last'] = rec[(rec.VALUE==1) & (rec.VALUE.shift(-1) == 0)].index
recs['Pre'] = rec[(rec.VALUE==0) & (rec.VALUE.shift(-1) == 1)].index
recs['Post'] = post.index
dur = [i.n for i in (post.index.to_period('M') - 
                     first.index.to_period('M'))]
recs['Dur'] = pd.Series(data=dur, index=recs.index)
recs['PrevEnd'] = recs['Post'].shift(1)
recs.loc[' \ Early `90s Recession', 'PrevEnd'] = pd.to_datetime('1989-01-01')
recs['NextStart'] = recs['Pre'].shift(-1)
recs.loc[' \ COVID-19 Recession', 'NextStart'] = cps_date()
recs['Start'] = recs.First.apply(lambda x: dtxt(x)['mon2'])
recs['End'] = recs.Last.apply(lambda x: dtxt(x)['mon2'])
rgdp = nipa_df(retrieve_table('T10106')['Data'], ['A191RX'])['A191RX']
unrate = pd.read_csv(data_dir / 'jobs_report_main.csv', index_col='date', 
                 parse_dates=True)['Total']
for row in recs.itertuples():
    # Real GDP change
    vprev = rgdp.loc[:row.Pre].max()
    vmin = rgdp.loc[row.First:row.NextStart].min()
    ch = ((vmin / vprev) - 1) * 100
    recs.loc[row.Index, 'GDPch'] = ch
    # Unemployment rate change and duration
    pravg = unrate.loc[row.Pre - pd.DateOffset(years=3): row.Pre].mean()
    vmax = unrate.loc[row.First:row.NextStart].max()
    uch = vmax - pravg
    rdt = (unrate.loc[row.Last:].loc[(unrate <= pravg)].index[0] 
           if pravg >= unrate.iloc[-1] else '--')
    rtime = (int((rdt.to_period('M') - row.Last.to_period('M')).n) 
             if rdt != '--' else '--')
    recs.loc[row.Index, 'Unratech'] = uch    
    recs.loc[row.Index, 'RecoDate'] = rdt
    recs.loc[row.Index, 'RecoTime'] = str(rtime)
recs['GDPcht'] =  recs.GDPch.apply('{:.1f}'.format)
recs['Uncht'] =  recs.Unratech.apply('+{:.1f}'.format)
tbl = recs[['Start', 'End', 'Dur', 'GDPcht', 'Uncht', 'RecoTime']]
tbl.columns = ['Start \ \ \ Month', 'End \ \ \ \ \ \ Month', 
               'Recession Duration, Months', 
               'GDP Percent Change', 'Unemp. Rate Change*', 
               'Unemp. Rate Recovery, Months**']
tbl.to_csv(data_dir / 'recession.tex', sep='&', 
           lineterminator='\\\ ', quotechar=' ')

un3 = unrate.rolling(3).mean()
sahm = (un3 - un3.rolling(12).min()).dropna()
sahm.to_csv(data_dir / 'sahm.csv', index_label='date', 
            header=True)

# End Node
node = end_node(sahm, 'blue!60!black', digits=2, date='m', offset=0.35)
write_txt(text_dir / 'sahm_node.txt', node)

bar = pd.Series(index=[sahm.index[0], sahm.index[-1]], 
                data=[0.5, 0.5], name='Bar')
bar.to_csv(data_dir / 'sahm_bar.csv', index_label='date', 
           header=True)
node = end_node(bar, 'gray', loc='start')
write_txt(text_dir / 'sahm_bar_node.txt', node)
marks = (sahm.loc[(sahm > 0.5) & (sahm.shift(1) < 0.5)]
             .rename('Mark').to_frame())
marks['Intersect'] = len(marks) * [0.5]
marks.to_csv(data_dir / 'sahm_marks.csv', index_label='date')

dur90 = numbers[f'{recs.Dur.iloc[0]:.1f}']
unrec90 = recs.RecoTime.iloc[0]
unrec00 = round(int(recs.RecoTime.iloc[1]) / 12)
durgr = recs.Dur.iloc[2]
unrecgr = recs.RecoTime.iloc[2]
durco = numbers[f'{recs.Dur.iloc[3]:.1f}']
gdpco = abs(recs.GDPch.iloc[3])

text = ('During the early 1990s recession, output contracted '+
        f'for {dur90} months and unemployment was higher '+
        f'than its pre-recession average for {unrec90} months. '+
        'The drop in output was smaller during the '+
        'early 2000s recession, but unemployment rates '+
        f'took almost {unrec00} years to recover.\n\n'+
        'The 2008--2009 great recession, caused by the '+
        'collapse of a housing bubble, was very severe. '+
        f'The recession lasted {durgr} months, with higher '+
        f'rates of unemployment lasting {unrecgr} months. The '+
        'most-recent COVID-19 recession was extremely severe '+
        f'and also extremely short-lived, lasting only {durco} '+
        f'months, but with output reduced {gdpco:.1f} percent.')
write_txt(text_dir / 'recessions.txt', text)
print(text)

During the early 1990s recession, output contracted for eight months and unemployment was higher than its pre-recession average for 63 months. The drop in output was smaller during the early 2000s recession, but unemployment rates took almost 16 years to recover.

The 2008--2009 great recession, caused by the collapse of a housing bubble, was very severe. The recession lasted 18 months, with higher rates of unemployment lasting 89 months. The most-recent COVID-19 recession was extremely severe and also extremely short-lived, lasting only two months, but with output reduced 9.1 percent.


### Gross Labor Income

In [10]:
df = (pd.read_csv(data_dir / 'jobs_report_main2.csv', parse_dates=[0])
         .set_index('date')[['avghrstot', 'EMPsa']])
emp = (df['avghrstot'] * df['EMPsa']).rename('Total')
coe = nipa_df(retrieve_table('T20100')['Data'], ['A033RC'])
data = coe.join(emp.resample('QS').mean()).dropna()
data['coe_inp'] = data['A033RC'] / data['Total']
data['wage'] = data['coe_inp'] * data['Total'].iloc[0]
data['work'] = data['A033RC'] - data['wage']

# Calculate contributions to growth
result = (growth_contrib(data, 'A033RC')[['work', 'wage']]
          .rolling(4).mean().dropna())
result['sum'] = result.sum(axis=1)
result.to_csv(data_dir / 'gli.csv', index_label='date')

# Horizontal bar at 5
start = dtxt(result.index[0] - pd.DateOffset(months=1))['datetime']
end = dtxt(result.index[-1] + pd.DateOffset(months=3))['datetime']
hbar = (f'\draw [dotted, thick] (axis cs:{{{start}}}, 5) -- '+
        f'(axis cs:{{{end}}}, 5);')
write_txt(text_dir / 'gli_hbar2.txt', hbar) 

# Text
ltdate = dtxt(result.index[-1])['qtr1']
totch = value_text(result['sum'].iloc[-1], adj='avg_ann', 
                   threshold=0.1)
wage = result['wage'].iloc[-1]
work = result['work'].iloc[-1]
    
txt2 = value_text(wage, 'contribution', 'pp')
txt3 = value_text(work, 'contribution', 'pp')

text = (f'{totch} over the year ending {ltdate}. Changes in wages {txt2}, '+
        f'and changes in total hours worked {txt3}.')
write_txt(text_dir / 'gli.txt', text)
print(text)

increased at an average annualized rate of 5.6 percent over the year ending 2023 Q3. Changes in wages contributed three percentage points, and changes in total hours worked contributed 2.5 percentage points.


### Employment rate - disability

In [11]:
# Retrieve series from FRED for comparison
#bls = fred_df('LNU02374597')
#bls = bls.rename({'VALUE': 'BLS'}, axis=1)
#pd.concat([bls, data], axis=1).plot();

In [12]:
# BLS data on age 16+
df = pd.read_csv(data_dir / 'jobs_report_main2.csv', index_col='date', 
                 parse_dates=True)['empdis'].dropna().rename('BLS')
df.to_csv(data_dir / 'dis_emp_rate_bls.csv', index_label='date', 
         header=True)
node = end_node(df, 'red', date='m', 
                size=1.1, offset=-0.2) 
write_txt(text_dir / 'dis_emp_node_bls.txt', node)

bdt = dtxt(df.index[-1])['mon1']
prdt = dtxt(df.index[-49])['mon1']
prdt2 = dtxt(df.index[-13])['mon1']
ltval = df.iloc[-1]
prch = ltval - df.iloc[-49]
vch = value_text(prch, 'increase_of', 
                 ptype='pp', threshold=0.1)
if prch > 3:
    vch = vch.replace('an increase of ', 'a jump of ')

vch2 = value_text(ltval - df.iloc[-13], 'increase_end', 
                  ptype='pp', threshold=0.1)

text = (f'As of {bdt}, BLS reports a {ltval:.1f} percent employment '+
        'rate for individuals aged 16 and over with at least one '+
        f'disability {c_line("red")}. This marks '+
        f'{vch2} over the past year, and {vch} since {prdt}.')
write_txt(text_dir / 'dis_rate_bls.txt', text)
print(text)

As of October 2023, BLS reports a 22.6 percent employment rate for individuals aged 16 and over with at least one disability (see {\color{red}\textbf{---}}). This marks a 0.6 percentage point increase over the past year, and a jump of 3.5 percentage points since October 2019.


In [13]:
# CPS data on more narrow age groups
columns = ['MONTH', 'YEAR', 'LFS', 'DISABILITY', 'BASICWGT', 'AGE', 'NILFREASON', 'FEMALE']
raw = (pd.concat([pd.read_feather(cps_dir / f'cps{year}.ft', columns=columns)
                 for year in range(2008, 2024)]))

d = {'Age2554': '25 <= AGE <= 54 and DISABILITY == 1',
     'Age1664': '16 <= AGE <= 64 and DISABILITY == 1',
     'Age55plus': 'AGE >= 55 and DISABILITY == 1'}
data = pd.DataFrame()

# Employment rate for each age group
for name, query in d.items():
    df = raw.query(query)
    data[name] = ((df.groupby(['YEAR', 'MONTH', 'LFS']).BASICWGT.sum() / 
                   df.groupby(['YEAR', 'MONTH']).BASICWGT.sum() * 100)
                  .unstack()['Employed'].dropna())
data.index = [pd.to_datetime(f'{i[0]}-{i[1]}-01') for i in data.index]

# End nodes 
s = {#'Age2554': 'orange', 
     'Age1664': 'blue'}
nodes  ='\n'.join([end_node(data[series], color, 
                            date='m', size=1.1, offset=-0.1) 
                   for series, color in s.items()])
write_txt(text_dir / 'dis_emp_nodes_cps.txt', nodes)

data.applymap('{:.1f}'.format).to_csv(data_dir / 'dis_emp_rate_cps.csv', 
            index_label='date', header=True)

# Count with disability
ltdt = dtxt(cps_date())['mon1']
cps_mo = cps_date().month
cps_yr = cps_date().year
tmp = raw.query('MONTH == @cps_mo and YEAR == @cps_yr')
td = tmp.query('DISABILITY == 1').BASICWGT.sum() / 1_000_000
td2 = tmp.query('DISABILITY == 1 and AGE < 65').BASICWGT.sum() / 1_000_000

text = (f'In {ltdt}, {td:.1f} million people age 16 and older '+
        f'report at least one such disability, of which {td2:.1f} '
        f'million are under age 65. ')
write_txt(text_dir / 'dis_rate.txt', text)
print(text)

dft = data['Age1664']
ltval = dft.iloc[-1]
ltdt = dtxt(dft.index[-1])['mon1']
prdt = dtxt(dft.index[-49])['year']
ch = dft.iloc[-1] - dft.iloc[-49]
valch = value_text(ch, 'increase_end', ptype='pp')
ch2 = dft.iloc[-1] - dft.iloc[-13]
valch2 = value_text(ch2, 'increase_of', ptype='pp', time_str='one-year ')
val13 = dft.loc['2013'].mean()
cl = c_line('blue')

text = ('For those age 16 to 64 with disabilities, the '+
        f'employment rate is {ltval:.1f} percent in {ltdt} {cl}, '+
        f'{valch2}, and {valch} since {prdt}.\n\n'+
        'In 2013, during the sluggish recovery from the great recession, '+
        'the employment rate for those age 16 to 64 with a disability '+
        f'averaged {val13:.1f} percent. ')
write_txt(text_dir / 'dis_rate_cps.txt', text)
print(text)

In October 2023, 33.6 million people age 16 and older report at least one such disability, of which 16.9 million are under age 65. 
For those age 16 to 64 with disabilities, the employment rate is 37.3 percent in October 2023 (see {\color{blue}\textbf{---}}), a one-year increase of 1.7 percentage points, and a 6.5 percentage point increase since 2019.

In 2013, during the sluggish recovery from the great recession, the employment rate for those age 16 to 64 with a disability averaged 26.8 percent. 


### Average Weekly Hours

In [14]:
hrs = {'CES0500000002': 'ceshrstot',
       'CES0600000002': 'ceshrsgoods',
       'CES0800000002': 'ceshrsserv',
       'CES0500000007': 'ceshrspns'}

df = pd.read_csv(data_dir / 'ces_raw.csv', index_col='date', 
                   parse_dates=True)
df = df[hrs.keys()].rename(hrs, axis=1)

df2 = pd.concat([df, (pd.read_csv(data_dir / 'jobs_report_main2.csv', parse_dates=['date'])
        .set_index('date'))], axis=1)
cps = pd.read_csv(data_dir / 'uslhrs.csv', 
                  index_col='name', parse_dates=True)

data = pd.DataFrame()
data['TOTCES'] = df2['ceshrstot']
data['TOTLFS'] = df2['avghrstot']
data['SERVNSA'] = df2['avghrsserv']
data['SERVSA'] = x13_arima_analysis(df2['avghrsserv'].dropna()).seasadj
data['PNS'] = df2['ceshrspns']
data['PTECONNSA'] = df2['avghrsptecon']
data['PTECONSA'] = x13_arima_analysis(df2['avghrsptecon'].dropna()).seasadj
data['TOTCPS'] = cps['Total']
data['PA_CPSFT'] = cps['Age2554FT']
data['FT_CPS'] = cps['FT']
data['PA_CPSPT'] = cps['Age2554PT']
data['PT_CPS'] = cps['PT']

data.loc['1989':].to_csv(data_dir / 'hours.csv', index_label='date')

          found in the estimated spectrum of the regARIMA residuals.


In [15]:
# Retrieve data
data = pd.read_csv(data_dir / 'hours.csv', index_col='date', 
                   parse_dates=True)
ltval = data['TOTLFS'].iloc[-1]
ltdate = dtxt(data.index[-1])['mon1']
feb20val = data.loc['2020-02-01', 'TOTLFS']
compare = compare_text(ltval, feb20val, [0.2, 1.5, 3.0])
avg90 = data.loc['1998':'2000', 'TOTLFS'].mean()
gfclow = data.loc['2005': '2012', 'TOTLFS'].min()
gfclowdt = dtxt(data.loc['2005': '2012', 'TOTLFS'].idxmin())['mon1']

In [16]:
text = ('Actual hours worked by people at work in all industries '+
        f'during the survey reference week average {ltval:.1f} in {ltdate} '+
        '(see {\color{blue}\\textbf{---}}), '+
        f'{compare} the {feb20val:.1f} average actual hours worked in February '+
        f'2020. Average actual hours for this group average {avg90:.1f} from '+
        '1998 through 2000, and fell to a great recession low of '+
        f'{gfclow:.1f} in {gfclowdt}.')
write_txt(text_dir / 'hours_tot.txt', text)
print(text)

ltval2 = data.SERVSA.iloc[-1]
feb20val2 = data.loc['2020-02-01', 'SERVSA']
compare2 = compare_text(ltval2, feb20val2, [0.2, 0.6, 2.5])
pteval = data.PTECONSA.iloc[-1]
text = ('Those in service occupations work '+
        f'fewer hours on average, with {ltval2:.1f} average '+
        f'weekly hours in {ltdate}, {compare2} the {feb20val2:.1f} '+
        'average in February 2020. Those part-time '+
        'for economic reasons (see {\color{red!90!black}\\textbf{---}}) '+
        f'work an average of {pteval:.1f} hours per week in {ltdate}. ')
write_txt(text_dir / 'hours_lfs2.txt', text)
print(text)

ltval3 = data.PNS.iloc[-1]
feb20val3 = data.loc['2020-02-01', 'PNS']
compare3 = compare_text(ltval3, feb20val3, [0.2, 0.6, 2.5])
val98 = data.loc['1998':'2000', 'PNS'].mean()
compare4 = compare_text(ltval3, val98, [0.2, 0.6, 2.5])
text = (f'In {ltdate}, '+
        'production and non-supervisory workers (see {\color{orange}\\textbf{---}})'+
        ', about four of every five employees, '+
        f'worked {ltval3:.1f} hours per week on average, '+
        f'{compare3} the {feb20val3:.1f} average weekly hours in February 2020 and '+
        f'{compare4} the 1998--2000 average of {val98:.1f} hours.')
write_txt(text_dir / 'hours_ces.txt', text)
print(text)

Actual hours worked by people at work in all industries during the survey reference week average 38.4 in October 2023 (see {\color{blue}\textbf{---}}), slightly below the 38.8 average actual hours worked in February 2020. Average actual hours for this group average 39.6 from 1998 through 2000, and fell to a great recession low of 37.4 in February 2010.
Those in service occupations work fewer hours on average, with 34.8 average weekly hours in October 2023, slightly below the 35.2 average in February 2020. Those part-time for economic reasons (see {\color{red!90!black}\textbf{---}}) work an average of 23.3 hours per week in October 2023. 
In October 2023, production and non-supervisory workers (see {\color{orange}\textbf{---}}), about four of every five employees, worked 33.7 hours per week on average, in line with the 33.6 average weekly hours in February 2020 and substantially below the 1998--2000 average of 34.4 hours.


In [17]:
d = {'TOTCES': 'Total Actual, CES',
     'PNS': '\hspace{2mm} Production \& Non-Supervisory, CES ({\color{orange}\\textbf{---}} )',
     'TOTLFS': 'Total Actual, LFS ({\color{blue}\\textbf{---}})',
     'PTECONSA': '\hspace{2mm} Part-Time for Economic Reasons, LFS ({\color{red!90!black}\\textbf{---}})',
     'SERVSA': '\hspace{2mm} Services Occupations, LFS',
     'TOTCPS': 'Total Usual, CPS',
     'FT_CPS': '\hspace{2mm} Full-Time, All Ages, CPS',
     'PA_CPSFT': '\hspace{4mm} Full-Time, Age 25 to 54, CPS',
     'PT_CPS': '\hspace{2mm} Part-Time, All Ages, CPS',
     'PA_CPSPT': '\hspace{4mm} Part-Time, Age 25 to 54, CPS'}

dft = data[d.keys()].rename(d, axis=1)
tbl = dft.iloc[[-1, -2, -3, -13]].T
tbl.columns = [dtxt(c)['mon8'] for c in tbl.columns]
tbl['2019'] = dft.loc['2019'].mean()
tbl['2015'] = dft.loc['2015'].mean()
tbl['2010'] = dft.loc['2010'].mean()

tbl.round(1).to_csv(data_dir / 'hoursworked_table.tex', sep='&', 
             lineterminator='\\\ ', quotechar=' ')

### Pay - Productivity Gap

In [18]:
# Gather data
# Productivity
ndp = nipa_df(retrieve_table('T11706')['Data'], ['A362RX'])['A362RX']
hrs = pd.read_csv(data_dir / 'gdpjobslvl.csv', 
                   index_col='date', parse_dates=True)['TOT_HRS']
lprod = (ndp / hrs).rolling(4).mean().loc['1989':].dropna()
nprod = lprod / lprod.iloc[0]
nprod.name = 'Productivity'

# Wages - ECI
df = pd.read_csv(data_dir / 'eci.csv', index_col='date', 
                 parse_dates=True)[['IndexWS_All_SA_SIC', 'IndexWS']]
df0 = df.loc[:'2000-10-01', 'IndexWS_All_SA_SIC']
df1 = df.dropna().mean(axis=1) # Average of SIC and NAICs during overlap
df2 = df.loc['2006-01-01':, 'IndexWS']
res = pd.concat([df0, df1, df2])

defl = nipa_df(retrieve_table('T20304')['Data'], ['DPCERG'])['DPCERG']
rw = (res / defl)
rw = rw.rolling(4).mean().dropna().loc['1989':]
eci = rw / rw.loc['1989-10-01']
eci.name = 'Average'

# Wages - Median
nw = pd.read_csv(data_dir / 'uwe_cps.csv', index_col='date', 
                 parse_dates=True)['p50']
cpi = pd.read_csv(data_dir / 'cpi_raw.csv', 
                 index_col='date', parse_dates=True)['All items']
rw = x13_arima_analysis((nw / cpi).dropna().resample('QS').mean()).seasadj
rw = rw.rolling(4).mean().dropna()
rmw = rw / rw.iloc[0]
rmw.name = 'Median'

# Combine data
res = pd.concat([rmw, eci, nprod], axis=1) * 100
res.loc['1989-10-01':].to_csv(data_dir / 'payprod.csv', index_label='date')
resa = cagr(res.loc['1989-10-01':]) # Annual growth rate

In [19]:
# Text
ndpgr = value_text(cagr(ndp.loc['1989-10-01':]), 'plain')
prodgr = value_text(cagr(nprod.loc['1989-10-01':]), 'plain')
pop = nipa_df(retrieve_table('T20100')['Data'], ['B230RC'])['B230RC']
popgr = value_text(cagr(pop.loc['1989-10-01':]), 'plain')
wmdgr = value_text(cagr(res.Median.loc['1989-10-01':]), 'increase_by')
wmngr = value_text(cagr(res.Average.loc['1989-10-01':]), 'increase_by', 
                   casual=True).replace('grew', 'grown')

text = (f'Since 1989, annualized net output growth is {ndpgr}, net '+
        f'productivity growth is {prodgr}, and population growth is {popgr}.\n\n'+
        f'While the US has modest labor productivity growth over the past few '+
        'decades, wages have not kept pace. The average wage has '+
        f'{wmngr} per year since 1989, and the median wage has '+
        f'{wmdgr} per year. ')
write_txt(text_dir / 'payprod.txt', text)
print(text)

Since 1989, annualized net output growth is 2.3 percent, net productivity growth is 1.3 percent, and population growth is 0.9 percent.

While the US has modest labor productivity growth over the past few decades, wages have not kept pace. The average wage has grown by 0.7 percent per year since 1989, and the median wage has increased by 0.4 percent per year. 


In [20]:
# Settings for plot
cmax, cmin = resa.max(), resa.min()
thresh = (cmax - cmin) * 0.5 #Bigger bars labeled inside

v = {'Median': ['\\footnotesize Median (CPS)', 'green!90!blue', 'black', 0], 
    'Average': ['\\footnotesize Average (ECI)', 'blue!80!black', 'white', 1],
    'Productivity': ['\\footnotesize Net Labor \\ \\footnotesize Productivity', 
                     'cyan!90!white', 'black', 2.4]}
txt = []
for k, [name, color, tcolor, y] in v.items():
    x = resa[k].round(3)
    bar = f'\\addplot[{color}] coordinates {{({x}, {y})}};'
    vtc = 'black'
    tx = f'{resa[k]:.1f}\%'
    if abs(x) > thresh:  # Some value labels inside of bars
        vt = f'\scriptsize \color{{{tcolor}}} \\textbf{{{tx}}}'
        inside = True
    else:
        vt = f'\scriptsize {tx}'
        inside = False
    if x > 0:
        ytlab = 'left, align=right'
        vtlab = 'left, align=right' if inside == True else 'right, align=right'
    else:
        ytlab = 'right, align=left'
        vtlab = 'right, align=left' if inside == True else 'left, align=left'
    # Create ylabel and value label
    ylabel = f'\\node[{ytlab}, text width=2.0cm] at (axis cs:0,{y}) {{{name}}};'
    vlabel = f'\\node[{vtlab}] at (axis cs:{x},{y}) {{{vt}}};'
    txt.append(bar)
    txt.append(ylabel)
    txt.append(vlabel)
nodes = '\n'.join(txt)
write_txt(text_dir / f'payprod_bars.txt', nodes)