### Handle Charts Driven by Multiple Sources

In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

### Wage Growth Summary Table

In [279]:
# Monthly year-over-year wage growth, assembled from several sources.
# Reads ces_raw.csv, uwe_cps.csv, atl_wgt.csv and pi_raw.csv from data_dir.
# Produces `ch` (12-month percent changes plus 3-month averages), `dfg`
# (goods-sector AHE level, reused by the quarterly cell), and writes
# wages_yy_monthly.csv.

# Average Hourly Earnings: CES series ID -> short column name
s = {'CES0500000003': 'AHE', 'CES0500000008': 'PNS', 
     'CES0600000008': 'Goods', 'CES0800000008': 'Serv'}

# Read the CES file once; it supplies both the AHE series and payrolls
ces_raw = pd.read_csv(data_dir / 'ces_raw.csv', index_col='date', 
                      parse_dates=True)

# AHE series
df = ces_raw[list(s)].rename(s, axis=1)
dfg = df['Goods']
# fill_method=None: do not forward-fill gaps before differencing, so a 
# missing month stays missing instead of reusing a stale level. This also
# matches the quarterly cell and avoids the deprecated pandas default.
ch = df.pct_change(12, fill_method=None) * 100

# Median wage from CPS (column is read as an already-computed growth rate)
ch['UWE_P50'] = pd.read_csv(data_dir / 'uwe_cps.csv', index_col='date', 
                            parse_dates=True)['p50_gr']
# Wage Growth Tracker
ch['WGT'] = pd.read_csv(data_dir / 'atl_wgt.csv', 
                        index_col='date', parse_dates=True)['bd_cps']

# NIPA Wages and Salaries / NFP
df = pd.read_csv(data_dir / 'pi_raw.csv', index_col='date', 
                 parse_dates=True)
# Nonfarm payrolls
nfp = ces_raw['CES0000000001']
# Wages and salaries per payroll job; rows where either input is missing
# are dropped before taking the 12-period change
ch['NIPA'] = ((df['A034RC'] / nfp).dropna()
              .pct_change(12, fill_method=None) * 100)

# 3 Month Moving average for volatile series
for i in ['UWE_P50', 'WGT', 'NIPA']:
    ch[f'{i}_3m'] = ch[i].rolling(3).mean()
    
ch.loc['1989':].to_csv(data_dir / 'wages_yy_monthly.csv', 
                      index_label='date')

In [280]:
# Build the LaTeX body of the monthly wage-growth table from 
# wages_yy_monthly.csv and write it to wages_yy_monthly.tex.

# Rename for table: column key -> LaTeX row label.
# Raw strings keep the LaTeX backslashes literal; the non-raw forms 
# ('\h', '\&') are invalid escape sequences that newer Python warns about.
d = {'AHE': 'Average Hourly Earnings (AHE), Private',
     'PNS': r'\hspace{2mm} Production \& Nonsupervisory',
     'Goods': r'\hspace{4mm} Goods-Producing Industries',
     'Serv': r'\hspace{4mm} Service-Providing Industries',
     'UWE_P50': 'Usual Weekly Earnings, Median',
     'UWE_P50_3m': 'Usual Weekly Earnings, Median (3M Avg)',
     'WGT': 'Wage Growth Tracker, Median',
     'WGT_3m': 'Wage Growth Tracker, Median (3M Avg)',
     'NIPA': r'Wages \& Salaries, Average (NIPA)',
     'NIPA_3m': r'Wages \& Salaries, Average (3M Avg)'}

dfm = pd.read_csv(data_dir / 'wages_yy_monthly.csv', index_col='date', 
                 parse_dates=True)[list(d)].rename(d, axis=1)

# Latest 6 months, newest first, one row per measure
res = dfm.iloc[-6:].iloc[::-1].T
# Same month as the latest row, one and two years earlier
# (assumes one row per month with no gaps -- TODO confirm)
for i in [-13, -25]:
    res[dfm.index[i]] = dfm.iloc[i]
    
res.columns = [dtxt(dt)['mon6'] for dt in res.columns]
# Format every cell to one decimal; missing values print as '--'.
# Per-column Series.map replaces the deprecated DataFrame.applymap.
res = (res.apply(lambda col: col.map('{:.1f}'.format))
          .replace('nan', '--'))
# '&' separates cells and r'\\ ' (two backslashes and a space) ends each row
(res.to_csv(data_dir / 'wages_yy_monthly.tex', sep='&', 
            lineterminator=r'\\ ', quotechar=' '))

In [281]:
# Quarterly measures of wages: year-over-year growth table, plus the
# gender wage gap series. Relies on `dfg` (goods-sector AHE level) 
# defined in the monthly wage-growth cell.

# Column key -> LaTeX row label for the summary table.
# Raw string keeps '\&' literal without an invalid escape sequence.
d = {'IndexWS': r'Wages \& Salaries (ECI)',
     'P50_tot': 'Usual Weekly Earnings, Median',
     #'P50_men': '\hspace{2mm} Men, Age 16+',
     #'P50_women': '\hspace{2mm} Women, Age 16+',
     'nfbus_ulc': 'Unit Labor Cost'}

# Median wage from BLS 
df = (pd.read_csv(data_dir / 'uwe_main.csv', index_col='date', 
                            parse_dates=True)) 

# Wages and Salaries from Employment Cost Index
eci = pd.read_csv(data_dir / 'eci.csv', index_col='date', 
                 parse_dates=True)[['IndexWS','IndexWSGoods']]

# Unit Labor Costs
ulc = pd.read_csv(data_dir / 'lprod.csv', index_col='date', 
                 parse_dates=True)[['business_ulc', 'manuf_ulc', 'nfbus_ulc']]

df = pd.concat([df, eci, ulc], axis=1)

# Average Hourly Earnings, goods-producing: quarterly mean of monthly data
df['AHEGoods'] = dfg.resample('QS').mean()

# Four-quarter percent change; gaps are left missing rather than filled
dfy = df.pct_change(4, fill_method=None) * 100 
dfy.loc['1989':].to_csv(data_dir / 'wages_yy_quarterly.csv', 
                      index_label='date')
# One-quarter percent change scaled by 4 (annualized without compounding).
# Not used in this cell; presumably read by a later one -- verify.
dfq = df.pct_change(fill_method=None) * 400 

# Gender Wage Gap: women's median as a share of men's, 4-quarter average
gwg = (df['P50_women'] / df['P50_men']).rolling(4).mean()
gwg.name = 'GWG'
gwg.loc['1989':].to_csv(data_dir / 'gwg.csv', 
                      index_label='date')
node = end_node(gwg, 'red', date='qs', offset=0.1,
                anchor='south', align='center', colon=False, digits=2)
write_txt(text_dir / 'gwg_node.txt', node)

# Summary Table
# Latest 5 quarters, newest first (the fifth is the same quarter last year)
res = dfy.iloc[-5:].iloc[::-1].T
# Same quarter two, three, and four years before the latest row
# (assumes one row per quarter with no gaps -- TODO confirm)
for i in [-9, -13, -17]:
    res[dfy.index[i]] = dfy.iloc[i]
    
res = res.loc[list(d)].rename(d)
res.columns = [dtxt(dt)['qtr4'] for dt in res.columns]
# Format every cell to one decimal; missing values print as '--'.
# Per-column Series.map replaces the deprecated DataFrame.applymap.
res = (res.apply(lambda col: col.map('{:.1f}'.format))
          .replace('nan', '--'))
# '&' separates cells and r'\\ ' (two backslashes and a space) ends each row
(res.to_csv(data_dir / 'wages_yy_quarterly.tex', sep='&', 
            lineterminator=r'\\ ', quotechar=' '))

In [282]:
# Gender Wage Gap Text
# Uses `gwg` (women-to-men median earnings ratio) from the quarterly cell.

# Reference dates: comparison quarter and the two historical spans
cmp = '2019-10-01'
dt1, dt2 = '1990-01-01', '2004-10-01'
dt3, dt4 = '2005-01-01', cmp

# Labels for the latest and comparison quarters
ltdt = dtxt(gwg.index[-1])['qtr1']
cmpdt = dtxt(cmp)['qtr1']

# Latest ratio in cents per dollar, and the gaps in percent
gap = gwg.iloc[-1] * 100
gap2 = 100 - gap
gapcmp = (1.0 - gwg.loc[cmp]) * 100

# Annualize rates (divide by 59 quarters): each span runs 59 quarters, 
# so ratio change * 100 / (59 / 4) is percentage points per year
pp_per_year = 100 / (59 / 4)
hgap1, hgap2 = [
    value_text((gwg.loc[end] - gwg.loc[start]) * pp_per_year, 
               'plain', 'pp', digits=2)
    for start, end in [(dt1, dt2), (dt3, dt4)]
]

text = ('From 1990 to 2004 Q4, the gender wage gap closed at '
        f'an annual rate of {hgap1} per year. From 2005 to 2019 Q4, '
        f'the gap closed at an annual rate of only {hgap2} per year.\n\n'
        f'Over the year ending {ltdt}, the gender wage gap is {gap2:.1f} '
        f'percent. That is, women typically make {gap:.1f} cents for '
        f'every dollar a man makes. In 2019 Q4, the gap was {gapcmp:.1f} '
        'percent.')
write_txt(text_dir / 'gwg.txt', text)
print(text)

From 1990 to 2004 Q4, the gender wage gap closed at an annual rate of 0.66 percentage point per year. From 2005 to 2019 Q4, the gap closed at an annual rate of only 0.08 percentage point per year.

Over the year ending 2023 Q3, the gender wage gap is 16.4 percent. That is, women typically make 83.6 cents for every dollar a man makes. In 2019 Q4, the gap was 18.4 percent.


In [283]:
# Summary text: one sentence on whether wage growth is rising or falling
# over the past year and how it compares with 2019.
# Uses `ch` (monthly y/y growth) and `dfy` (quarterly y/y growth) from 
# the cells above.

# A change smaller than this many percentage points counts as unchanged.
# The same cutoff drives both the pd.cut bins and the wording below.
wage_cutoff = 0.49

d, d2 = {}, {} # One-year change and change since 2019
# Monthly series
for s in ['AHE', 'PNS', 'Goods', 'WGT', 'WGT_3m']:
    d[s] = ch[s].dropna().diff(12).iloc[-1]
    d2[s] = ch[s].dropna().iloc[-1] - ch.loc['2019', s].mean()
    
# Quarterly Series    
for s in ['IndexWS', 'IndexWSGoods', 'p50uwe']:
    d[s] = dfy[s].dropna().diff(4).iloc[-1]
    d2[s] = dfy[s].dropna().iloc[-1] - dfy.loc['2019', s].mean()
    
chlt = pd.DataFrame({'One-year': d, 'Since 2019': d2})
# Key measure: average of ECI wages and the 3-month Wage Growth Tracker
keych = chlt.loc[['IndexWS', 'WGT_3m']].mean()
chlt.loc['keych'] = keych
# Classify every change; bins are right-inclusive, so a value exactly at 
# +cutoff is 'same' and a value exactly at -cutoff is 'below'
chdf = pd.concat([pd.cut(chlt[c], [-50, -wage_cutoff, wage_cutoff, 50], 
                         labels=['below', 'same', 'above']) 
           for c in chlt.columns], axis=1)

ref = chdf.drop('keych')
keym = chdf.loc['keych']

# What percent of the series match the key series (on both columns)?
matches = pd.concat([ref[c] == keym[c] for c in ref.columns], axis=1)
pct = matches.all(axis=1).mean()

# Wording for the key measure. The upper test is strict (>) so that it 
# agrees with the pd.cut bins above at exactly +cutoff.
key_yr, key_since = keych['One-year'], keych['Since 2019']
keydir1 = ('falling' if key_yr <= -wage_cutoff else 'rising' 
           if key_yr > wage_cutoff else 'stable')
keydir2 = ('below' if key_since <= -wage_cutoff else 'above' 
           if key_since > wage_cutoff else 'in line with')

keyt = 'key'
if pct > 0.8:
    keyt = 'nearly all'
elif pct > 0.5:
    keyt = 'most'
    
# Use a contrast when the one-year direction runs against the 2019 level
but = 'but still' if ((keydir1 == 'falling' and keydir2 == 'above') or 
                (keydir1 == 'rising' and keydir2 == 'below')) else 'and'    

text = (f'{keyt} measures show one-year nominal wage growth that is {keydir1} '+
        f'{but} {keydir2} the pre-pandemic rate.')
write_txt(text_dir / 'wage_rec_summary.txt', text)
print(text)

most measures show one-year nominal wage growth that is falling but still above the pre-pandemic rate.
