CES Example

In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

In [2]:
# Industry metadata from the BLS CES flat files
ind_url = 'https://download.bls.gov/pub/time.series/ce/ce.industry'
ind = pd.read_table(ind_url, sep='\t')
naics = ind['naics_code']
naics_len = naics.str.len()

# Group 1: industries whose NAICS code is three characters long
ind_g1 = ind[naics_len == 3]

# Group 2: display-level-4 industries with any other code length, leaving
# out publishing status "C", the "621,2,3" code and the "-" placeholder
in_g2 = ((naics_len != 3)
         & (ind['display_level'] == 4)
         & (ind['publishing_status'] != 'C')
         & (naics != '621,2,3')
         & (naics != '-'))
ind_g2 = ind[in_g2]

# Group 3: two-character codes whose prefix is not already in group 1
g1_prefixes = ind_g1.naics_code.str[:2].unique()
ind_g3 = ind[(naics_len == 2) & ~naics.isin(g1_prefixes)]

indg = pd.concat([ind_g1, ind_g2, ind_g3])

# Series metadata; the series_id header is padded with spaces in the
# source file, so the converter key must match that padded name exactly
srs_url = 'https://download.bls.gov/pub/time.series/ce/ce.series'
converters = {'series_id        ': lambda x: x.strip()}
srs = pd.read_table(srs_url, sep='\t', converters=converters)
srs.columns = srs.columns.str.strip()

# Data types 1 and 3 for the selected industries, seasonal == "U" only
# (used downstream as payrolls and average hourly earnings, NSA)
ind_grps = (indg.merge(srs)
                .query('data_type_code in [1, 3] and seasonal == "U"'))

# Supersector series IDs mapped to display names
ss = {f'CEU{code}01': label
      for code, label in [('90000000', 'Government'),
                          ('06000000', 'Private Goods-Producing'),
                          ('08000000', 'Private Service-Producing')]}

# Selected other series, looked up directly in the series metadata
s_list = ['CEU0000000001', 'CES0000000001', 'CEU0500000006',
          'CEU0500000001', 'CES0500000006', 'CES0500000001']
s_list = s_list + list(ss.keys())
others = srs[srs['series_id'].isin(s_list)]

# Combined selection, saved for the cells that read it back from disk
sel = pd.concat([ind_grps, others])
sel.to_csv(data_dir / 'ces_meta_raw.csv')

In [3]:
# Full CES dataset (all series), read with the same series_id converter
url = 'https://download.bls.gov/pub/time.series/ce/ce.data.0.AllCESSeries'
raw = pd.read_table(url, sep='\t', converters=converters)
raw.columns = raw.columns.str.strip()

# Restrict to the selected series and to years after 1987
sd = raw.query('series_id in @sel.series_id and year > 1987')
is_annual = sd['period'] == 'M13'

# Annual data: period M13 rows, one column per series, dated July 1
sdy = (sd[is_annual]
       .set_index(['year', 'series_id'])['value']
       .unstack())
sdy.index = pd.to_datetime(dict(year=sdy.index, month=7, day=1))
sdy.to_csv(data_dir / 'ces_ann_raw.csv', index_label='date')

# Monthly data: every other period, indexed by first-of-month date
idx = ['year', 'period', 'series_id']
sdm = sd[~is_annual].set_index(idx)['value'].unstack()
sdm.index = pd.to_datetime([f'{year}-{period[1:]}-01'
                            for year, period in sdm.index])
sdm.to_csv(data_dir / 'ces_raw.csv', index_label='date')

### Job Growth by Sector

In [4]:
# Monthly CES levels written to disk by the download cell
sdm = pd.read_csv(data_dir / 'ces_raw.csv', index_col='date', 
                  parse_dates=True)
# Supersector series and the short labels used in the chart and text
ss = {'90000000': 'Gov.',
      '06000000': 'Goods',
      '08000000': 'Services'}
ss = {f'CEU{k}01':v for k,v in ss.items()}
df = sdm[list(ss)]

# Change from the same month three years before the latest observation.
# The base date is derived from the data rather than pinned to 2019, so
# the "three years" wording below stays true as new months are added.
base_dt = df.index[-1] - pd.DateOffset(years=3)
res = (df.iloc[-1] - df.loc[base_dt]).rename(ss)

# Bar plot: one pgfplots \addplot per sector, colored per `ssd`
ssd = {'Gov.': 'green!85!yellow!75!black',
       'Goods': 'blue!80!black',
       'Services': 'cyan!80!white'}

bars = '\n'.join([f'\\addplot[{ssd[n]}]coordinates {{({i+1},{res[n]})}};' 
                  for i, n in enumerate(ssd.keys())])
write_txt(text_dir / 'emp_ss_bars.txt', bars)

# Text. Values are scaled by 1,000 before formatting
# (presumably the source is in thousands of jobs -- confirm).
ltdt = dtxt(sdm.index[-1])['mon1']
tot = value_text(res.sum() * 1_000, ptype=None, adj='total', digits=0)
gov = value_text(res['Gov.'] * 1_000, 'increase_by', ptype=None, 
                 digits=0, casual=True)
priv = value_text((res.sum() - res['Gov.']) * 1_000, ptype=None, 
                  adj='total', digits=0)
goods = value_text(res['Goods'] * 1_000, 'added_lost', ptype=None, 
                   digits=0)
# Report services in millions only when the change is at least one
# million in magnitude; compare on abs() so large losses are formatted
# the same way as large gains.
if abs(res['Services']) < 1_000:
    serv = value_text(res['Services'] * 1_000, 'added_lost', 
                      ptype=None, digits=0)
else:
    serv = (value_text(res['Services'] / 1_000, 'added_lost', 
                       ptype=None, digits=1) + ' million')
text = (f'Over the three years ending {ltdt}, nonfarm payrolls '+
        f'{tot}. By sector, combined government payrolls {gov} {c_box(ssd["Gov."])}, '+
        f'and private payrolls {priv} over the three-year period. Private '+
        f'goods-producing industries {goods} jobs {c_box(ssd["Goods"])}, and private service-'+
        f'providing industries {serv} jobs {c_box(ssd["Services"])}. ')
write_txt(text_dir / 'nfp_ss.txt', text)
print(text)

Over the three years ending September 2022, nonfarm payrolls increased by a total of 1,562,000. By sector, combined government payrolls fell by 307,000 (see\cbox{green!85!yellow!75!black}), and private payrolls increased by a total of 1,869,000 over the three-year period. Private goods-producing industries added 127,000 jobs (see\cbox{blue!80!black}), and private service-providing industries added 1.7 million jobs (see\cbox{cyan!80!white}). 


### Job Growth By 2019 Wage Tercile

In [5]:
# Series metadata: series ID -> industry name (rows without a name drop out)
sel = pd.read_csv(data_dir / 'ces_meta_raw.csv')
d = sel.set_index('series_id')['industry_name'].dropna().to_dict()

# 2019 annual values for those series
v19 = pd.read_csv(data_dir / 'ces_ann_raw.csv', index_col='date', 
                 parse_dates=True).loc['2019-07-01', d.keys()]

# Series IDs ending in "3" fill the `ahe` column, those ending in "1"
# fill `emp`; both are re-indexed by industry name
c = {'ahe': '3', 'emp': '1'}
df = pd.DataFrame({n: v19[v19.index.str.endswith(i)].rename(d) 
                   for n, i in c.items()})

# Latest data and three-year prior (36 rows earlier);
# after the transpose, column 0 is the latest month and column 1 the prior
lt = pd.read_csv(data_dir / 'ces_raw.csv', index_col='date', 
                 parse_dates=True).iloc[[-1, -37]].T.loc[d.keys()]
df[['emplt', 'emppr']] = lt[lt.index.str.endswith('1')].rename(d)
df['diff'] = df['emplt'] - df['emppr']


# Split into three wage groups using the employment-weighted cumulative
# share of industries sorted by `ahe`; rows with missing data get no
# cdf value and fall through to the 'none' group
dft = df.dropna()
df['cdf']  = dft.sort_values('ahe').emp.cumsum() / dft.emp.sum()
grps = (lambda x: np.where(x.cdf < (1/3), 'Lowest Third', 
                  np.where((x.cdf >= (1/3)) & (x.cdf < (2/3)), 'Middle Third', 
                  np.where((x.cdf >= (2/3)) & (x.cdf <= 1), 'Highest Third', 'none'))))

df = df.assign(grps = grps)

# Change by group. The 'none' group only exists when some industry lacks
# data, so it is dropped/looked up tolerantly instead of raising KeyError.
grp_ch = df.groupby('grps')['diff'].sum()
res = grp_ch.drop('none', errors='ignore').round()

(res.to_csv(data_dir / 'jobs_tercile.csv', index_label='grps', 
            header=True))

# Footer for industries missing wage data (0 when there are none)
nonech = value_text(grp_ch.get('none', 0.0) * 1_000, 
                    'added_lost', ptype=None, digits=0)
text = (f'Private industries without wage information {nonech} '+
        'jobs over the period.')
write_txt(text_dir / 'jobs_nowage_ch.txt', text)

# Date range, taken from the same frame the changes were computed from
# rather than from a variable left over from another cell
prdt = dtxt(lt.columns[1])['mon1']
emp3dt = f"{prdt} to {dtxt(lt.columns[0])['mon1']}"
write_txt(text_dir / 'emp_3_dates.txt', emp3dt)

# Text
low = value_text(res['Lowest Third'] * 1000, 'added_lost', ptype=None, digits=0)
mid = value_text(res['Middle Third'] * 1000, 'added_lost', ptype=None, 
                 digits=0, casual=True)
# Use millions only when the magnitude reaches one million; abs() keeps
# large losses formatted the same way as large gains
if abs(res['Highest Third']) < 1000:
    high = value_text(res['Highest Third'] * 1000, 'added_lost', 
                      ptype=None, digits=0)
else:
    high = (value_text(res['Highest Third'] / 1000, 'added_lost', ptype=None, 
                       digits=2) + ' million')
text = ('Dividing the private industries into three wage groups, the '+
        f'lowest-wage industries {low} jobs since {prdt}, the middle-wage '+
        f'industries {mid} jobs, and the highest-wage industries {high} '+
        f'jobs {c_box("violet!70!magenta")}. ')
write_txt(text_dir / 'nfp_wg.txt', text)
print(text)

Dividing the private industries into three wage groups, the lowest-wage industries lost 194,000 jobs since September 2019, the middle-wage industries gained 160,000 jobs, and the highest-wage industries added 1.84 million jobs (see\cbox{violet!70!magenta}). 


### Overview Jobs Growth and Monthly Change

In [6]:
# CES series IDs -> short column names ("sa" suffix marks the CES* IDs)
sl = {'CEU0000000001': 'NFP', 'CES0000000001': 'NFPsa', 
      'CEU0500000006': 'PNS', 'CES0500000006': 'PNSsa',
      'CEU0500000001': 'PNFP', 'CES0500000001': 'PNFPsa'}
# CES series from flat/text files
df = pd.read_csv(data_dir / 'ces_raw.csv', index_col='date', 
                 parse_dates=True).loc[:, list(sl)].rename(sl, axis=1)
# Add LFS series from jobs report (aligned on the date index)
df[['EMP', 'EMPsa']] = (pd.read_csv(data_dir / 'jobs_report_main2.csv', 
                                   index_col='date', parse_dates=True)
                          .loc[:, ['EMP', 'EMPsa']])
# Save results for time series graph, rescaled by 1/1000
res = df.loc['1989':].divide(1000)
res.to_csv(data_dir / 'emp_ts.csv', index_label='date')

# End nodes for time series graph; the series with the highest latest
# value has its label offset increased by 0.35
res2 = res[['NFP', 'EMP', 'PNS']]
adj = node_adj(res2)
smax = res2.iloc[-1].idxmax()
adj[smax] = adj[smax] + 0.35
cols = {'NFP': 'blue!70!white', 'EMP': 'magenta',
        'PNS': 'orange!80!yellow'}
date = {series: 'm' if series in ['EMP'] else None 
        for series in cols.keys()}
nodes  ='\n'.join([end_node(res2[series].dropna(), color, date=date[series], 
                            full_year=True, size=1.1, offset=adj[series]) 
                   for series, color in cols.items()])
write_txt(text_dir / 'emp_ts_nodes.txt', nodes) 

# Latest values bar graph: 2019 average monthly change followed by the
# three most recent monthly changes
sa = df[['NFPsa', 'EMPsa']]
sa_ch = sa.diff()
final2 = sa_ch.loc['2019'].mean().to_frame().T
final2['label'] = '2019 Average'
# .copy() so the label column is added to an independent frame, not to a
# slice of sa_ch (avoids SettingWithCopyWarning)
final = sa_ch.iloc[-3:].copy()
final['label'] = [dtxt(i)['mon2'] for i in final.index]
final = pd.concat([final2, final])
final.to_csv(data_dir / 'emp_lt.csv', index_label='date')

In [7]:
# Latest month, latest values, and the window up to March 2020 that is
# used for the "pre-COVID peak" figures
ltdt = dtxt(res.index[-1])['mon1']
lt = res.iloc[-1]
pc = res.loc[:'2020-03-01']
# PNS as a percentage of PNFP in the latest month
pnssh = (lt.PNS / lt.PNFP) * 100

cl = {n: c_line(col) for n, col in cols.items()}
text = (f'In {ltdt}, establishments report {lt.NFP:.1f} million '+
        f'\\textbf{{nonfarm payroll employees}} {cl["NFP"]}. '+
        f'The pre-COVID peak was {pc.NFP.max():.1f} million '+
        f'in {dtxt(pc.NFP.idxmax())["mon1"]}. '+
        f'Households report {lt.EMP:.1f} million employed '+
        'people, including the self-employed but not including '+
        'armed forces, in the latest month, compared to a pre-COVID '+
        f'peak of {pc.EMP.max():.1f} million {cl["EMP"]}. \n\nPrivate '+
        'production and nonsupervisory workers are engaged in '+
        'production, including working supervisors, or in other '+
        'activities but not above the working supervisor level. '+
        f'In {ltdt}, this group totals {lt.PNS:.1f} million, compared '+
        f'to a pre-COVID peak of {pc.PNS.max():.1f} million {cl["PNS"]}. '+
        f'Production and nonsupervisory workers comprise {pnssh:.1f} '+
        f'percent of private nonfarm payrolls in {ltdt}. ')
write_txt(text_dir / 'emp_overview1.txt', text)
print(text)

# Monthly changes, computed once and rescaled by 1,000,000 before being
# formatted as job counts
chg = res.diff()
lc = chg.iloc[-1] * 1_000_000
ltemp = value_text(lc.EMPsa, 'increase_by', ptype=None, digits=0)
a19 = chg.loc['2019'].mean() * 1_000_000
empc = compare_text(lc.EMPsa, a19.EMPsa, [30000, 50000, 150000])
nfpal = 'added' if lc.NFPsa >= 0 else 'lost'
# Mean of both series' changes over the last three months, rounded to
# the nearest hundred
a3m = value_text(round(chg[['NFPsa', 'EMPsa']].iloc[-3:].mean().mean()
                       * 1_000_000, -2), 
                 'increase_of', ptype=None, digits=0)
# The verb (added/lost) already carries the sign, so the payroll change
# is printed as a magnitude; otherwise a decline would read "lost ... -N".
text = (f'In {ltdt}, seasonally-adjusted civilian employment {ltemp} '+
        f'{c_box(cols["EMP"])}, {empc} the 2019 average increase of '+
        f'{a19.EMPsa:,.0f} jobs per month. The US {nfpal} a net total '+
        f'of {abs(lc.NFPsa):,.0f} nonfarm payroll jobs in {ltdt} '+
        f'{c_box(cols["NFP"])}, compared to a monthly average of '+
        f'{a19.NFPsa:,.0f} in 2019. The average of both surveys over '+
        f'the past three months shows {a3m} employees per month.  ')
write_txt(text_dir / 'emp_overview2.txt', text)
print('\n', text)

In September 2022, establishments report 153.1 million \textbf{nonfarm payroll employees} (see {\color{blue!70!white}\textbf{---}}). The pre-COVID peak was 153.1 million in November 2019. Households report 159.0 million employed people, including the self-employed but not including armed forces, in the latest month, compared to a pre-COVID peak of 159.1 million (see {\color{magenta}\textbf{---}}). 

Private production and nonsupervisory workers are engaged in production, including working supervisors, or in other activities but not above the working supervisor level. In September 2022, this group totals 106.4 million, compared to a pre-COVID peak of 106.9 million (see {\color{orange!80!yellow}\textbf{---}}). Production and nonsupervisory workers comprise 81.4 percent of private nonfarm payrolls in September 2022. 

 In September 2022, seasonally-adjusted civilian employment increased by 204,000 (see\cbox{magenta}), slightly above the 2019 average increase of 167,083 jobs per month. The

In [8]:
# Row labels for the LaTeX table. Raw strings keep the LaTeX backslashes
# literal without relying on Python's invalid-escape fallback.
d = {'EMP': 'Employed', 'NFP': 'Nonfarm Payrolls',
     'PNFP': r'\hspace{1mm} Private Nonfarm Payrolls',
     'PNS': r'\hspace{2mm} Production \& Nonsuperv.'}
# The "sa" variant of each series shares the label of its base series
d = {**{f'{k}sa': v for k,v in d.items()}, **d}

g1 = ['EMP', 'NFP', 'PNFP', 'PNS']
g2 = [f'{i}sa' for i in g1]

# Copy with month-label index so the table columns carry readable dates
r2 = res.copy()
r2.index = [dtxt(i)['mon2'] for i in r2.index]

# Latest two months (most recent first): "sa" columns, then the base columns
tbl = pd.concat([r2[g2].iloc[-2:].T.iloc[:, ::-1].rename(d), 
                 r2[g1].iloc[-2:].T.iloc[:, ::-1].rename(d)], axis=1)
# Calendar-year averages of the base series for selected years
tbl['2019 Avg.'] = res.loc['2019', g1].mean().rename(d)
tbl['2017 Avg.'] = res.loc['2017', g1].mean().rename(d)
tbl['2000 Avg.'] = res.loc['2000', g1].mean().rename(d)

# Write as LaTeX table rows: "&"-separated cells, each row ended by "\\ ".
# NOTE(review): newer pandas renames `line_terminator` to `lineterminator`
# and deprecates `applymap`; update both if the environment is upgraded.
tbl.applymap('{:.1f}'.format).to_csv(data_dir/'emp_overview.tex', sep='&', 
             line_terminator=r'\\ ', quotechar=' ')

### Payrolls by Industry Summary Table

In [9]:
# CES series ID -> LaTeX row label for the payrolls-by-industry table.
# Dict order is the row order of the table; \hspace sets the indent level.
# Raw strings keep the LaTeX backslashes literal without relying on
# Python's invalid-escape fallback.
s = {'CEU0000000001': 'Total nonfarm',
     'CEU0500000001': r'\hspace{1mm} Total private',
     'CEU0600000001': r'\hspace{2mm} Goods-producing',
     'CEU1000000001': r'\hspace{4mm} Mining \& logging',
     'CEU2000000001': r'\hspace{4mm} Construction',
     'CEU3000000001': r'\hspace{4mm} Manufacturing',
     'CEU0800000001': r'\hspace{2mm} Private service-providing',
     'CEU4142000001': r'\hspace{4mm} Wholesale trade',
     'CEU4200000001': r'\hspace{4mm} Retail trade',
     'CEU4300000001': r'\hspace{4mm} Transportation \& warehousing',
     'CEU5000000001': r'\hspace{4mm} Information',
     'CEU5500000001': r'\hspace{4mm} Financial activities',
     'CEU5553000001': r'\hspace{4mm} Real estate \& rental \& leasing',
     'CEU6054000001': r'\hspace{4mm} Professional \& technical services',
     'CEU6055000001': r'\hspace{4mm} Management',
     'CEU6056100001': r'\hspace{4mm} Administrative \& support',
     'CEU6561000001': r'\hspace{4mm} Educational services',
     'CEU6562000101': r'\hspace{4mm} Health care',
     'CEU6562400001': r'\hspace{4mm} Social assistance',
     'CEU7071000001': r'\hspace{4mm} Arts, entertainment, \& recreation',
     'CEU7072100001': r'\hspace{4mm} Accommodation',
     'CEU7072200001': r'\hspace{4mm} Food services \& drinking places',
     'CEU8000000001': r'\hspace{4mm} Other services',
     # Placeholder key: this row is not a single series but the sum of
     # the series listed in `s_other`
     'FromData': r'\hspace{4mm} Utilities and waste management',
     'CEU9000000001': r'\hspace{1mm} Government'}
# Series summed to build the 'FromData' row
s_other = ['CEU4422000001', 'CEU6056200001']

In [10]:
# Not seasonally adjusted data
res = raw.query('(series_id in @s.keys() or series_id in @s_other) '+
                'and year > 1988 and period != "M13"')
res = res.set_index(['year', 'period', 'series_id'])['value'].unstack()
res.index = [pd.to_datetime(f'{year}-{period[1:]}-01') 
             for year, period in res.index]
res[s['FromData']] = res[s_other].sum(axis=1)
res = res.drop(s_other, axis=1)
res = res.rename(s, axis=1)
tbl = pd.DataFrame({ltdt: res.iloc[-1], 
                    '3-year change': res.diff(36).iloc[-1]}).loc[s.values()]
# Seasonally adjusted data
s2 = {k.replace('CEU', 'CES'): v for k,v in s.items()}
s2_other = [i.replace('CEU', 'CES') for i in s_other]
res2 = raw.query('(series_id in @s2.keys() or series_id in @s2_other) '+
                'and year > 1988 and period != "M13"')
res2 = res2.set_index(['year', 'period', 'series_id'])['value'].unstack()
res2.index = [pd.to_datetime(f'{year}-{period[1:]}-01') 
             for year, period in res2.index]
res2[s['FromData']] = res2[s2_other].sum(axis=1)
res2 = res2.drop(s2_other, axis=1)
res2 = res2.rename(s2, axis=1)
ltdr = f"{dtxt(res2.index[-3])['mon6']}-- {dtxt(res2.index[-1])['mon6']} average"
prdr = f"{dtxt(res2.index[-6])['mon6']}-- {dtxt(res2.index[-4])['mon6']} average"
tbl2 = pd.DataFrame({f'{ltdt} ': res2.iloc[-1], 
                     '1-month change': res2.diff().iloc[-1],
                     ltdr: res2.diff().iloc[-3:].mean(),
                     prdr: res2.diff().iloc[-6:-3].mean()}).loc[s2.values()]
table = tbl2.join(tbl).applymap('{:,.0f}'.format).replace('-0', '0')
table.index.name = ''
table.to_csv(data_dir/'nfp.tex', sep='&', 
             line_terminator='\\\ ', quotechar=' ')

In [11]:
# Three-year (36-month) change by industry, smallest first
st = res.diff(36).iloc[-1].sort_values()
# Strip the LaTeX indent prefix and escaped ampersand to get plain labels
st.index = [i.split('} ')[-1].replace(r'\&', 'and').lower() 
            for i in st.index]
# Aggregates and categories left out of the ranking
excats = ['government', 'other services', 'total nonfarm', 
          'total private', 'private service-providing', 
          'goods-producing']
# Rescale by 1,000 and format with an explicit sign
st = (st.multiply(1000).apply('{:+,.0f}'.format)
        .drop(excats))

ltdt = dtxt(res.index[-1])['mon1']
# Four largest and three smallest changes, joined as "a, b, c, and d"
high = [f'{st.index[i]} ({st.iloc[i]})' for i in [-1, -2, -3, -4]]
htxt = ', and '.join([', '.join(high[:3]), high[3]])
low = [f'{st.index[i]} ({st.iloc[i]})' for i in [0, 1, 2]]
ltxt = ', and '.join([', '.join(low[:2]), low[2]])

text = (f'Over the three years ending {ltdt}, the industry '+
        'groups with the largest increase in payrolls were '+
        f'{htxt}. The private industry groups with the '+
        f'least job growth were {ltxt}.')
write_txt(text_dir / 'emp_ind.txt', text)
print(text)

Over the three years ending September 2022, the industry groups with the largest increase in payrolls were professional and technical services (+941,200), transportation and warehousing (+773,200), retail trade (+217,600), and health care (+185,200). The private industry groups with the least least job growth were accommodation (-362,200), food services and drinking places (-334,100), and arts, entertainment, and recreation (-129,400).
