In [22]:
import pandas as pd
from pathlib import Path
import requests
import io
import os
import sys
import numpy as np
current_dir = os.getcwd()
utils_dir = os.path.join(current_dir, '..', 'utils')
sys.path.append(utils_dir)
from chartspecs import *
from api_call import *

In [23]:
# Request headers (a desktop Firefox profile) passed to requests.get
# when retrieving pages and flat files from BLS
headers = {
    'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64; rv:121.0) '
                   'Gecko/20100101 Firefox/121.0'),
    'Accept': ('text/html,application/xhtml+xml,application/xml;q=0.9,'
               'image/avif,image/webp,*/*;q=0.8'),
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
}

### CPI

https://www.bls.gov/cpi/tables/relative-importance/#RIData

Retrieve and store latest relative weights. 
Use latest data from here. 

This is the latest link: 

https://www.bls.gov/cpi/tables/relative-importance/2023.htm

In [24]:
# Download the BLS relative-importance table and store the item weights.
# wgt_dt is written as the CSV index label, so the weight reference date
# travels with the file (it is read back later from the index name).
wgt_dt = '2023-12-01'
url = 'https://www.bls.gov/cpi/tables/relative-importance/2023.htm'
r = requests.get(url, headers=headers)
# Fail on an HTTP error instead of trying to parse an error page as a table
r.raise_for_status()
t = pd.read_html(io.StringIO(r.content.decode('utf-8')), header=0, index_col=0)
# Literal replacement: as a regex, the dots in "U.S." would match any character
t[0].columns = t[0].columns.str.replace('U.S. City Average, ', '', regex=False)
file_path = os.path.join(data_dir, 'cpi_rel_wgts_raw.csv')
t[0].dropna().to_csv(file_path, index_label=wgt_dt)

In [25]:
# Preview the rows that were written to cpi_rel_wgts_raw.csv
t[0].dropna()

Unnamed: 0_level_0,CPI-U,CPI-W
Item and Group,Unnamed: 1_level_1,Unnamed: 2_level_1
All items,100.000,100.000
Food and beverages,14.409,15.950
Food,13.555,15.235
Food at home,8.167,9.572
Cereals and bakery products,1.066,1.258
...,...,...
Commodities less food and energy commodities,18.891,19.123
Energy commodities,3.539,4.612
Services less energy services,60.899,57.427
Domestically produced farm food,6.798,7.832


#### Series names and display level

In [26]:
# Retrieve item names and codes from the BLS cu.item flat file and keep
# only the item name and display level, indexed by the file's first column
url = 'https://download.bls.gov/pub/time.series/cu/cu.item'
r = requests.get(url, headers=headers)
# Fail on an HTTP error instead of parsing an error page as a data table
r.raise_for_status()
codes = (pd.read_table(io.StringIO(r.content.decode('utf-8')), index_col=0)
           .loc[:, ['item_name', 'display_level']])
file_path = os.path.join(data_dir, 'cpi_codes.csv')
codes.to_csv(file_path)

In [27]:
# Preview the item-code lookup table that was saved to cpi_codes.csv
codes

Unnamed: 0_level_0,item_name,display_level
item_code,Unnamed: 1_level_1,Unnamed: 2_level_1
AA0,All items - old base,0
AA0R,Purchasing power of the consumer dollar - old ...,0
SA0,All items,0
SA0E,Energy,1
SA0L1,All items less food,1
...,...,...
SSEA011,College textbooks,3
SSEE041,Smartphones,4
SSFV031A,Food at elementary and secondary schools,3
SSGE013,Infants' equipment,3


#### Data

In [28]:
# Selected series to retrieve from API.
# A series ID is one of the two prefixes below followed by an item code;
# names of series built with the `sa` prefix get an ' (SA)' suffix.
nsa = 'CUUR0000'
sa = 'CUSR0000'

# Item codes requested over the long window (1988-2024)
lt = ['SA0', 'SAF1', 'SAH1', 'SACL1E', 'SASLE', 'SEHA',
      'SA0E', 'SA0L1E', 'SETB01', 'SETA01', 'SETA02', 'SAE1',
      'SAM']
# Codes from `lt` whose `sa` series are also requested over the long window
lts = ['SA0', 'SA0L1E']
# Item codes requested over the recent window only (2015-2024)
st = ['SAH', 'SEFV', 'SAF11', 'SAR', 'SAT', 'SAA', 'SAE2',
      'SAG1', 'SEHC', 'SAH3', 'SEMD', 'SEMC', 'SEME',
      'SETB', 'SETG', 'SAH21', 'SEHB', 'SEFV01', 'SEFV02',
      'SEEB01', 'SEEB03', 'SEED03', 'SEEE03']

# (item code, item name) pairs from the saved lookup table
file_path = os.path.join(data_dir, 'cpi_codes.csv')
codes = pd.read_csv(file_path, index_col=0)
code_names = codes['item_name'].to_dict().items()

# --- Recent window: three requests ---
years = (2015, 2024)

# `st` codes with the `nsa` prefix
dst = dict((nsa + code, name) for code, name in code_names
           if code in st)
dfs = bls_api(dst, years)

# `st` codes with the `sa` prefix
dst2 = dict((sa + code, name + ' (SA)') for code, name in code_names
            if code in st)
dfs2 = bls_api(dst2, years)

# `lt` codes with the `sa` prefix, except those in `lts`, which are
# requested over the long window below
dst3 = dict((sa + code, name + ' (SA)') for code, name in code_names
            if code in lt and code not in lts)
dfs3 = bls_api(dst3, years)

# --- Long window: one request combining `nsa` and `sa` series ---
years = (1988, 2024)
dlt = dict((nsa + code, name) for code, name in code_names
           if code in lt)
dlts = dict((sa + code, name + ' (SA)') for code, name in code_names
            if code in lts)
dfl = bls_api({**dlt, **dlts}, years)


Post Request Status: REQUEST_SUCCEEDED
Post Request Status: REQUEST_SUCCEEDED
Post Request Status: REQUEST_SUCCEEDED
Post Request Status: REQUEST_SUCCEEDED


In [29]:
# Join the four API results column-wise onto the long-window frame and
# save the combined table with the index labelled 'date'
file_path = os.path.join(data_dir, 'cpi_raw.csv')
cpi_raw = dfl.join(dfs).join(dfs2).join(dfs3)
cpi_raw.to_csv(file_path, index_label='date')


In [30]:
# Preview the combined frame (the same join that is written to cpi_raw.csv)
dfl.join(dfs).join(dfs2).join(dfs3)

Unnamed: 0,All items,Energy,All items less food and energy,Commodities less food and energy commodities,Education,Food,Shelter,Medical care,Services less energy services,Rent of primary residence,...,Commodities less food and energy commodities (SA),Education (SA),Food (SA),Shelter (SA),Medical care (SA),Services less energy services (SA),Rent of primary residence (SA),New vehicles (SA),Used cars and trucks (SA),Gasoline (all types) (SA)
1988-01-01,115.700,87.400,120.800,113.200,,115.700,124.600,134.400,125.200,126.000,...,,,,,,,,,,
1988-02-01,116.000,87.000,121.100,113.300,,115.700,125.000,135.500,125.700,126.300,...,,,,,,,,,,
1988-03-01,116.500,86.500,121.900,114.600,,115.900,125.600,136.300,126.100,126.400,...,,,,,,,,,,
1988-04-01,117.100,87.300,122.400,115.500,,116.600,125.800,136.900,126.500,126.600,...,,,,,,,,,,
1988-05-01,117.500,88.700,122.700,115.500,,117.000,126.200,137.500,126.900,126.900,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-01,307.789,296.004,310.817,167.141,294.357,324.704,385.433,548.431,401.234,404.487,...,166.716,292.491,324.356,385.370,548.158,401.323,404.656,179.361,187.650,332.019
2023-10-01,307.671,286.754,311.380,166.759,294.084,325.731,386.435,549.762,402.549,406.683,...,166.670,292.481,325.312,386.675,549.491,402.671,406.561,179.247,186.879,317.678
2023-11-01,307.051,277.029,311.606,165.367,293.674,325.172,387.892,551.769,404.143,408.838,...,166.301,292.679,325.870,388.398,552.182,404.518,408.366,179.222,189.444,304.982
2023-12-01,306.746,269.375,311.907,164.590,294.040,325.409,389.433,553.485,405.338,410.606,...,166.194,293.506,326.545,389.979,554.295,406.073,409.972,179.551,190.570,303.242


#### Monthly CPI Inflation

In [31]:
# One-month percent change in the 'All items (SA)' index
df = pd.read_csv(os.path.join(data_dir, 'cpi_raw.csv'),
                 index_col='date', parse_dates=True)
s = df.rename(columns={'All items (SA)': 'ALL_S'})[['ALL_S']]
data = s.pct_change().mul(100)

# Append an empty row for the following month (placeholder for a nowcast)
next_mo = data.index[-1] + pd.DateOffset(months=1)
data.loc[next_mo, 'ALL_S'] = np.nan

# label: every month abbreviated; January also carries a two-digit year
data['label'] = [dt.strftime('%b' if dt.month != 1 else '%b \'%y')
                 for dt in data.index]
# label2: January with four-digit year, Apr/Jul/Oct abbreviated, others blank
data['label2'] = [(dt.strftime('%b %Y') if dt.month == 1
                   else dt.strftime('%b') if dt.month in (4, 7, 10)
                   else '')
                  for dt in data.index]
data['FILL'] = 0

# Save the last 20 rows, including the placeholder row
data.tail(20).to_csv(os.path.join(data_dir, 'cpi_monthly.csv'),
                     index_label='date', float_format='%g')

In [33]:
# The final row of `data` is the empty placeholder month, so the latest
# observation is second from the end and the prior one third from the end
ltdate = dtxt(data.index[-2])['mon1']
prdate = dtxt(data.index[-3])['mon1']
ltval = float(data['ALL_S'].iloc[-2])
prval = float(data['ALL_S'].iloc[-3])

# One-sentence summary of the latest and previous one-month changes
text = (f'In {ltdate}, the one-month change '
        f'in the consumer price index (CPI) was {ltval:.1f} '
        'percent, following '
        f'{prval:.1f} percent in {prdate}. ')
write_txt(os.path.join(text_dir, 'cpi_monthly.txt'), text)
print(text)

In January 2024, the one-month change in the consumer price index (CPI) was 0.3 percent, following 0.2 percent in December 2023. 


#### CPI Line Chart

In [34]:
# 12-month percent change for headline and core CPI, from both the
# unsuffixed and the '(SA)' columns of cpi_raw.csv
df = pd.read_csv(os.path.join(data_dir, 'cpi_raw.csv'),
                 index_col='date', parse_dates=True)
# Long column names -> short names used in the output file
rn = {
    'All items': 'ALL',
    'All items less food and energy': 'CORE',
    'All items (SA)': 'ALL_S',
    'All items less food and energy (SA)': 'CORE_S',
}
df = (df.rename(columns=rn)[list(rn.values())]
        .pct_change(12)
        .dropna()
        .mul(100))
df.to_csv(os.path.join(data_dir, 'cpi.csv'), index_label='date',
          float_format='%g')
df.tail(15)

Unnamed: 0_level_0,ALL,CORE,ALL_S,CORE_S
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-11-01,7.110323,5.957829,7.119466,5.963097
2022-12-01,6.454401,5.707835,6.411498,5.680507
2023-01-01,6.410147,5.582596,6.362123,5.543023
2023-02-01,6.035613,5.537754,5.965523,5.493484
2023-03-01,4.984974,5.589603,4.93509,5.560111
2023-04-01,4.93032,5.519416,4.941059,5.515722
2023-05-01,4.047609,5.329805,4.12069,5.332247
2023-06-01,2.969178,4.828967,3.053262,4.855224
2023-07-01,3.17778,4.652862,3.271781,4.707597
2023-08-01,3.665112,4.349245,3.718721,4.412801


In [35]:
# Summary sentence for the latest 12-month change in headline and core CPI.
# `df` holds the 12-month percent changes; its last row is the latest month.
date = dtxt(df.index[-1])['mon1']
allitems = value_text(df['ALL'].iloc[-1])
core = value_text(df['CORE'].iloc[-1])
# The comma follows the date directly (no space before it)
text = ('Consumer prices '+
        f'{allitems} over the year ending {date}, '+
        'according to the Consumer '+
        'Price Index for all urban consumers (CPI-U). '+
        'The core CPI, which does not include the more-'+
        f'volatile food and energy prices, {core} over '+
        f'the same one-year period.')
write_txt(os.path.join(text_dir, 'cpi_main.txt'), text)
print(text)

Consumer prices increased 3.1 percent over the year ending January 2024 , according to the Consumer Price Index for all urban consumers (CPI-U). The core CPI, which does not include the more-volatile food and energy prices, increased 3.9 percent over the same one-year period.


#### CPI: components contribution to total

In [40]:
# Contribution of each major category to the 12-month all-items inflation
# rate, using relative-importance weights carried forward from the weight date
df = pd.read_csv(os.path.join(data_dir, 'cpi_raw.csv'), index_col='date', 
                 parse_dates=True)
# Weights and weight date: the reference date of the weights was stored as
# the index label of the weights CSV, so it is read back from the index name
rw = (pd.read_csv(os.path.join(data_dir, 'cpi_rel_wgts_raw.csv'), 
                  index_col=0))
wgt_date = pd.to_datetime(rw.index.name)
# NOTE(review): drop_duplicates() compares weight VALUES, not item names, so
# a distinct item whose weight equals an earlier item's weight is dropped as
# well. Presumably this is meant to remove items repeated in the table --
# confirm that no needed category is lost.
wgts = rw['CPI-U'].drop_duplicates()

# Calculate contribution to annual growth rate.
# uwt: each series' growth since the weight date times its weight, divided
# by all-items growth since the weight date; columns that are entirely NaN
# are dropped.
uwt = (((df.divide(df.loc[wgt_date])).multiply(wgts))
       .divide((df['All items'].divide(df.loc[wgt_date, 'All items'])), 
               axis=0)).dropna(how='all', axis=1)
cols = ['All items', 'Medical care', 'Housing', 'Food', 
        'Recreation', 'Education', 'Transportation', 
        'Apparel', 'Energy', 'Communication', 'Personal care']
# Weights are in percent and pct_change(12) is a fraction, so the product
# is in percentage points
cont = uwt.multiply(df.pct_change(12)).loc['2019':, cols]

# Latest row and the row 12 months before it, one row per category
res = cont.iloc[[-1, -13]].T
# Keep the two dates before the columns are relabeled
dates = res.columns
res.columns = ['Latest', 'Previous']
res = res.sort_values('Latest', ascending=False)
# 'All items' is kept in `res` but left out of the saved file
res.drop('All items').to_csv(os.path.join(data_dir, 'cpi_comp.csv'), 
                             index_label='name')

In [41]:
# Preview the contributions table (latest month and 12 months earlier)
res

Unnamed: 0,Latest,Previous
All items,3.090885,6.410147
Housing,2.084246,3.663906
Food,0.34806,1.381228
Transportation,0.258488,0.60712
Recreation,0.147624,0.255927
Personal care,0.125643,0.14406
Medical care,0.084195,0.250212
Education,0.060942,0.084546
Apparel,0.001411,0.0801
Communication,-0.051558,-0.025408


In [42]:
# Compare each category's contribution with its basket weight
final = res.join(wgts)
all_items_lt = final.loc['All items', 'Latest']

# AtWgt: contribution a category would make if it contributed exactly in
# proportion to its weight; Share: percent of the all-items rate it accounts for
final['AtWgt'] = final['CPI-U'].div(100).mul(all_items_lt)
final['Share'] = final['Latest'].div(all_items_lt).mul(100)
final = final.drop(index='All items')

# Ratio: size of the actual contribution relative to the at-weight one
final['Ratio'] = final['Latest'].div(final['AtWgt']).abs()
final['ltabs'] = final['Latest'].abs()
# Points: weight x ratio x absolute contribution
final['Points'] = final['CPI-U'].mul(final['Ratio']).mul(final['ltabs'])

final


Unnamed: 0,Latest,Previous,CPI-U,AtWgt,Share,Ratio,ltabs,Points
Housing,2.084246,3.663906,45.065,1.392907,67.432008,1.496328,2.084246,140.54487
Food,0.34806,1.381228,13.555,0.418969,11.260838,0.830752,0.34806,3.919442
Transportation,0.258488,0.60712,15.898,0.491389,8.362916,0.526036,0.258488,2.161714
Recreation,0.147624,0.255927,5.307,0.164033,4.776105,0.899963,0.147624,0.705067
Personal care,0.125643,0.14406,2.357,0.072852,4.064941,1.724625,0.125643,0.51073
Medical care,0.084195,0.250212,8.004,0.247394,2.723963,0.340325,0.084195,0.229343
Education,0.060942,0.084546,2.489,0.076932,1.971657,0.792148,0.060942,0.120156
Apparel,0.001411,0.0801,2.512,0.077643,0.045647,0.018172,0.001411,6.4e-05
Communication,-0.051558,-0.025408,3.417,0.105616,-1.668064,0.488166,0.051558,0.086002
Energy,-0.302764,0.624406,6.655,0.205698,-9.795375,1.471882,0.302764,2.965684


In [43]:
# Generate text describing the five categories selected from `final`.
# NOTE(review): itertools is not imported in this notebook's import cell;
# presumably it is provided by one of the star imports -- confirm.

# Add one text column per (style, period) pair: c_lt, c_pr, to_lt, to_pr,
# of_lt, of_pr
styles = [('c', 'contribution'), ('to', 'contribution_to'), 
          ('of', 'contribution_of')]
groups = [('lt', 'Latest'), ('pr', 'Previous')]
final = final.join(pd.DataFrame({f'{name}_{cname}': final[col].apply(
    lambda x: value_text(x, style, 'pp', threshold=0.1)) for (name, style), (cname, col) 
                              in itertools.product(styles, groups)}))
# Phrase comparing the latest contribution with the year-earlier one
compare = lambda x: compare_text(x.Latest, x.Previous, 
                                 cutoffs=[0.05, 0.3, 1])
final['Compare'] = final.apply(compare, axis=1)
casual = lambda x: value_text(x, 'contribution_to', 'pp', casual=True)
final['to_lt_cas'] = final.Latest.apply(casual)
increase = lambda x: value_text(x, 'increase_by', 'pp', adj='inflation')
final['inc_lt'] = final.Latest.apply(increase)
# Wording for the previous value depends on whether it has the same sign
# as the latest value
final['same_sign'] = final.apply(lambda x: np.where(
    np.sign(x.Latest) == np.sign(x.Previous), 
    value_text(x.Previous, 'plain', 'pp'), 
    value_text(x.Previous, 'contribution_of', 'pp')), axis=1)
# Order categories by the 'Points' column, largest first
t = final.sort_values('Points', ascending=False)
# Remove the article from the contribution_of phrases.
# NOTE(review): this removes every occurrence of "a ", not only a leading one.
t['of_lt'] = t.of_lt.str.replace("a ", "")
t['of_pr'] = t.of_pr.str.replace("a ", "")
t['overweight'] = ''
ltdt = dtxt(dates[0])['mon1']
prdt = dtxt(dates[1])['mon1']
# Extra sentence for the category with the largest Ratio, only when that
# ratio exceeds 2
if t.Ratio.max() > 2:
    ocat = t.Ratio.idxmax()
    otxt = (f'The {ocat.lower()} category makes up '+
            f'{t.loc[ocat, "CPI-U"]:.1f} percent of the CPI '+
            f'basket, but accounts for {t.loc[ocat, "Share"]:.1f} '+
            f'percent of {ltdt} inflation. ')
    t.at[ocat, 'overweight'] = otxt
    
# Top four categories by Points, plus the highest-weight remaining category
cat1 = t.index[0]
ltall = res.loc['All items', 'Latest']
cat2 = t.index[1]
cat3 = t.index[2]
cat4 = t.index[3]
cat5 = t.drop([cat1, cat2, cat3, cat4]).sort_values('CPI-U').index[-1]
# The sentence about cat3 ends with '. ' so that the cat4 sentence does not
# run into it
text = (f'In {ltdt}, {cat1.lower()} prices {t.loc[cat1, "to_lt"]} '+
        f'the CPI one-year inflation rate of {ltall:.1f} percent, '+
        f"{t.loc[cat1, 'Compare']} the category's {prdt} "+
        f'{t.loc[cat1, "of_pr"]}. {t.loc[cat2, "overweight"]}{cat2} '+
        f'prices {t.loc[cat2, "to_lt_cas"]} {ltdt} inflation, '+
        f'{t.loc[cat2, "Compare"]} the year-prior {t.loc[cat2, "of_pr"]}. '+
        f'{t.loc[cat3, "overweight"]}{cat3} prices {t.loc[cat3, "inc_lt"]} '+
        f'in the latest data, compared to {t.loc[cat3, "same_sign"]} '+
        f'in {prdt}. {cat4} prices '+
        f'{t.loc[cat4, "inc_lt"]} in {ltdt}, {t.loc[cat4, "Compare"]} '+
        f'the year-prior {t.loc[cat4, "of_pr"]}. {t.loc[cat4, "overweight"]}'+
        f'{cat5} prices make up {t.loc[cat5, "CPI-U"]:.1f} percent of the '+
        f'CPI basket and {t.loc[cat5, "to_lt"]} overall inflation in the '+
        f'latest data, {t.loc[cat5, "Compare"]} a {t.loc[cat5, "of_pr"]} '+
        f'one year prior. {t.loc[cat5, "overweight"]}')
write_txt(os.path.join(text_dir, 'cpicomp.txt'), text)
print(text)

In January 2024, housing prices contributed 2.1 percentage points to the CPI one-year inflation rate of 3.1 percent, far below the category's January 2023 contribution of 3.7 percentage points. Food prices added 0.3 percentage point to January 2024 inflation, far below the year-prior contribution of 1.4 percentage points. Energy prices reduced the inflation rate by 0.3 percentage point in the latest data, compared to a contribution of 0.6 percentage point in January 2023.Transportation prices increased the inflation rate by 0.3 percentage point in January 2024, substantially below the year-prior contribution of 0.6 percentage point. Medical care prices make up 8.0 percent of the CPI basket and did not contribute to overall inflation in the latest data, slightly below a contribution of 0.3 percentage point one year prior. 


#### PPI

In [None]:
# PPI series to request: BLS series ID -> column name in the saved file
ppi_series = {
    'WPUFD4': 'PPIFD',
    'WPSFD4': 'PPIFDsa',
    'WPU00000000': 'PPIACO',
    'WPUFD49116': 'PPIFD_Core',
    'WPU101707': 'Steel',
    'WPU081': 'Lumber',
}
# NOTE(review): bls_key is not defined in the visible cells, and the CPI
# requests call bls_api without a key argument -- confirm where it comes from.
# NOTE(review): the window ends in 2023 while the CPI requests end in 2024.
df = bls_api(ppi_series, (1988, 2023), bls_key)

df.to_csv(os.path.join(data_dir, 'ppi_index.csv'), index_label='date')

In [None]:
# 12-month PPI growth: chart data, end-of-line node labels, and summary text.
# Paths are built with os.path.join, as in the CPI cells, so this works
# whether data_dir / text_dir are strings or Path objects.
df = pd.read_csv(os.path.join(data_dir, 'ppi_index.csv'), index_col='date', 
                 parse_dates=True)
ppi = (df[['PPIACO', 'PPIFD']].pct_change(12) * 100)
ppi.to_csv(os.path.join(data_dir, 'ppi.csv'), index_label='date')

# Per-series label offsets from the node_adj helper (defined elsewhere).
# The series with the higher latest value gets an extra 0.35; it is also the
# only series whose node carries a date -- presumably the extra offset makes
# room for it; TODO confirm.
adj = node_adj(ppi[['PPIACO', 'PPIFD']])
smax = ppi[['PPIACO', 'PPIFD']].iloc[-1].idxmax()
adj[smax] = adj[smax] + 0.35

colors = {'PPIACO': 'green!80!blue', 
          'PPIFD': 'violet'}
# Date option per series for end_node: 'm' for the top series, None otherwise
date = {series: 'm' if series == smax else None 
        for series in colors.keys()}
nodes  ='\n'.join([end_node(ppi[series], color, 
                            date=date[series], 
                            size=1.1, offset=adj[series]) 
                   for series, color in colors.items()])
write_txt(os.path.join(text_dir, 'ppi_nodes.txt'), nodes)  

# Values and phrases for the summary text
ch = value_text(ppi.PPIACO.iloc[-1])
fd = value_text(ppi.PPIFD.iloc[-1])
# 12-month growth rate one year before the latest month
prval = ppi.PPIACO.iloc[-13]
# 36-month average of the 12-month growth rate (not used in the text below)
yr3val = ppi.PPIACO.rolling(36).mean().iloc[-1]
compare = compare_text(ppi.PPIACO.iloc[-1], prval, [1.0, 3.0, 5.0])
# `date` is reused from here on as the latest month's text
date = dtxt(ppi.index[-1])['mon1']
date2 = dtxt(ppi.index[-13])['mon1']

text = ('The Bureau of Labor Statistics \\href{https://www.bls.gov/ppi/}'+
        '{report} \\textbf{prices producers receive}. The goods-only producer '+
        f'price index (PPI) for all commodities {c_line(colors["PPIACO"])} '+
        f'{ch} over the year ending {date}, {compare} the 12-month '+
        f'growth rate of {prval:.1f} percent in {date2}. The index for final '+
        f'demand goods, services, and construction {fd} over the year ending '+
        f'{date} {c_line(colors["PPIFD"])}.')
write_txt(os.path.join(text_dir, 'ppi_main.txt'), text)
print(text)

In [None]:
# One month change: log difference times 100 for the PPIFDsa and PPIACO
# columns. Paths are built with os.path.join, as in the CPI cells, so this
# works whether data_dir / text_dir are strings or Path objects.
df = pd.read_csv(os.path.join(data_dir, 'ppi_index.csv'), index_col='date', 
                 parse_dates=True)
s = df[['PPIFDsa', 'PPIACO']]
data = ((np.log(s) - np.log(s.shift(1)))) * 100
# January labels are month, two backslashes, a backtick, two-digit year.
# The backslashes are written as '\\\\' so the literal holds the same text
# without the invalid escape sequence '\`'.
data['label'] = [dt.strftime('%b\\\\`%y') if dt.month == 1 
                 else dt.strftime('%b') for dt in data.index]
data.iloc[-19:].to_csv(os.path.join(data_dir, 'ppi_monthly.csv'), 
                         index_label='date', float_format='%g')
# Latest and previous month, as dates and as formatted values
ltdate = dtxt(data.index[-1])['mon1']
prdate = dtxt(data.index[-2])['mon1']
ltval = value_text(data.PPIFDsa.iloc[-1], 'plain')
prval = value_text(data.PPIFDsa.iloc[-2], 'increase_of', threshold=0.1)
ltaval = value_text(data.PPIACO.iloc[-1], 'plain')
praval = value_text(data.PPIACO.iloc[-2], 'plain')
text = (f'In {ltdate}, the one-month change in PPI final '+
        f'demand prices was {ltval} {c_box("violet")}, following '+
        f'{prval} in {prdate}. The one-month '+
        f'change in the all commodities index was {ltaval} '+
        f'{c_box("green!80!blue")} in {ltdate} and {praval} '+
        f'in {prdate}.')
write_txt(os.path.join(text_dir, 'ppi_monthly.txt'), text)
print(text)

In [None]:
# Steel and lumber producer prices, each divided by its first observation.
# The index file is reloaded here, as in the other PPI cells, so the cell
# does not depend on `df` being left over from the previous cell. Paths are
# built with os.path.join, as in the CPI cells, so this works whether
# data_dir / text_dir are strings or Path objects.
df = pd.read_csv(os.path.join(data_dir, 'ppi_index.csv'), index_col='date', 
                 parse_dates=True)
p = df[['Steel', 'Lumber']]
data = (p / p.iloc[0]).loc['1989':]
data.to_csv(os.path.join(data_dir, 'ppi_commodities.csv'), index_label='date', 
            float_format='%g')

# Change over the latest 12 months and since the December 2019 observation
stlt = value_text(data.Steel.pct_change(12).iloc[-1] * 100)
ltdt = dtxt(data.index[-1])['mon1']
st19 = value_text(((data.Steel.iloc[-1] / 
                    data.loc['2019-12-01', 'Steel']) - 1) * 100)
lumlt = value_text(data.Lumber.pct_change(12).iloc[-1] * 100)
lum19 = value_text(((data.Lumber.iloc[-1] / 
                    data.loc['2019-12-01', 'Lumber']) - 1) * 100)
stcol = c_line('blue!50!gray')
lucol = c_line('green!60!yellow!90!black')
text = ('From the producer price index, cold-rolled steel sheet and '+
        f'strip prices {stcol} have {stlt} over the year ending {ltdt}, '+
        f'and {st19} total since December 2019. '+
        f'Lumber prices {lucol} {lumlt} over the year ending {ltdt}, '+
        f'and {lum19} total since 2019.')
write_txt(os.path.join(text_dir, 'ppi_commodities.txt'), text)
print(text)