In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *


qtrs = {1: 'first', 2: 'second', 3: 'third', 4: 'fourth'}

In [2]:
def ip_retrieve_data():
    '''
    Pull industrial production index data from Fed
    '''
    url_base = 'https://www.federalreserve.gov/datadownload/Output.aspx?rel=G17'
    dates = 'from=01/01/1988&to=12/31/2020'
    series = 'series=f97ad3652d87a6c1720943c31423103a'
    settings = 'filetype=csv&label=include&layout=seriescolumn'
    
    url = f'{url_base}&{series}&lastobs=&{dates}&{settings}'
    
    raw_data = pd.read_csv(url)
    
    return raw_data

In [3]:
def ip_get_series_dict(raw_data):
    '''
    Use raw data to identify series and series names 
    and store the results as a python dictionary
    '''
    d = {v: re.sub("\s+[\(\[].*?[\)\]]", "", i.split(';')[0]) 
         for i, v in raw_data.iloc[4, 1:].iteritems()}
    
    return d

In [4]:
def ip_clean_data(raw_data):
    '''
    Convert raw industrial production data to dataframe
    with date index column and clear column names
    '''
    date_column = raw_data.loc[5:, 'Series Description']
    date_index = pd.to_datetime(date_column).rename('Date')
    columns = [series_dict[series_id] 
               for series_id in raw_data.iloc[4, 1:].values]
    
    clean_data = raw_data.iloc[5:, 1:].astype('float')
    clean_data.index = date_index
    clean_data.columns = columns
    
    return clean_data

In [5]:
def ip_retrieve_weights(series_dict):
    '''
    Retrieve relative importance data for each series that
    was identified in the series_dict
    '''
    adj_series_dict = {k[3:-2]: v for k, v in series_dict.items()}
    series = adj_series_dict.keys()
    url = 'https://www.federalreserve.gov/releases/g17/ipdisk/'
    file = 'ipweights_sa.txt'
    columns = ['Series', 'Year', 'January', 'February', 'March', 
              'April', 'May', 'June', 'July', 'August', 
              'September', 'October', 'November', 'December']
    raw_weights = pd.read_csv(f'{url}{file}', sep='\s+', skiprows=1)
    raw_weights.columns = columns
    weights = (raw_weights[raw_weights['Series'].isin(series)]
               .set_index(['Series', 'Year']).stack().reset_index())
    weights['Date'] = (pd.to_datetime(weights['level_2'] + ' 01, ' 
                    + weights['Year'].astype('int').astype('str')))
    weights = (weights.set_index(['Series', 'Date'])[0]
               .unstack().T.rename(adj_series_dict, axis=1)
               .loc['1988':])
    
    return weights

In [6]:
def ip_growth_contrib(data, weights, series_list):
    '''
    Return the one-year contribution to industrial production growth
    for each series in the series_list. 
    
    data: dataframe with industrial production index
    weights: dataframe with relative importance of each series
    series_list: column names that match data and weight columns
    '''
    contribution = pd.DataFrame()
    for series in series_list:
        for date in data[series].index:
            series_weight = weights.loc[date, series] / 100
            growth_rate = (data[series].pct_change(12).loc[date]) * 100
            contribution.at[date, series] = growth_rate * series_weight
            
    return contribution.loc['1989':]

In [7]:
def ip_export_data(data, weights):
    '''
    Save individual csv files that feed into chartbook charts
    
    Extra steps taken to exclude quarters that aren't complete
    
    Charts:
    indpro: Total IP Index and Manufacturing Index Line Chart:
    indprogr: IP Growth Contribution by Market Group - Long-term
    indprogr_rec: IP Growth Contribution by Market Group - Recent
    indprogr2: IP Growth Contribution by Industry Group - Long-term
    indprogr_rec2: IP Growth Contribution by Industry Group - Recent
    '''
    export_file = data_dir / 'indpro.csv'
    series_list = ['Manufacturing', 'Total index']
    data[series_list].loc['1989':].to_csv(export_file, 
                                          index_label='date', 
                                          float_format='%g')
    print('Saved: ', export_file)
    
    series_dict = {'indprogr': ['Consumer goods', 'ENS', 'Materials'],
                   'indprogr2': ['Durable manufacturing', 'Mining', 
                                 'Nondurable manufacturing', 
                                 'Electric and gas utilities']}
    
    for i in [data, weights]:
        i['ENS'] = i['Equipment, total'] + i['Nonindustrial supplies']
    
    quarter = data.index[-1].quarter
    year = data.index[-1].year

    full_quarter = data.index[-1]
    current_quarter_len = (
        len(data.loc[(data.index.quarter == quarter) & 
                           (data.index.year == year)]))
    
    if current_quarter_len < 3:
        full_quarter = (data.index[-1] - 
                           pd.DateOffset(months=current_quarter_len))
    
    for name, series_list in series_dict.items():
        contrib = ip_growth_contrib(data, weights, series_list)
        export_file = data_dir / f'{name}.csv'
        (contrib.resample('QS').mean().loc[:full_quarter]
         .to_csv(export_file, index_label='date', float_format='%g'))
        print('Saved: ', export_file)
        export_file = data_dir / f'{name}_rec.csv'
        contrib.loc['2015':].to_csv(export_file, index_label='date', 
                                    float_format='%g')
        print('Saved: ', export_file)

In [8]:
def three_year_growth_text(clean_data, series):
    '''
    Return short text string with three year growth
    '''
    
    growth = three_year_growth(clean_data, series)
    
    if growth > 0.1:
        text = f'increased at an annual rate of {growth:.1f} percent'
    elif growth < -0.1:
        text = f'decreased at an annual rate of {abs(growth):.1f} percent'
    else:
        text = 'was virtually unchanged'
        
    return text

In [9]:
def ip_export_text(data, weights):
    '''
    Export text files that fill in text in the chartbook
    '''
    s_info = series_info(data['Manufacturing'])
    latest_date = s_info['date_latest_ft']
    mfg_3yr = three_year_growth(data, 'Manufacturing')
    tot_3yr = three_year_growth(data, 'Total index')
    max_diff = (s_info['val_latest'] / s_info['val_max'] - 1) * 100
    mfg = three_year_growth_text(data, 'Manufacturing')
    mn = three_year_growth_text(data, 'Mining')
    ut = three_year_growth_text(data, 'Electric and gas utilities')
    
    text = ('Manufacturing production increased at an annual '+
            f'rate of {mfg_3yr:.1f} percent over the past three years, as '+
            f'of {latest_date}, but remains {abs(max_diff):.1f} percent '+
            f'below its {s_info["date_max_ft"]} rate. Total '+
            '\href{https://www.federalreserve.gov/releases/g17/Current/default.htm}{industrial production} '+
            f'increased at an annual rate of {tot_3yr:.1f} '+
            f'percent over the same period. Mining production {mn}, '+
            f'while production of electric and gas utilities {ut}.')
    
    write_txt(text_dir / 'indpro.txt', text)
    
    print(text)
    
    cg = three_year_growth_text(data, 'Consumer goods')
    eq = three_year_growth_text(data, 'Equipment, total')
    ns = three_year_growth_text(data, 'Nonindustrial supplies')
    mat = three_year_growth_text(data, 'Materials')
    
    text = (f'By market group, production of consumer goods {cg} over the '+
            f'past three years, as of {latest_date}. Production of '+
            f'business equipment {eq}, production of nonidustrial supplies {ns}, and '+
            f'production of materials {mat}.')
    
    write_txt(text_dir / 'indpro2.txt', text)
    
    print(text)

In [10]:
def ip_export_table(clean_data, weights):
    '''
    Create a custom chartbook table from the IP data
    '''
    export_file = data_dir / 'indpro.tex'
    
    d = {'Total index': ' & Total index',
         'Manufacturing': ' & \hspace{2mm}Manufacturing',
         'Durable manufacturing': '\cbox{blue!60!black} & \hspace{4mm}Durable manufacturing',
         'Motor vehicles and parts': ' & \hspace{6mm}Motor vehicles \& parts',
         'Nondurable manufacturing': '\cbox{blue!20!cyan!80!white} & \hspace{4mm}Nondurable manufacturing',
         'Mining': '\cbox{orange!20!yellow} & \hspace{2mm}Mining',
         'Electric and gas utilities': '\cbox{green!80!blue} & \hspace{2mm}Utilities',
         'Consumer goods': '\cbox{violet!60!black} & \hspace{2mm}Consumer goods',
         'Durable consumer goods': ' & \hspace{4mm}Consumer durables',
         'Automotive products': ' & \hspace{6mm}Automotive products',
         'Nondurable consumer goods': ' & \hspace{4mm}Consumer nondurables',
         'Foods and tobacco': ' & \hspace{6mm}Foods and tobacco',
         'Chemical products': ' & \hspace{6mm}Chemical products',
         'Consumer energy products': ' & \hspace{6mm}Consumer energy products',
         'ENS': '\cbox{magenta} & \hspace{2mm}Equipment \& nonindustrial supplies',
         'Equipment, total': ' & \hspace{4mm}Equipment',
         'Industrial equipment': ' & \hspace{6mm}Industrial equipment',
         'Nonindustrial supplies': ' & \hspace{4mm}Nonindustrial supplies',
         'Construction supplies': ' & \hspace{6mm}Construction supplies',
         'Business supplies': ' & \hspace{6mm}Business supplies',
         'Materials': '\cbox{orange!70!yellow} & \hspace{2mm}Materials',
         'Consumer parts': ' & \hspace{4mm}Consumer parts',
         'Equipment parts': ' & \hspace{4mm}Equipment parts',
         'Chemical materials': ' & \hspace{4mm}Chemical materials',
         'Energy materials': ' & \hspace{4mm}Energy materials'}
    
    table = pd.DataFrame()
    data = ip_growth_contrib(clean_data, weights, d.keys())
    table = data.iloc[-3:].iloc[::-1].T
    table.columns = [date.strftime('%b %Y') for date in table.columns]
    table['1-year'] = data.rolling(12).mean().iloc[-1]
    table['3-year'] = data.rolling(36).mean().iloc[-1]
    table['10-year'] = data.rolling(120).mean().iloc[-1]
    table['30-year'] = data.rolling(360).mean().iloc[-1]
    table = table.round(2).applymap('{:,.2f}'.format)
    table.index = [d[name] for name in table.index]

    (table.to_csv(export_file, sep='&', line_terminator='\\\ ', 
                  quotechar=' ', index_label='&'))
    
    print('Saved: ', export_file)
    
    return table

In [11]:
def ip_clean_data2(raw_data):
    '''
    Convert raw industrial production data to dataframe
    with date index column and clear column names
    '''
    date_column = raw_data.loc[5:, 'Series Description']
    date_index = pd.to_datetime(date_column).rename('Date')
    columns = [series_id for series_id in raw_data.iloc[4, 1:].values]
    
    clean_data = raw_data.iloc[5:, 1:].astype('float')
    clean_data.index = date_index
    clean_data.columns = columns
    
    return clean_data

In [12]:
raw_data = ip_retrieve_data()

In [13]:
series_dict = ip_get_series_dict(raw_data)

In [14]:
clean_data = ip_clean_data(raw_data)

In [15]:
weights = ip_retrieve_weights(series_dict)

In [16]:
ip_export_data(clean_data, weights)

Saved:  ../chartbook/data/indpro.csv
Saved:  ../chartbook/data/indprogr.csv
Saved:  ../chartbook/data/indprogr_rec.csv
Saved:  ../chartbook/data/indprogr2.csv
Saved:  ../chartbook/data/indprogr2_rec.csv


In [17]:
ip_export_text(clean_data, weights)

Manufacturing production increased at an annual rate of 0.9 percent over the past three years, as of February 2020, but remains 4.7 percent below its December 2007 rate. Total \href{https://www.federalreserve.gov/releases/g17/Current/default.htm}{industrial production} increased at an annual rate of 2.2 percent over the same period. Mining production increased at an annual rate of 7.6 percent, while production of electric and gas utilities increased at an annual rate of 4.0 percent.
By market group, production of consumer goods increased at an annual rate of 1.2 percent over the past three years, as of February 2020. Production of business equipment increased at an annual rate of 1.7 percent, production of nonidustrial supplies increased at an annual rate of 1.1 percent, and production of materials increased at an annual rate of 3.3 percent.


In [18]:
table = ip_export_table(clean_data, weights)

Saved:  ../chartbook/data/indpro.tex


### Bar chart

In [19]:
base = 'https://www.federalreserve.gov/datadownload/Output.aspx?'
srs = 'rel=G17&series=644452cb9b9f8c5a43cd9afb772f1b16&lastobs=50&'
dt = 'from=&to=&'
oth = 'filetype=csv&label=include&layout=seriescolumn'
url = base + srs + dt + oth

raw_data = pd.read_csv(url)

series_dict = ip_get_series_dict(raw_data)

clean_data = ip_clean_data2(raw_data)
ltdate = clean_data.index[-1].strftime('%B %Y')
write_txt(text_dir / 'ip_ind_ldate.txt', ltdate)

ip = (pd.Series({series_dict[row[0]]: row[1] 
                 for row in clean_data.pct_change(36).iloc[-1].iteritems() 
                 if 'IP' in row[0]}).rename('IP'))
cu = (pd.Series({series_dict[row[0]]: row[1] 
                 for row in clean_data.diff(36).iloc[-1].iteritems() 
                 if 'UTIL' in row[0]}).rename('CU'))
cp = (pd.Series({series_dict[row[0]]: row[1] 
                 for row in clean_data.pct_change(36).iloc[-1].iteritems() 
                 if 'CAP.' in row[0]}).rename('CP'))

  cu = (pd.Series({series_dict[row[0]]: row[1]


In [20]:
final = pd.DataFrame([ip, cp]).T.sort_values('IP', ascending=False) * 100
final.index = final.index.str.replace('and', '\&')
final.round(1).to_csv(data_dir / 'ip_comp.csv', index_label='name', sep=';')

words = ['none', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', '11', 'all 12']
nums = list(range(0, 13))
nums_to_words = dict(zip(nums, words))
verb = {n: 'were' if n!= 1 else 'was' for n in range(0, 13)}

thresh = 0.1
incval = len(final[final['IP'] >= thresh])
decval = len(final[final['IP'] <= -thresh])
unchval = len(final[(final['IP'] < thresh) & (final['IP'] > -thresh)])

text = ('Of a subset of 12 industries that contribute the majority of industrial production, '+
        f'{nums_to_words[incval]} increased \\textbf{{production}} over the past three '+
        f'years, {nums_to_words[decval]} decreased '+
        f'production, and {nums_to_words[unchval]} {verb[unchval]} unchanged '
        +'(see\cbox{green!60!lime}). ')

top = final.iloc[0].name
topval = final.IP.iloc[0]
topch = f'{["increased" if topval >=0 else "decreased"][0]} by {abs(topval):.1f} percent'

scnd = final.iloc[1].name
scndval = final.IP.iloc[1]
scndch = f'{["increased" if scndval >=0 else "decreased"][0]} by {abs(scndval):.1f} percent'

thd = final.iloc[2].name.lower()
thdval = final.IP.iloc[2]
thdch = f'{["increased" if thdval >=0 else "decreased"][0]} by {abs(thdval):.1f} percent'

bot = final.iloc[-1].name.lower()
botval = final.IP.iloc[-1]
botch = f'{["increased" if botval >=0 else "decreased"][0]} by {abs(botval):.1f} percent'

ltdate = clean_data.index[-1].strftime('%B %Y')

text2 = (f'{top} production {topch} in total over the three years ending {ltdate}. '+
         f'{scnd} production {scndch}, and {thd} production {thdch}. '+
         f'In contrast, {bot} production {botch} over the same period.')

txt = text + text2

write_txt(text_dir / 'ip_comp1.txt', txt)

txt

'Of a subset of 12 industries that contribute the majority of industrial production, nine increased \\textbf{production} over the past three years, two decreased production, and one was unchanged (see\\cbox{green!60!lime}). Mining production increased by 24.6 percent in total over the three years ending February 2020. Computer \\& electronic product production increased by 18.4 percent, and electric \\& gas utilities production increased by 12.5 percent. In contrast, aerospace \\& miscellaneous transportation eq. production decreased by 7.8 percent over the same period.'

In [21]:
thresh = 0.1
incval = len(final[final['CP'] >= thresh])
decval = len(final[final['CP'] <= -thresh])
unchval = len(final[(final['CP'] < thresh) & (final['CP'] > -thresh)])

cpch = abs(final.CP).sort_values().index[-1]

cpch_val = final.loc[cpch, 'CP']

cpch_txt = [f'an increase of {cpch_val:.1f} percent' if final.loc[cpch, "CP"] > 0 
            else f'a decrease of {abs(cpch_val):.1f} percent'][0]

cpch2 = abs(final.CP).sort_values().index[-2]

cpch2_val = final.loc[cpch2, 'CP']

cpch2_txt = [f'an increase of {cpch2_val:.1f} percent' if final.loc[cpch2, "CP"] > 0 
            else f'a decrease of {abs(cpch2_val):.1f} percent'][0]

text3 = (f'Over the three years ending {ltdate}, {nums_to_words[incval]} of the 12 '+
         f'industries increased \\textbf{{capacity}}, {nums_to_words[decval]} '+
         f'decreased capacity, and {nums_to_words[unchval]} {verb[unchval]} unchanged '+
         '(see\cbox{cyan!90!blue}). '+
         f'The most significant change over the period was {cpch_txt} in {cpch.lower()} capacity, '+
         f'follwed by {cpch2_txt} in {cpch2.lower()} capacity.')

write_txt(text_dir / 'ip_comp2.txt', text3)

### Latest monthly data in two small plots

In [22]:
date_latest = clean_data.index[-1].strftime('%Y-%m-%d')
month_short = clean_data.index[-1].strftime('%b')

In [23]:
text = ('xtick={{2015-01-01}, {2016-01-01}, {2017-01-01}, '+
        f'{{2018-01-01}}, {{2019-01-01}}, {{{date_latest}}}}}, '+
        f'xticklabels={{`15, `16, `17, `18, `19, {month_short}}}')

In [24]:
text_full = ('\\noindent \hspace*{-2mm} \\begin{tikzpicture}'+
' \\begin{axis}[\\bbar{y}{0}, \dateaxisticks ytick={-4, -2, 0, 2, 4, 6}, '+
' clip=false, width=6.5cm, height=4.6cm, '+
text + 
', minor xtick={}, enlarge y limits=0.06, '+
' enlarge x limits={0.04}]'+
' \sbar{violet!60!black}{date}{Consumer goods}{data/indprogr_rec.csv}'+
' \sbar{magenta}{date}{ENS}{data/indprogr_rec.csv}'+
' \sbar{orange!70!yellow}{date}{Materials}{data/indprogr_rec.csv}'+
' \end{axis}'+
' \end{tikzpicture}'+
' \hfill'+
' \\begin{tikzpicture}'+
' \\begin{axis}[\\bbar{y}{0}, \dateaxisticks ytick={-4, -2, 0, 2, 4, 6}, '+
' clip=false, width=6.5cm, height=4.6cm, '+
text + 
', minor xtick={}, enlarge y limits=0.06, '+
' enlarge x limits={0.04}]'+
' \sbar{blue!60!black}{date}{Durable manufacturing}{data/indprogr2_rec.csv}'+
' \sbar{blue!20!cyan!80!white}{date}{Nondurable manufacturing}{data/indprogr2_rec.csv}'+
' \sbar{orange!20!yellow}{date}{Mining}{data/indprogr2_rec.csv}'+
' \sbar{green!80!blue}{date}{Electric and gas utilities}{data/indprogr2_rec.csv}'+
' \end{axis}'+
' \end{tikzpicture} \\')

In [25]:
write_txt(text_dir / 'ip_latest_monthly.tex', text_full)

### Main line chart 

In [26]:
text = ('xtick={{1989-01-01}, {1995-01-01}, {2000-01-01}, {2005-01-01}, '+
f'{{2010-01-01}}, {{2015-01-01}}, {{{date_latest}}}}},'+
f'xticklabels={{`89, `95, `00, `05, `10, `15, {month_short}}}, ')

text_full = ('\\noindent \hspace*{-2mm} \\begin{tikzpicture}'+
'\\begin{axis}[\\bbar{y}{0}, \dateaxisticks ytick={60, 80, 100}, '+
'enlarge y limits={0.05}, legend cell align={left},'+
text + 
'minor xtick={}, '+
'clip=false, height=4.7cm, width=6.4cm,'+
'legend style={fill=white, legend columns=1, at={(1.02, 0.33)}}]'+
'\\rbars'+
'\\thickline{red}{date}{Manufacturing}{data/indpro.csv}'+
'\stdline{blue!90!black}{date}{Total index}{data/indpro.csv}'+
'\legend{Manufacturing, Total Index};'+
'\end{axis}'+
'\end{tikzpicture}\\')

write_txt(text_dir / 'ip_main_line.tex', text_full)

### Capacity Utilization

In [31]:
base = 'https://www.federalreserve.gov/datadownload/Output.aspx?'
srs = 'rel=G17&series=316680f2d5251c61c995df7ae36b4b07&lastobs=&'
dt = 'from=01/01/1989&to=12/31/2020&'
oth = 'filetype=csv&label=include&layout=seriescolumn'
url = base + srs + dt + oth

d = {'CAPUTL.B00004.S': 'Manufacturing', 'CAPUTL.B50001.S': 'Total index'}

df = pd.read_csv(url, skiprows=5, index_col=0)[d.keys()].rename(d, axis=1)

df.index = pd.to_datetime(df.index)

df.to_csv(data_dir / 'tcu.csv', index_label='date')

node = end_node(df['Total index'], 'blue!80!black')
write_txt(text_dir / 'tcu_tot_node.txt', node)

node = end_node(df['Manufacturing'], 'blue!40!cyan')
write_txt(text_dir / 'tcu_mfg_node.txt', node)

ldate = df.index[-1].strftime("%B %Y")
lval = df['Total index'].iloc[-1]
sval = df.loc['1989-01-01', 'Total index']
tch = sval - lval

text = (f'In {ldate}, the industrial capacity utilization rate was '+
        f'{lval:.1f} percent '+
        '(see {\color{blue!80!black}\\textbf{---}}), '+
        'and the manufacturing capacity utilization rate was '+
        f'{df["Manufacturing"].iloc[-1]:.1f} percent '+
        '(see {\color{blue!40!cyan}\\textbf{---}}). Total capacity '+
        f'utilization has fallen by {tch:.1f} percentage points since '+
        'January 1989.')

write_txt(text_dir / 'tcu.txt', text)

text

'In February 2020, the industrial capacity utilization rate was 77.0 percent (see {\\color{blue!80!black}\\textbf{---}}), and the manufacturing capacity utilization rate was 75.0 percent (see {\\color{blue!40!cyan}\\textbf{---}}). Total capacity utilization has fallen by 8.2 percentage points since January 1989.'