## Using Python and LaTeX to create beautiful charts
Brian W. Dew (brianwdew@gmail.com), as of January 15, 2017

---

#### Objective:
Using Python, obtain economic data from the web and save it as csv and txt files to be read by a LaTeX file containing the chart formatting. 

In [1]:
# Import libraries
import pandas as pd
import pandas_datareader.data as web
import datetime
import os

# All csv/txt output is written relative to the data directory, so make sure
# it exists and make it the working directory for the cells that follow.
data_dir = 'data/.'
if not os.path.exists(data_dir):
    os.makedirs(data_dir)
os.chdir(data_dir)

#### Chart 1 Real GDP growth by components

In [2]:
# Updated inputs to pandas datareader:
source = 'fred'
start = datetime.datetime(2005, 1, 1)
# FRED series ID -> component label
series = {'GDPC1': 'Total',
          'GCEC1': 'Gov Spend',
          'GPDIC1': 'Investment',
          'NETEXC': 'Net Exports',
          'PCECC96': 'Consumption'}
# Materialise the IDs once: a dict key view is not a valid column selector
# on Python 3, a plain list works everywhere.
series_ids = list(series)

# Retrieve data as pandas dataframe named df
df = web.DataReader(series_ids, source, start)

# Record GDP total (level) before the loop below overwrites 'GDPC1'
df['Y'] = df['GDPC1']

# Convert each series to its quarterly change as a share of the previous
# quarter's total, annualised (x4) and expressed in percent (x100)
for s in series_ids:
    df[s] = (df[s] - df[s].shift(1)) / df['Y'].shift(1) * 400

# Latest GDP level divided by 1000 for the "...T" figure in the label
Y = (df['Y'].iloc[-1] / 1000).round(decimals=1)
# Drop the first four quarters (the first has no prior quarter to compare to)
df = df[series_ids][4:].round(decimals=1)
df.to_csv('gdp_comp.csv', header=True)  # csv file created

# Declare variables for chart label
q = df.index[-1].quarter
y = df.index[-1].strftime('%Y')
ch = df['GDPC1'].iloc[-1]  # by now this column holds the growth rate

# Write label to txt file (raw string: "\%" is a literal LaTeX percent sign)
with open('gdp_comp.txt', 'w') as text_file:
    text_file.write(                  # txt file created
        r'{} Q{}: Real GDP: {}T; Growth: {}\%'.format(y, q, Y, ch)
    )

#### Chart 2: Unemployment Rates

Collect U3 data from FRED

In [3]:
start = datetime.datetime(2006, 1, 1)
# FRED series ID -> label
series = {'UNRATE': 'Total',
          'LNS14027659': 'No diploma',
          'LNS14027662': 'Adv. Degree'}

# Retrieve data as pandas dataframe named df
# (`source` is the data source name set in the Chart 1 cell)
df = web.DataReader(list(series), source, start)
df.to_csv('unemp.csv', header=True)  # csv file created

# Month and value of the most recent observation, selected by position
d1 = df.index[-1].strftime('%b %Y')
latest_rate = df['UNRATE'].iloc[-1]

# Write label to txt file (raw string: "\%" is a literal LaTeX percent sign)
with open('unemp.txt', 'w') as text_file:
    text_file.write(                  # txt file created
        r'{}: {}\% unemployed and looking for work'.format(d1, latest_rate)
    )

#### Chart 3: U.S. Consumer Price Index since 2006

The first chart is a line plot of the all-items consumer price index (FRED series: CPIAUCSL) and the "core" consumer price index (FRED series: CPILFESL) for all urban consumers in the United States. The core CPI excludes food and energy prices.

In [4]:
# Inputs to the pandas datareader:
start = datetime.datetime(2005, 1, 1)
s1 = 'CPIAUCSL'   # all-items CPI
s2 = 'CPILFESL'   # core CPI

# Retrieve data as pandas dataframe named df
# (`source` is the data source name set in the Chart 1 cell)
df = web.DataReader([s1, s2], source, start)

# Obtain the annual percent change (inflation rate)
for s in (s1, s2):
    df[s] = df[s].pct_change(periods=12) * 100

# The first 12 rows have no year-earlier value; drop them, round, save as csv
df = df[12:].round(decimals=1)
df.to_csv('cpi.csv', header=True)  # csv file created

# Declare variables for chart label
v1 = df[s1].iloc[-1]
v2 = df[s2].iloc[-1]
d1 = df.index[-1].strftime('%b %Y')

# Write label to txt file (raw string: "\%" is a literal LaTeX percent sign)
with open('cpi.txt', 'w') as text_file:
    text_file.write(                  # txt file created
        r'{}: All-items CPI: {}\%; Core CPI: {}\%'.format(d1, v1, v2)
    )

#### Chart 4: U.S. Consumer Price Index recent changes by category

Second is a bar chart showing the most recent two months' changes in consumer prices for major categories of items.

In [5]:
# Updated inputs to pandas datareader:
start = datetime.datetime(2006, 10, 1)
# FRED series ID -> category label (raw strings: "\&" is a LaTeX ampersand)
series = {'CPIOGSSL': r'Other goods \& serv',
          'CPIEDUSL': 'Education',
          'CPIRECSL': 'Recreation',
          'CPIFABSL': r'Food \& Beverage',
          'CPITRNSL': 'Transportation',
          'CPIHOSSL': 'Housing',
          'CPIENGSL': 'Energy',
          'CPIMEDSL': 'Healthcare',
          'CPIAPPSL': 'Apparel'}

# Retrieve data as pandas dataframe named df
# (`source` is the data source name set in the Chart 1 cell)
df = web.DataReader(list(series), source, start)
# Relabel by series ID rather than by position, so a label can never end up
# on the wrong column if the column order differs from the dict order.
df = df.rename(columns=series)

# Obtain the annual percent change (inflation rate)
df = df.pct_change(periods=12) * 100
# Average over the last 120 monthly rows (ten years; missing values skipped)
d = {s: df[s].iloc[-120:].mean() for s in df.columns}

# Write legend months to txt file
with open('cpi_mo1.txt', 'w') as text_file:
    text_file.write(df.index[-2].strftime('%b %Y'))
with open('cpi_mo2.txt', 'w') as text_file:
    text_file.write(df.index[-1].strftime('%b %Y'))

# Keep only latest two months, transpose, and round
df = df.tail(2).transpose().round(decimals=1)
df.columns = ['one', 'two']
# Assign directly on df: writing through `df.loc[:][...]` targets a temporary
# object in current pandas and the new column would be silently lost.
df['ten'] = [round(d[x], 1) for x in df.index]
df.index.name = 'Item'
df = df.sort_values(by='two', axis=0, ascending=False)
df.to_csv('cpi_comp.csv', header=True)  # csv file created

#### Chart 5: Earnings Per Share

This chart reads data from Standard and Poor's on the reported EPS of S&P 500 companies by sector.

In [6]:
# Get xlsx data from Standard and Poor's website
spfile = 'https://us.spindices.com/documents/additional-material/sp-500-eps-est.xlsx'

# NOTE(review): skiprows=61, rows 1-11 and the '2017 Q4' end column are tied
# to the workbook layout at the time of writing -- verify against the file.
# `sheet_name` replaces the removed `sheetname` keyword (pandas >= 0.21).
df = pd.read_excel(spfile, sheet_name='SECTOR EPS', skiprows=61)
# `.ix` no longer exists: rows/columns are selected by label with .loc
# (the frame has a default integer index here) and by position with .iloc.
df = df.loc[1:11, 'INDEX NAME':'2017 Q4']
# Drop columns containing missing values, then keep the last 21 columns
df = df.set_index('INDEX NAME').dropna(axis=1).iloc[:, -21:]

dfs = pd.DataFrame()
dfs['mark'] = df.iloc[:, -1]                  # most recent column
dfs['avg'] = df.iloc[:, -4:].mean(axis=1)     # mean of the last four columns
dfs['max'] = df.max(axis=1)
dfs['min'] = df.min(axis=1)
# Split the minimum into a negative part ('neg') and a non-negative part
# ('min'); rows that do not qualify are left missing in that column.
dfs['neg'] = dfs.loc[dfs['min'] < 0]['min']
dfs['min'] = dfs.loc[dfs['min'] >= 0]['min']
# Store 'max' as the distance above the non-negative minimum (missing -> 0)
dfs['max'] = dfs['max'].subtract(dfs['min'], fill_value=0)
dfs.index.names = ['A']
dfs = dfs.reset_index()

# Shorten the sector names for the chart labels
dfs.replace({r'S&P 500 ': ''}, regex=True, inplace=True)
dfs.replace({r'munication': ''}, regex=True, inplace=True)
dfs.replace({r'state.*$': 'state'}, regex=True, inplace=True)
dfs = dfs.set_index('A').sort_values('mark', ascending=False)

# Missing values are written as '.' in the csv
dfs.fillna('.').to_csv('eps.csv', header=True)  # csv file created

# Name of the last column kept, used as the chart's date label
eps_date = df.columns.values[-1]
# Write label to txt file
with open("eps.txt", "w") as text_file:
    text_file.write(                  # txt file created
        '{}'.format(eps_date)
    )

#### Table 1: Other Economic Indicators

The final cells build a table with recent developments for 11 other indicators. The data come from four separate sources (FRED, CBOE, the University of Michigan, and Quandl) and each indicator has its own formatting, so the code is quite lengthy.

In [7]:
# Series to call from the Federal Reserve Economic Data (FRED) pandas DataReader method
# (`source` is the data source name set in the Chart 1 cell)
fredseries = ['M2OWN', 'MORTGAGE30US', 'INDPRO', 'TWEXBMTH']
start = datetime.datetime(2015, 10, 1)
freddf = web.DataReader(fredseries, source, start).reset_index()
freddf['DATE'] = freddf['DATE'].dt.date
freddf = freddf.set_index('DATE')

# Volatility index (VIX) from CBOE
# NOTE(review): skiprows=3000 is a hard-coded number of leading lines to skip
# (which is why the column names are supplied by hand) -- it will need
# revisiting as the file grows or if its layout changes.
vixurl = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/vixcurrent.csv'
vixcol = ['Date', 'Open', 'High', 'Low', 'VIXCLS']
vix = pd.read_csv(vixurl, skiprows=3000, names=vixcol, parse_dates=['Date']).set_index('Date')['VIXCLS']

# Consumer confidence index data from the University of Michigan website
# (only the last 13 rows are kept)
cc_csv = 'http://www.sca.isr.umich.edu/files/tbmics.csv'
df = pd.read_csv(cc_csv, parse_dates={'DATE': ['Month', 'YYYY']})[-13:]
df['DATE'] = df['DATE'].dt.date
cc = df.set_index('DATE').rename(columns={'ICS_ALL': 'cc'})['cc']

# Quandl requests
quandlapi = 'https://www.quandl.com/api/v3/datasets/'
# NOTE(review): an API key committed to source is effectively public. Set
# QUANDL_API_KEY in the environment to use your own key; the embedded value
# is kept only as a fallback so existing runs behave as before.
quandlkey = '?api_key=' + os.environ.get('QUANDL_API_KEY', 'x7q1kgMKv96cXx83GtSN')
quandldts = '&start_date=2015-10-01'
quandlsrs = {'wti': 'CHRIS/CME_CL1.csv',
             'treas': 'USTREASURY/YIELD.csv',
             'sp': 'YAHOO/INDEX_GSPC.csv'}
# .items() works on both Python 2 and 3 (.iteritems() is Python 2 only)
quandlurls = {k: '{}{}{}{}'.format(quandlapi, v, quandlkey, quandldts)
              for k, v in quandlsrs.items()}
# Read at most 300 rows per dataset and reverse the row order
# (presumably the API returns newest-first -- confirm)
d = {k: pd.read_csv(url, parse_dates=['Date'], nrows=300).set_index('Date').iloc[::-1]
     for k, url in quandlurls.items()}
quandl = pd.concat([d['sp']['Close'],
                    d['wti']['Last'],
                    d['treas'][['3 MO', '2 YR', '10 YR']]], axis=1)
quandl.index = pd.to_datetime(quandl.index)

# DataFrame with all series in the Table
df = pd.concat([quandl, vix, cc, freddf], axis=1)

In [8]:
# Dictionary of table rows and guidance on their order, contents, and format.
# Keyed by the column name of the series; each entry holds:
#   'n'    - row number in the table (1-11)
#   'name' - row label (LaTeX text, hence the raw strings with "\$" and "\&")
#   'm_ch' - number of observations to look back for the monthly change
#   'y_ch' - number of observations to look back for the yearly change
#   't'    - 'diff' to report the change as a difference, 'pct' as a percent
d = {'Close': {'n': 1, 'name': r'S\&P 500 index', 'm_ch': 20, 'y_ch': 252, 't': 'pct'},
     'VIXCLS': {'n': 2, 'name': 'CBOE volatility index (VIX)', 'm_ch': 20, 'y_ch': 252, 't': 'pct'},
     'M2OWN': {'n': 3, 'name': 'Bank deposit interest rate', 'm_ch': 1, 'y_ch': 12, 't': 'diff'},
     '3 MO': {'n': 4, 'name': '3-month treasury bill yield', 'm_ch': 20, 'y_ch': 252, 't': 'diff'},
     '2 YR': {'n': 5, 'name': '2-year treasury bond yield', 'm_ch': 20, 'y_ch': 252, 't': 'diff'},
     '10 YR': {'n': 6, 'name': '10-year treasury bond yield', 'm_ch': 20, 'y_ch': 252, 't': 'diff'},
     'MORTGAGE30US': {'n': 7, 'name': '30-year mortgage rate', 'm_ch': 4, 'y_ch': 52, 't': 'diff'},
     'TWEXBMTH': {'n': 8, 'name': 'US Dollar, broad index', 'm_ch': 1, 'y_ch': 12, 't': 'pct'},
     'Last': {'n': 9, 'name': r'Crude oil, US\$/barrel', 'm_ch': 20, 'y_ch': 252, 't': 'pct'},
     'INDPRO': {'n': 10, 'name': 'Industrial production index', 'm_ch': 1, 'y_ch': 12, 't': 'pct'},
     'cc': {'n': 11, 'name': 'Consumer confidence index', 'm_ch': 1, 'y_ch': 12, 't': 'pct'},
     }
# LaTeX arrows (raw strings so every backslash reaches LaTeX untouched)
upar = r'\quad \color{green!80!blue}$\blacktriangle$\normalcolor'
dnar = r'\quad \color{red!80!orange}$\blacktriangledown$\normalcolor'

In [9]:
# This section adds to each row's dictionary entry the formatted latest value,
# its date, and the monthly and annual change (value, text and arrow).
# Rows of type 'diff' get a difference rather than a percent change.
for k in d:
    row = d[k]
    obs = df[k].dropna()      # this series without the gaps in the combined frame
    latest = obs.iloc[-1]

    # Latest value, formatted per indicator (raw strings keep LaTeX "\%", "\$")
    if row['t'] == 'diff':
        row['val'] = r'{:.2f}\%'.format(round(latest, 2))
    elif k in ['INDPRO', 'cc']:
        row['val'] = '{:.1f}'.format(round(latest, 1))
    elif k == 'Last':         # equality, not a substring test against 'Last'
        row['val'] = r'\${:.2f}'.format(round(latest, 2))
    else:
        row['val'] = '{:.2f}'.format(round(latest, 2))

    # Rows with a one-observation monthly look-back show month and year only
    if row['m_ch'] == 1:
        row['date'] = obs.index[-1].strftime('%b %Y')
    else:
        row['date'] = obs.index[-1].strftime('%Y-%m-%d')

    for s in ['y', 'm']:  # Loop for yearly and monthly changes
        periods = row['{}_ch'.format(s)]
        if row['t'] == 'pct':
            change = obs.pct_change(periods=periods).iloc[-1] * 100
            text = r'{:.1f}\%'.format(round(change, 1))
        else:
            # Compare with the observation `periods` steps before the last
            # one, the same look-back pct_change uses. Indexing with
            # [-periods] is one step short and is always 0 when periods == 1.
            change = latest - obs.iloc[-1 - periods]
            text = r'{:.2f}$\; $'.format(round(change, 2))
        row['{}_ch_v'.format(s)] = change
        row['{}_ch_s'.format(s)] = text

        if change > 0.005:
            row['{}_ar'.format(s)] = upar  # Green up arrow if positive
        elif change < -0.005:
            row['{}_ar'.format(s)] = dnar  # Red down arrow if negative
        else:
            row['{}_ar'.format(s)] = ''    # For cases with no change

In [10]:
# Map each row number to its key in d so rows are written in table order
order = {d[k]['n']: k for k in d}
# One LaTeX table row; raw string so the backslashes (including the "\\"
# row terminator) are written exactly as LaTeX expects them
row_template = r' \ {} $\quad$ & {} & {} & {} & {} & {} $\; $& {} \ \\ '
# Write table rows to txt file
with open('table1.txt', 'w') as text_file:
    # Iterate over the row numbers actually defined instead of a fixed 1-11
    for n in sorted(order):
        sd = d[order[n]]
        text_file.write(row_template.format(
            sd['name'], sd['val'], sd['m_ar'], sd['m_ch_s'],
            sd['y_ar'], sd['y_ch_s'], sd['date'])
        )

#### Run LaTeX file

In [11]:
# Step back up to the parent of the data directory, then compile the
# dashboard (assumes dash.tex lives there and pdflatex is on the PATH).
# os.system returns the command's exit status.
os.chdir(os.pardir)
os.system('pdflatex dash.tex')

0