In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import collections
import copy as cp
%matplotlib inline



In [None]:
# Registry of the economic series to load.  Each key is the name of the metric
# column that the entry contributes; insertion order is preserved.
# Instruction keys read by the loading cell:
#   'filename'               - CSV file to read
#   'column_rename'          - optional {raw CSV column: new column name}
#   'invert'                 - optional; metric column is multiplied by -1
#   'apply_delta_to_column'  - optional; metric = row-to-row difference of that column
#   'convert_this_column_to_annual_percent_increase' together with 'frequency'
#                            - optional; metric = percent change versus one year earlier
economic_datasets = collections.OrderedDict()

economic_datasets['real_gdp_growth_rate'] = {
    'filename': 'data/A191RL1Q225SBEA_real_gross_domestic_product.csv',
    'column_rename': {'A191RL1Q225SBEA': 'real_gdp_growth_rate'},
}

economic_datasets['sticky_inflation_rate'] = {
    'filename': 'data/CORESTICKM158SFRBATL_sticky_consumer_price_index_annual_rate_of_change_less_food_and_energy.csv',
    'column_rename': {'CORESTICKM158SFRBATL': 'sticky_inflation_rate'},
}

# Disabled alternative (quarterly federal debt to GDP):
# economic_datasets['federal_deficit_percent_gdp'] = {
#     'filename': 'data/GFDEGDQ188S_federal_debt_to_GDP.csv',
#     'column_rename': {'GFDEGDQ188S': 'federal_debt_percent_gdp'},
#     'convert_this_column_to_annual_percent_increase': 'federal_debt_percent_gdp',
#     'frequency': 'quarterly',
# }

# This series is annual.
economic_datasets['federal_deficit_percent_gdp'] = {
    'filename': 'data/FYFSGDA188S_federal_surplus_or_deficit_as_percent_of_gdp.csv',
    'column_rename': {'FYFSGDA188S': 'federal_deficit_percent_gdp'},
    'invert': True,
}

economic_datasets['nonfarm_job_growth_monthly_thousands'] = {
    'filename': 'data/PAYEMS_total_nonfarm_jobs.csv',
    'column_rename': {'PAYEMS': 'nonfarm_jobs_thousands'},
    'apply_delta_to_column': 'nonfarm_jobs_thousands',
}

economic_datasets['federal_expenditure_percent_annual_growth_rate'] = {
    'filename': 'data/FGEXPND_federal_government_current_expenditures_in_billions.csv',
    'column_rename': {'FGEXPND': 'federal_expenditures_billions'},
    'convert_this_column_to_annual_percent_increase': 'federal_expenditures_billions',
    'frequency': 'quarterly',
}

economic_datasets['s_and_p_500_percent_annual_growth_rate'] = {
    'filename': 'data/s_and_p_500_by_month.csv',
    'convert_this_column_to_annual_percent_increase': 's_and_p_500_value',
    'frequency': 'monthly',
}

# Disabled unemployment series:
# economic_datasets['u3_unemployment'] = {'filename': 'data/UNRATE_civilian_unemployment_rate_U3.csv'}
# economic_datasets['u6_unemployment'] = {'filename': 'data/U6UNEM6AR_u6_unemployment_rate.csv'}



In [None]:
def make_time_series_plot(input_df, y_column, xlabel, ylabel, xlim, ylim):
    """Draw one column of input_df against its 'DATE' column on a new 16x8 figure.

    input_df -- DataFrame holding a 'DATE' column and the column named by y_column.
    y_column -- name of the column plotted on the y axis.
    xlabel   -- x-axis label text.
    ylabel   -- y-axis label text.
    xlim     -- x-axis limits, or None to keep the automatic range.
    ylim     -- y-axis limits, or None to keep the automatic range.

    NOTE(review): a later cell defines make_time_series_plot again with a
    different parameter list (no xlabel); whichever definition ran last is the
    one that callers get.
    """
    plt.figure(figsize=(16, 8))
    axes = plt.gca()
    axes.grid(True)
    axes.plot(input_df['DATE'], input_df[y_column])
    axes.set_xlabel(xlabel, fontsize=22)
    axes.set_ylabel(ylabel, fontsize=22)
    # Apply only the limits the caller actually supplied.
    for limits, apply_limits in ((xlim, axes.set_xlim), (ylim, axes.set_ylim)):
        if limits is not None:
            apply_limits(limits)
    # Argument-less plot call kept from the original; it adds no data.
    axes.plot()

In [None]:


def _annual_percent_increase(values, frequency):
    """Return the percent change of each row versus the row one year earlier.

    values    -- pandas Series, already in chronological order.
    frequency -- 'monthly' (12 rows per year) or 'quarterly' (4 rows per year).

    The first year of rows has nothing to compare against and comes back as NaN.
    Raises ValueError for any other frequency.
    """
    rows_per_year = {'monthly': 12, 'quarterly': 4}
    if frequency not in rows_per_year:
        raise ValueError('Frequency ' + str(frequency) + ' not understood')
    # Work in floats so integer-valued columns are never floor-divided.
    as_float = values.astype(float)
    year_earlier = as_float.shift(rows_per_year[frequency])
    return 100 * (as_float - year_earlier) / year_earlier


# Load every dataset in economic_datasets, derive its metric column as instructed,
# and outer-join all of them on DATE into one wide frame, combined_df.
combined_df = None
# .items() (not the Python-2-only .iteritems()) keeps this cell runnable on 2 and 3.
for metric_key, metric_instructions in economic_datasets.items():
    this_df = pd.read_csv(metric_instructions['filename'])
    this_df['DATE'] = pd.to_datetime(this_df['DATE'])
    this_df = this_df.sort_values('DATE')
    if 'column_rename' in metric_instructions:
        this_df = this_df.rename(columns=metric_instructions['column_rename'])

    # Optional: metric = difference between rows delta_size apart (default 1).
    delta_column = metric_instructions.get('apply_delta_to_column')
    if delta_column is not None:
        print('Do Xform of  {}'.format(this_df.head(10)))
        delta_size = metric_instructions.get('delta_size')
        if delta_size is None:
            delta_size = 1
        print('metric_key:  {} , do delta'.format(metric_key))
        this_df[metric_key] = this_df[delta_column].diff(periods=delta_size)

    # Optional: metric = year-over-year percent change of the named column.
    percent_column = metric_instructions.get('convert_this_column_to_annual_percent_increase')
    if percent_column:
        this_df[metric_key] = _annual_percent_increase(
            this_df[percent_column], metric_instructions.get('frequency'))

    # Optional: flip the sign of the metric column.
    if metric_instructions.get('invert', False):
        this_df[metric_key] = this_df[metric_key] * -1

    if combined_df is None:
        combined_df = this_df.copy()
    else:
        # Outer join keeps dates that appear in only some of the datasets.
        combined_df = pd.merge(combined_df, this_df, on='DATE', how='outer')
combined_df = combined_df.sort_values('DATE')
print('combined_df:  {}'.format(combined_df))

In [None]:
def make_time_series_plot(input_df, y_column, ylabel, xlim, ylim):
    """Plot the non-null values of one column against 'DATE' on a new 16x8 figure.

    input_df -- DataFrame holding a 'DATE' column and the column named by y_column.
    y_column -- name of the column to plot; rows where it is null are left out.
    ylabel   -- y-axis label text (the x axis is always labelled 'Date').
    xlim     -- x-axis limits, or None to keep the automatic range.
    ylim     -- y-axis limits, or None to keep the automatic range.
    """
    has_value = input_df[y_column].notnull()
    plotted_rows = input_df[has_value]

    plt.figure(figsize=(16, 8))
    axes = plt.gca()
    axes.tick_params(axis='both', which='major', labelsize=18)
    axes.grid(True)
    axes.plot(plotted_rows['DATE'].values, plotted_rows[y_column].values)
    axes.set_xlabel('Date', fontsize=22)
    axes.set_ylabel(ylabel, fontsize=22)
    # Apply only the limits the caller actually supplied.
    for limits, apply_limits in ((xlim, axes.set_xlim), (ylim, axes.set_ylim)):
        if limits is not None:
            apply_limits(limits)
    # Argument-less plot call kept from the original; it adds no data.
    axes.plot()

In [None]:
# Y-axis label for each metric's full-history plot, listed in drawing order.
timeseries_instructions = collections.OrderedDict([
    ('real_gdp_growth_rate', {'ylabel': 'Real annual GDP growth rate'}),
    ('sticky_inflation_rate', {'ylabel': 'Annual inflation rate (ignoring food and energy)'}),
    ('federal_deficit_percent_gdp', {'ylabel': 'Annual federal deficit (% GDP)'}),
    ('nonfarm_job_growth_monthly_thousands', {'ylabel': 'Monthly nonfarm job growth rate (thousands)'}),
    ('federal_expenditure_percent_annual_growth_rate', {'ylabel': 'Annual federal spending growth rate (%)'}),
    ('s_and_p_500_percent_annual_growth_rate', {'ylabel': 'S&P500 annual growth rate (%)'})
])

# .items() works on both Python 2 and 3; .iteritems() exists only on Python 2.
for metric_key, metric_instructions in timeseries_instructions.items():
    make_time_series_plot(
        combined_df,
        y_column=metric_key,
        ylabel=metric_instructions['ylabel'],
        xlim=None,
        ylim=None,
    )

In [None]:
def make_annual_plots(input_df, y_column, ylabel, ylim, plot_type):
    """Overlay the 2014-2017 values of one metric on a single 16x12 figure.

    input_df  -- DataFrame with a datetime 'DATE' column and the metric column.
                 NOTE: 'year', 'month' and 'quarter' columns are written onto
                 this frame in place.
    y_column  -- name of the metric column; rows where it is null are skipped.
    ylabel    -- y-axis label text.
    ylim      -- y-axis limits, or None to keep the automatic range.
    plot_type -- 'monthly' plots against month (x range 1-12); any other value
                 plots against calendar quarter (x range 1-4).

    When 2014 has exactly one non-null observation the metric is drawn as one
    bar per year; otherwise each year is drawn as its own line.
    """
    input_df['year'] = input_df['DATE'].dt.year
    input_df['month'] = input_df['DATE'].dt.month
    # Calendar quarter: months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    input_df['quarter'] = (input_df['month'] - 1) // 3 + 1

    is_monthly = plot_type == 'monthly'
    x_column = 'month' if is_monthly else 'quarter'
    to_plot_df = input_df[pd.notnull(input_df[y_column])]

    # (year, colour, label, bar position), listed in drawing/legend order.
    # NOTE(review): the '?' labels look like placeholder or mis-encoded text;
    # confirm the intended names before publishing the figures.
    year_styles = [
        (2016, 'blue', '?', 0.1),
        (2015, 'red', '??', 0.35),
        (2017, 'purple', '???', 0.6),
        (2014, 'black', '????', 0.85),
    ]
    rows_by_year = {}
    for year, _, _, _ in year_styles:
        rows_by_year[year] = to_plot_df[to_plot_df['year'] == year]

    plt.figure(figsize=(16, 12))
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.grid(True)
    xlim = [1, 12] if is_monthly else [1, 4]

    if len(rows_by_year[2014]) == 1:
        # A single 2014 observation is taken to mean one value per year: bar chart.
        print(rows_by_year[2014][y_column].values)
        for year, colour, _, position in year_styles:
            plt.bar([position], height=rows_by_year[year][y_column].values, width=0.2, color=colour)
        plt.xticks(
            [position for _, _, _, position in year_styles],
            [label for _, _, label, _ in year_styles],
            rotation='vertical',
        )
        xlim = [0., 1.]
    else:
        for year, colour, label, _ in year_styles:
            year_rows = rows_by_year[year]
            plt.plot(year_rows[x_column].values, year_rows[y_column].values, color=colour, label=label)
        plt.legend(fontsize=22)
        plt.xlabel('Month' if is_monthly else 'Quarter', fontsize=22)

    plt.ylabel(ylabel, fontsize=22)
    plt.gca().set_xlim(xlim)
    if ylim is not None:
        plt.gca().set_ylim(ylim)
    plt.show()

In [None]:
# Y-axis label and x-axis granularity ('monthly' or 'quarterly') for each
# metric's year-over-year comparison plot, listed in drawing order.
annual_plot_instructions = collections.OrderedDict([
    ('real_gdp_growth_rate', {'ylabel': 'Real annual GDP growth rate', 'plot_type': 'quarterly'}),
    ('sticky_inflation_rate', {'ylabel': 'Annual inflation rate (ignoring food and energy)', 'plot_type': 'monthly'}),
    ('federal_deficit_percent_gdp', {'ylabel': 'Annual federal deficit (% GDP)', 'plot_type': 'monthly'}),
    ('nonfarm_job_growth_monthly_thousands', {'ylabel': 'Monthly nonfarm job growth rate (thousands)', 'plot_type': 'monthly'}),
    ('federal_expenditure_percent_annual_growth_rate', {'ylabel': 'Annual federal spending growth rate (%)', 'plot_type': 'quarterly'}),
    ('s_and_p_500_percent_annual_growth_rate', {'ylabel': 'S&P500 annual growth rate (%)', 'plot_type': 'monthly'})
])

# .items() works on both Python 2 and 3; .iteritems() exists only on Python 2.
for metric_key, metric_instructions in annual_plot_instructions.items():
    make_annual_plots(
        combined_df,
        y_column=metric_key,
        ylabel=metric_instructions['ylabel'],
        ylim=None,
        plot_type=metric_instructions['plot_type'],
    )