#### load libraries

In [47]:
# The program runs on Python 3.6
# API key for accessing the ALFRED API: 583d28835966b0340e377c34a107da91
import os
from datetime import datetime

import pandas as pd
import requests
from tqdm import tqdm_notebook

#### preambles

In [54]:
# Series IDs to request from ALFRED.  Earlier selections, kept for reference:
# var_alfred = ['GDPC1', 'GDPCTPI', 'FEDFUNDS', 'PCND', 'PCESV', 'PCDG', 'PRFI', 'PNFI',
#               'PCNDGC96', 'PCESVC96', 'PCDGCC96', 'AWHNONAG', 'CE16OV', 'CNP16OV', 'COMPNFB']
# var_alfred = ['BCNSDODNS']
# var_alfred = ['ASHMA']
var_alfred = [
    'GDPCTPI',
    'PRFI',
    # 'PFNI'  -- NOTE(review): possibly a typo for 'PNFI' (see the full list above); confirm
]

# '%m%d' stamps of the last day of each quarter: the vintage dates that are kept.
vmonthdays = {
    '0331',
    '0630',
    '0930',
    '1231',
}

# '%m%d' stamps of the first day of each quarter: the observation dates that are kept.
omonthdays = {
    '0101',
    '0401',
    '0701',
    '1001',
}

In [55]:
def vintage_dates(start='19470101', end='20190331', monthdays=None):
    """Return the vintage dates that fall between two dates (inclusive).

    Every calendar day in ``pd.date_range(start, end)`` is generated and only
    the days whose ``'%m%d'`` stamp is in ``monthdays`` are kept.

    Parameters
    ----------
    start, end : str or datetime-like
        Bounds handed to ``pd.date_range``.  Make sure the starting/ending
        dates are either both strings or both datetime objects.
    monthdays : collection of str, optional
        ``'%m%d'`` stamps to keep (e.g. ``{'0331', '0630'}``).  When omitted,
        the module-level ``vmonthdays`` is used, which is the original
        behaviour.

    Returns
    -------
    list
        The selected dates, in chronological order, as produced by
        ``pd.date_range``.
    """
    if monthdays is None:
        # Resolve the default at call time so later edits to the global are honoured.
        monthdays = vmonthdays
    return [day for day in pd.date_range(start, end) if day.strftime('%m%d') in monthdays]

# Vintage dates used for every series, built once with the function's default date range.
vdates = vintage_dates()

#### download and reshape data

In [56]:
# Query parameters sent with every request to the observations endpoint.
# NOTE(review): the fallback key below is a credential stored in the file itself.
# Set the FRED_API_KEY environment variable to use another key without editing
# the code; when the variable is unset the original key is used, as before.
params = {
    'api_key': os.environ.get('FRED_API_KEY', '583d28835966b0340e377c34a107da91'),
    'file_type': 'json',
    'observation_start': '1947-01-01',
    'realtime_start': '1947-01-01',
    'realtime_end': '2019-03-31',
}

# Endpoint that returns the observations of a single series.
url = 'https://api.stlouisfed.org/fred/series/observations'

In [57]:
def float_or_nan(x):
    """Convert a value to a float, or return NaN when it cannot be converted.

    Parameters
    ----------
    x : object
        Usually a string; anything accepted by ``float()`` works.

    Returns
    -------
    float
        ``float(x)`` on success, otherwise ``float('nan')``.
    """
    try:
        return float(x)
    # Only the errors float() raises for unconvertible input are treated as
    # "missing"; KeyboardInterrupt/SystemExit are no longer swallowed.
    except (TypeError, ValueError, OverflowError):
        return float('nan')

In [58]:
# Download every series listed in var_alfred and assemble df_raw: one row per
# observation quarter ('date') and one '<SERIES>_<VINTAGE>' column per
# series/vintage pair.
for varindex, varname in tqdm_notebook(enumerate(var_alfred), total=len(var_alfred)):

    # retrieve series from ALFRED
    params['series_id'] = varname
    page = requests.get(url, params=params, timeout=20)
    # Explicit raise rather than assert: asserts are stripped under `python -O`.
    if page.status_code != 200:
        raise RuntimeError(f'Cannot download {varname} series, check request')

    # convert data type from JSON -> DataFrame
    # NOTE(review): a single response is assumed to hold the whole history;
    # confirm the API does not cap/paginate the rows for long series.
    records = page.json()['observations']
    if not records:
        # Fail loudly instead of reusing df/df2 left over from a previous series.
        raise RuntimeError(f'No observations returned for {varname} series')
    observations = pd.DataFrame(records)
    observations['value'] = observations['value'].map(float_or_nan)

    # convert dates from string obj -> datetime obj (every column except 'value')
    for column in observations.columns:
        if column != 'value':
            observations[column] = pd.to_datetime(observations[column])

    # keep only observation dates in 'omonthdays'
    # .copy() so the assignments below act on an independent frame, not a slice
    is_quarter_start = observations['date'].dt.strftime('%m%d').isin(omonthdays)
    observations = observations[is_quarter_start].copy()

    # change 'date' from datetime obj -> string obj ('1947:Q1')
    observations['date'] = observations['date'].map(lambda x: f'{x.year}:Q{(x.month + 2) // 3}')
    observations.reset_index(drop=True, inplace=True)

    # reshape to long format with columns ['date', 'vintage', 'value', 'series']:
    # for each observation quarter, the value in effect at every vintage date
    # (NaN where no real-time interval covers that vintage date)
    frames = []
    for quarter, group in observations.groupby('date'):
        temp_values = [float('nan')] * len(vdates)
        for row in group.itertuples(index=False):
            for index, vdate in enumerate(vdates):
                if row.realtime_start <= vdate <= row.realtime_end:
                    temp_values[index] = row.value
        # The group key is used for 'date' instead of a variable leaked from the inner loop.
        frames.append(pd.DataFrame({'date': quarter, 'vintage': vdates,
                                    'value': temp_values, 'series': varname}))
    # One concat at the end instead of growing the frame inside the loop.
    df = pd.concat(frames)

    # reshape to wide format: ['date', 'SERIES1_VINTAGE1', 'SERIES1_VINTAGE2', ...]
    # Every quarter has exactly one row per entry of vdates, so pivot applies
    # as long as no two vintage dates share the same quarter label.
    df['vintage'] = df['vintage'].map(lambda x: f'{x.year}Q{(x.month + 2) // 3}')
    df2 = df.pivot(index='date', columns='vintage', values='value')
    df2.columns = [f'{varname}_{vintage}' for vintage in df2.columns]
    df2 = df2.reset_index()

    # merge all series
    df_raw = df2 if varindex == 0 else pd.merge(df_raw, df2, on='date', how='outer')

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

In [59]:
# save to disk
# Writes the merged table df_raw to 'alfred_raw.csv' (a relative path) using
# to_csv's default options.
df_raw.to_csv('alfred_raw.csv')