In [1]:
import os
import time
import typing

import numpy as np
import pandas as pd
import pyfredapi as pf
# ^ ** install this package "pyfredapi" via "pip install pyfredapi" (or add to your requirements.txt if you have a virtual environment)

In [103]:
# --- CONSTANTS ---

# API key:
# Sign up for your own API key at: https://fred.stlouisfed.org/docs/api/api_key.html
# The key is read from the FRED_API_KEY environment variable so that no credential
# is stored in the notebook. API_KEY is None when the variable is not set.
# NOTE(review): pyfredapi is documented to fall back to FRED_API_KEY itself when
# api_key is None - confirm against the installed version.
# NOTE(review): any key that was previously committed in this cell should be regenerated.
API_KEY = os.environ.get('FRED_API_KEY')

# Location to store / serialize data
# Override by setting the FRED_DATA_DIR environment variable; the default is the
# path this notebook was originally run with.
DATA_ROOT_DIR = os.environ.get('FRED_DATA_DIR', '/Users/jonathanl/fred/data')

# Seconds to sleep in between repeated hits to API
SLEEP_SEC = 1

# Category Tree Construction
# Base / Root / Top-Level Categories
# Each node is a dict with 'name', 'id' and a 'children' list that starts out empty.
ROOT_CATEGORIES = [
    {'name': 'Money, Banking, & Finance', 'id': 32991, 'children': []},
    {'name': 'Population, Employment, & Labor Markets', 'id': 10, 'children': []},
    {'name': 'National Accounts', 'id': 32992, 'children': []},
    {'name': 'Production & Business Activity', 'id': 1, 'children': []},
    {'name': 'Prices', 'id': 32455, 'children': []},
    {'name': 'International Data', 'id': 32263, 'children': []},
    {'name': 'U.S. Regional Data', 'id': 3008, 'children': []},
    {'name': 'Academic Data', 'id': 33060, 'children': []},
]

In [104]:
# --- FUNCTIONS, ALL ---

def build_category_tree(category_tree: typing.List[dict]) -> typing.List[dict]:
    '''
    Populate the full category tree below the given root nodes.

    Walks the nodes breadth-first. For every node visited, the child
    categories are requested from the API (after a pause of SLEEP_SEC
    seconds and a progress dot) and stored under the node's 'children'
    key, replacing whatever was there. Nodes whose child list comes back
    empty end that branch.

    The node dicts are modified in place; the list passed in is returned.
    '''
    pending = list(category_tree)
    position = 0
    # `pending` grows while it is being walked; `position` marks the next unvisited node
    while position < len(pending):
        node = pending[position]
        position += 1

        time.sleep(SLEEP_SEC)
        print('.', end='')
        response = pf.get_category_children(category_id=node['id'], api_key=API_KEY)

        node['children'] = response['categories']
        pending.extend(node['children'])

    return category_tree


def build_category_tree_table(category_tree: typing.List[dict]) -> pd.DataFrame:
    '''
    Flatten a fully built category tree into a pandas dataframe.

    The result has one row per node with the columns 'id', 'name' and
    'parent_id', in breadth-first order. Top-level nodes get parent_id 0
    unless they already carry a 'parent_id'; every other node gets the id
    of the node that lists it under 'children'.

    Side effect: the 'parent_id' key is written onto the node dicts of the
    tree that was passed in.
    '''
    pending = list(category_tree)

    # Top-level nodes have no parent in the tree - mark them with 0
    for root in pending:
        root.setdefault('parent_id', 0)

    rows = []
    position = 0
    # `pending` grows while it is being walked; `position` marks the next unvisited node
    while position < len(pending):
        node = pending[position]
        position += 1

        rows.append({
            'id': node['id'],
            'name': node['name'],
            'parent_id': node['parent_id'],
        })

        for kid in node['children']:
            kid['parent_id'] = node['id']
        pending.extend(node['children'])

    return pd.DataFrame.from_records(rows)
    

def extract_terminal_categories(category_tree: typing.List[dict]) -> typing.List[dict]:
    '''
    Collect the terminal nodes of a fully built category tree.

    A terminal node is one whose 'children' list is empty
        (these are the candidates for series retrieval).
    The node dicts themselves are returned, in breadth-first order.
    '''
    leaves = []
    current_level = list(category_tree)
    # Walk the tree one depth level at a time; this visits nodes in the same
    # order as a single FIFO queue would.
    while len(current_level) > 0:
        next_level = []
        for node in current_level:
            kids = node['children']
            if len(kids) == 0:
                leaves.append(node)
            else:
                next_level.extend(kids)
        current_level = next_level
    return leaves


def build_series_meta(category_ids: typing.List[int]) -> pd.DataFrame:
    '''
    Given list of category ids, get the metadata of the series in each category.

    One API request is made per category id, followed by a pause of
    SLEEP_SEC seconds; a progress dot is printed per category.
    Each returned series becomes one row, built from the attributes of the
    series-info object, plus a 'category_id' column linking the series to the
    category it was requested for. Only metadata is collected here - the
    observations themselves are downloaded by build_series_data.

    The per-category frames are concatenated as they are, so index values
    repeat across categories, and a series filed under several of the given
    categories appears once per category.

    Returns an empty dataframe when no category yields any series (including
    an empty `category_ids`), instead of failing inside pd.concat.
    '''
    series_frames = []
    for category_id in category_ids:
        print('.', end='')
        # presumably a mapping of series id -> series-info object; only .values() is used here
        category_series = pf.get_category_series(category_id=category_id, api_key=API_KEY)
        if len(category_series) > 0:
            series_df = pd.DataFrame.from_records([vars(series_info) for series_info in category_series.values()])
            series_df['category_id'] = category_id
            series_frames.append(series_df)
        time.sleep(SLEEP_SEC)

    # pd.concat raises ValueError on an empty list
    if not series_frames:
        return pd.DataFrame()

    return pd.concat(series_frames)


def get_single_series_data(series_id: str) -> typing.Optional[pd.DataFrame]:
    '''
    full time series data, all revisions, for single series
    add series id to dataframe (column 'series') to allow for querying / partitioning

    Best effort: if the download or the column assignment raises, the failure
    is reported on stdout together with the error and None is returned.
    Only Exception subclasses are handled, so KeyboardInterrupt / SystemExit
    still propagate and a long-running download loop can be interrupted.
    '''
    try:
        data = pf.get_series_all_releases(series_id, api_key=API_KEY)
        data['series'] = series_id
        return data
    except Exception as err:
        # Keep going with the remaining series, but say why this one failed
        print(f'!! failed to get: {series_id} !! ({type(err).__name__}: {err})')
        return None
    

def build_series_data(series_ids: typing.List[str]) -> pd.DataFrame:
    '''
    Given: list of series ids
    Retrieve full history, return single dataframe

    Each distinct series id is downloaded once via get_single_series_data,
    with a pause of SLEEP_SEC seconds after every request; the id is printed
    as progress. Ids that occur more than once in `series_ids` (e.g. a series
    filed under several categories) are only requested the first time, so the
    result does not contain the same observations twice.
    Series that fail to download are skipped.

    Returns an empty dataframe when nothing could be downloaded (including an
    empty `series_ids`), instead of failing inside pd.concat.
    '''

    series_dfs = []
    # dict.fromkeys drops repeated ids while keeping first-seen order
    for series_id in dict.fromkeys(series_ids):
        print(f'{series_id}, ', end='')
        series_data = get_single_series_data(series_id)
        if series_data is not None:
            series_dfs.append(series_data)
        time.sleep(SLEEP_SEC)

    # pd.concat raises ValueError on an empty list
    if not series_dfs:
        return pd.DataFrame()

    return pd.concat(series_dfs)


In [105]:
#  Starting with root categories, retrieve full category tree structure
#  ** Note: this will take a while ** (one API request plus a SLEEP_SEC pause per category)
#  The node dicts in ROOT_CATEGORIES are filled in place: their 'children' lists are populated.
category_tree = build_category_tree(ROOT_CATEGORIES)

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [106]:
#  Convert tree to dataframe format: one row per category with columns id, name, parent_id
#  (top-level categories get parent_id 0)
category_tree_df = build_category_tree_table(category_tree)

In [107]:
# Persist category tree
# The path is handed to pandas directly so that pandas opens and closes the file itself
# (passing open(...) left the file handle unclosed).
pd.to_pickle(category_tree_df, f'{DATA_ROOT_DIR}/fred-categories.pkl')
# To reload instead of rebuilding:
# category_tree_df = pd.read_pickle(f'{DATA_ROOT_DIR}/fred-categories.pkl')

In [108]:
# Distinct ids of every category in the flattened tree (parents as well as terminal categories)
all_category_ids = list(category_tree_df.id.unique())

In [109]:
# Get all series categorized
# Given the category ids, get all series metadata
# (all_category_ids holds every category in the tree, not only the terminal ones)
# ** Note: this will take a while ** (one API request plus a SLEEP_SEC pause per category)
series_meta_df = build_series_meta(all_category_ids)

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [110]:
# Persist series metadata
# The path is handed to pandas directly so that pandas opens and closes the file itself
# (passing open(...) left the file handle unclosed).
pd.to_pickle(series_meta_df, f'{DATA_ROOT_DIR}/fred-series-meta.pkl')

In [5]:
# series_meta_df = pd.read_pickle(open(f'{DATA_ROOT_DIR}/fred-series-meta.pkl', 'rb'))

In [112]:
# Keep only the series whose popularity score is at least 50
# Note: with the full series definitions + categories at hand, many other selections are possible here
series_popularity_gte_50_df = series_meta_df.loc[series_meta_df['popularity'] >= 50]

In [114]:
# (rows, columns) of the filtered metadata, i.e. how many series rows passed the popularity filter
series_popularity_gte_50_df.shape

(753, 17)

In [115]:
# Download the full history for the series that passed the popularity filter
# ** Note: this will take a while ** (one API request plus a SLEEP_SEC pause per series id)
# Series that fail to download are reported in the output and skipped.
series_data_popularity_gte_50_df = build_series_data(series_popularity_gte_50_df.id)

NROU, NROUST, EXPINF10YR, EXPINF1YR, EXPINF2YR, MICH, REAINTRATREARAT10Y, REAINTRATREARAT1YE, CNP16OV, LNS14000006, U2RATE, U6RATE, MEHOINUSA646N, MEHOINUSA672N, MEPAINUSA672N, WFRBLB50107, WFRBST01134, CNP16OV, POPTHM, TTLHH, TTLHHM156N, STTMINWGMA, STTMINWGMI, STTMINWGNJ, STTMINWGOH, STTMINWGPA, STTMINWGTX, STTMINWGVA, CCSA, IC4WSA, ICSA, IITTRHB, FDHBFIN, FDHBFRBN, FYFRGDA188S, FYFSD, FYFSGDA188S, FYGFDPUN, FYOIGDA188S, FYONGDA188S, GFDEBTN, GFDEGDQ188S, GFDGDPA188S, MTSDS133FMS, MVMTD027MNFRBDAL, JHDUSRGDPBR, JHGDPBRINDX, USPHCI, USREC, USRECD, USSLIND, TLNRESCONS, TLRESCONS, TTLCONS, ACTLISCOU12420, ACTLISCOU19100, ACTLISCOU29820, ACTLISCOU38060, INDPRO, MNFCTRIRSA, ECOMPCTSA, ECOMSA, MRTSSM44112USN, MRTSSM44X72USS, RETAILIMSA, RETAILIRSA, RRSFS, RSAFS, RSXFS, FRGSHPUSM649NCIS, M12MTVUSM227NFWA, TRFVOLUSM227NFWA, TRUCKD11, APU0000701111, APU0000702111, APU0000703112, APU0000708111, APU0000709112, APU0000718311, APU000072610, APU0000FF1101, APU0000FN1101, APU0000FS1101, CORESTICKM1

MVLOAS, REVOLSL, RIFLPBCIANM60NM, SLOAS, TERMCBAUTO48NS, TERMCBCCALLNS, TERMCBPER24NS, TOTALSL, DRALACBN, DRALACBS, DRBLACBS, DRCCLACBS, DRCCLT100S, DRCLACBS, DRCRELEXFACBS, DRSFRMACBS, CORCCACBS, TMBACBW027SBOG, USGSEC, DRTSCILM, DRTSCIS, DRTSCLCC, STDSAUTO, CLF16OV, CIVPART, LNS11300060, LNS11324230, CE16OV, EMRATIO, LNS12300060, UNEMPLOY, LNS14000002, UNRATE, UNRATENSA, LNS11300001, LNS11300002, FEDMINNFRWG, LES1252881600Q, PAYEMS, AHETPI, AWHAETP, CES0500000003, USPRIV, CES1021100001, USMINE, CES2000000003, USCONS, AWHMAN, CES3000000003, MANEMP, USWTRADE, USTRADE, CES4300000001, CES4348100001, CES4348400001, USINFO, USFIRE, TEMPHELPS, USPBS, USEHS, CES7000000003, USLAH, USSERV, USGOVT, JTSJOL, JTSQUR, JTSLDL, GPDI, GPDIC1, GPSAVE, PMSAVE, PRFI, PSAVE, PSAVERT, A191RL1Q225SBEA, A191RP1Q027SBEA, A939RC0Q052SBEA, A939RX0Q048SBEA, GDP, GDPA, GDPC1, GDPC1CTM, GDPCA, GDPNOW, GDPPOT, GNP, NA000334Q, NGDPPOT, STLENI, WEI, A091RC1Q027SBEA, FGEXPND, GCEC1, M318501Q027NBEA, W006RC1Q027SBEA, W

In [116]:
# Persist series data
# The path is handed to pandas directly so that pandas opens and closes the file itself
# (passing open(...) left the file handle unclosed).
pd.to_pickle(series_data_popularity_gte_50_df, f'{DATA_ROOT_DIR}/fred-series-data-pop-gte-50.pkl')

In [6]:
# Reload the previously persisted series data instead of downloading it again
# The path is handed to pandas directly so that pandas opens and closes the file itself.
# NOTE: unpickling executes code from the file - only load pickles written by this notebook.
series_data_popularity_gte_50_df = pd.read_pickle(f'{DATA_ROOT_DIR}/fred-series-data-pop-gte-50.pkl')

In [111]:
# Spot check: metadata rows for a handful of series ids
# (a series shows up once per category it was retrieved for)
series_meta_df[series_meta_df.id.isin(['PCEPI', 'EFFR', 'WTREGEN', 'DTWEXBGS', 'SP500', 'WALCL'])]

Unnamed: 0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes,group_popularity,category_id
89,PCEPI,2023-03-26,2023-03-26,Personal Consumption Expenditures: Chain-type ...,1959-01-01,2023-01-01,Monthly,M,Index 2012=100,Index 2012=100,Seasonally Adjusted,SA,2023-02-24 07:43:07-06,77,BEA Account Code: DPCERG\r\n\r\nThe Personal C...,78,9
9,EFFR,2023-03-26,2023-03-26,Effective Federal Funds Rate,2000-07-03,2023-03-23,Daily,D,Percent,%,Not Seasonally Adjusted,NSA,2023-03-24 08:01:02-05,79,For additional historical federal funds rate d...,79,118
25,DTWEXBGS,2023-03-26,2023-03-26,Nominal Broad U.S. Dollar Index,2006-01-02,2023-03-17,Daily,D,Index Jan 2006=100,Index Jan 2006=100,Not Seasonally Adjusted,NSA,2023-03-20 15:20:09-05,76,,76,94
2,DTWEXBGS,2023-03-26,2023-03-26,Nominal Broad U.S. Dollar Index,2006-01-02,2023-03-17,Daily,D,Index Jan 2006=100,Index Jan 2006=100,Not Seasonally Adjusted,NSA,2023-03-20 15:20:09-05,76,,76,105
485,WALCL,2023-03-26,2023-03-26,Assets: Total Assets: Total Assets (Less Elimi...,2002-12-18,2023-03-22,"Weekly, As of Wednesday",W,Millions of U.S. Dollars,Mil. of U.S. $,Not Seasonally Adjusted,NSA,2023-03-23 15:33:03-05,98,,98,32215
646,WTREGEN,2023-03-26,2023-03-26,Liabilities and Capital: Liabilities: Deposits...,1986-01-08,2023-03-22,"Weekly, Ending Wednesday",W,Billions of U.S. Dollars,Bil. of U.S. $,Not Seasonally Adjusted,NSA,2023-03-23 15:34:03-05,78,This account is the primary operational accoun...,78,32215
6,SP500,2023-03-26,2023-03-26,S&P 500,2013-03-25,2023-03-24,"Daily, Close",D,Index,Index,Not Seasonally Adjusted,NSA,2023-03-24 19:13:28-05,83,The observations for the S&P 500 represent the...,83,32255


In [174]:
# Metadata rows whose frequency is exactly 'Daily' (other daily variants such as 'Daily, Close' do not match)
series_meta_df[series_meta_df.frequency == 'Daily']

Unnamed: 0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes,group_popularity,category_id
13,MEXINTDUSD,2023-03-26,2023-03-26,Mexican Intervention: Banco de Mexico Purchase...,1997-10-27,2011-05-31,Daily,D,Millions of USD,Mil. of USD,Not Seasonally Adjusted,NSA,2011-07-01 11:01:25-05,9,Source: Banco de Mexico: http://www.banxico.or...,9,32145
0,THREEFF1,2023-03-26,2023-03-26,Fitted Instantaneous Forward Rate 1 Year Hence,1990-01-02,2023-03-17,Daily,D,Percent,%,Not Seasonally Adjusted,NSA,2023-03-21 15:22:05-05,34,Kim and Wright (2005) produced this data by fi...,34,33825
1,THREEFF10,2023-03-26,2023-03-26,Fitted Instantaneous Forward Rate 10 Years Hence,1990-01-02,2023-03-17,Daily,D,Percent,%,Not Seasonally Adjusted,NSA,2023-03-21 15:21:10-05,45,Kim and Wright (2005) produced this data by fi...,45,33825
2,THREEFF2,2023-03-26,2023-03-26,Fitted Instantaneous Forward Rate 2 Years Hence,1990-01-02,2023-03-17,Daily,D,Percent,%,Not Seasonally Adjusted,NSA,2023-03-21 15:22:06-05,49,Kim and Wright (2005) produced this data by fi...,49,33825
3,THREEFF3,2023-03-26,2023-03-26,Fitted Instantaneous Forward Rate 3 Years Hence,1990-01-02,2023-03-17,Daily,D,Percent,%,Not Seasonally Adjusted,NSA,2023-03-21 15:22:06-05,15,Kim and Wright (2005) produced this data by fi...,15,33825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38,BAMLEMRACRPIASIATRIV,2023-03-26,2023-03-26,ICE BofA Asia Emerging Markets Corporate Plus ...,1998-12-31,2023-03-23,Daily,D,Index,Index,Not Seasonally Adjusted,NSA,2023-03-24 09:38:04-05,15,The ICE BofA Asia Emerging Markets Corporate P...,15,32413
39,BAMLEMRECRPIEMEATRIV,2023-03-26,2023-03-26,ICE BofA EMEA Emerging Markets Corporate Plus ...,1998-12-31,2023-03-23,Daily,D,Index,Index,Not Seasonally Adjusted,NSA,2023-03-24 09:38:10-05,1,"The ICE BofA Europe, the Middle East, and Afri...",1,32413
40,BAMLEMRLCRPILATRIV,2023-03-26,2023-03-26,ICE BofA Latin America Emerging Markets Corpor...,1998-12-31,2023-03-23,Daily,D,Index,Index,Not Seasonally Adjusted,NSA,2023-03-24 09:37:20-05,3,The ICE BofA Latin America Emerging Markets Co...,3,32413
41,BAMLEMUBCRPIUSTRIV,2023-03-26,2023-03-26,ICE BofA US Emerging Markets Corporate Plus In...,1998-12-31,2023-03-23,Daily,D,Index,Index,Not Seasonally Adjusted,NSA,2023-03-24 09:37:21-05,2,The ICE BofA US Emerging Markets Corporate Plu...,2,32413


In [163]:
# Fetch release dates, requesting descending sort order and a limit of 1000 entries
rels_dts = pf.get_releases_dates(api_key=API_KEY, sort_order='desc', limit=1000)

In [176]:
# Reduce each entry to a (date, release_id, release_name) tuple and order by date string, latest first
sorted([(rel['date'], rel['release_id'], rel['release_name']) for rel in rels_dts['release_dates']], key=lambda x: x[0], reverse=True)

[('2023-03-26', 101, 'FOMC Press Release'),
 ('2023-03-25', 441, 'Coinbase Cryptocurrencies'),
 ('2023-03-25', 101, 'FOMC Press Release'),
 ('2023-03-24', 441, 'Coinbase Cryptocurrencies'),
 ('2023-03-24', 86, 'Commercial Paper'),
 ('2023-03-24', 72, 'Daily Treasury Inflation-Indexed Securities'),
 ('2023-03-24', 453, 'Distributional Financial Accounts'),
 ('2023-03-24', 279, 'Economic Policy Uncertainty'),
 ('2023-03-24', 502, 'Euro Short Term Rate'),
 ('2023-03-24', 378, 'Federal Funds Data'),
 ('2023-03-24', 101, 'FOMC Press Release'),
 ('2023-03-24', 386, 'GDPNow'),
 ('2023-03-24', 18, 'H.15 Selected Interest Rates'),
 ('2023-03-24',
  22,
  'H.8 Assets and Liabilities of Commercial Banks in the United States'),
 ('2023-03-24', 504, 'Historical Overnight AMERIBOR Unsecured Interest Rate'),
 ('2023-03-24', 185, 'Interest Rate on Reserve Balances'),
 ('2023-03-24', 304, 'Interest Rate Spreads'),
 ('2023-03-24', 484, 'Key ECB Interest Rates'),
 ('2023-03-24', 446, 'Labor Force Partici

In [151]:
# Number of release-date entries that came back
len(rels_dts['release_dates'])

1000

In [170]:
# Raw release-date entries: dicts with 'release_id', 'release_name' and 'date'
rels_dts['release_dates']

[{'release_id': 101,
  'release_name': 'FOMC Press Release',
  'date': '2023-03-26'},
 {'release_id': 441,
  'release_name': 'Coinbase Cryptocurrencies',
  'date': '2023-03-25'},
 {'release_id': 101,
  'release_name': 'FOMC Press Release',
  'date': '2023-03-25'},
 {'release_id': 441,
  'release_name': 'Coinbase Cryptocurrencies',
  'date': '2023-03-24'},
 {'release_id': 86, 'release_name': 'Commercial Paper', 'date': '2023-03-24'},
 {'release_id': 72,
  'release_name': 'Daily Treasury Inflation-Indexed Securities',
  'date': '2023-03-24'},
 {'release_id': 453,
  'release_name': 'Distributional Financial Accounts',
  'date': '2023-03-24'},
 {'release_id': 279,
  'release_name': 'Economic Policy Uncertainty',
  'date': '2023-03-24'},
 {'release_id': 502,
  'release_name': 'Euro Short Term Rate',
  'date': '2023-03-24'},
 {'release_id': 378,
  'release_name': 'Federal Funds Data',
  'date': '2023-03-24'},
 {'release_id': 101,
  'release_name': 'FOMC Press Release',
  'date': '2023-03-24'

In [136]:
# Is release 189 among the fetched release dates?
[rel for rel in rels_dts['release_dates'] if rel['release_id'] == 189]

[]

In [145]:
# Same check by name: any fetched release whose name contains 'Standard'?
[rels for rels in rels_dts['release_dates'] if 'Standard' in rels['release_name']]

[]

In [139]:
# Fetch release 189 directly by id
rel_sp500 = pf.get_release(release_id=189, api_key=API_KEY)

In [140]:
# Show the release record that came back
rel_sp500

{'realtime_start': '2023-03-26',
 'realtime_end': '2023-03-26',
 'releases': [{'id': 189,
   'realtime_start': '2023-03-26',
   'realtime_end': '2023-03-26',
   'name': 'Standard & Poors',
   'press_release': False,
   'link': 'https://us.spindices.com/indices/equity/sp-500'}]}