In [None]:
import os
import time
import typing
from collections import deque

import numpy as np
import pandas as pd
import pyfredapi as pf
# ^ ** install this package "pyfredapi" via "pip install pyfredapi" (or add to your requirements.txt if you have a virtual environment)

In [177]:
# --- CONSTANTS ---

# API key:
# Sign up for your own API key at: https://fred.stlouisfed.org/docs/api/api_key.html
# The key is read from the FRED_API_KEY environment variable so the secret is
# never stored in (or committed with) the notebook. API_KEY is None when the
# variable is not set; export it before running the cells below.
API_KEY = os.environ.get('FRED_API_KEY')

# Location to store / serialize data
# Set the FRED_DATA_DIR environment variable to point somewhere else;
# the fallback is the path this notebook was originally written against.
DATA_ROOT_DIR = os.environ.get('FRED_DATA_DIR', '/Users/jonathanl/data/fred')

# Seconds to sleep in between repeated hits to API
SLEEP_SEC = 1

# Category Tree Construction
# Base / Root / Top-Level Categories
# Each entry starts with an empty 'children' list, which build_category_tree fills in.
ROOT_CATEGORIES = [
    {'name': 'Money, Banking, & Finance', 'id': 32991, 'children': []},
    {'name': 'Population, Employment, & Labor Markets', 'id': 10, 'children': []},
    {'name': 'National Accounts', 'id': 32992, 'children': []},
    {'name': 'Production & Business Activity', 'id': 1, 'children': []},
    {'name': 'Prices', 'id': 32455, 'children': []},
    {'name': 'International Data', 'id': 32263, 'children': []},
    {'name': 'U.S. Regional Data', 'id': 3008, 'children': []},
    {'name': 'Academic Data', 'id': 33060, 'children': []},
]

In [None]:
# --- FUNCTIONS, ALL ---

def build_category_tree(category_tree: typing.List[dict]) -> typing.List[dict]:
    '''
    Build the full category tree below the given root categories.

    Walks the tree breadth-first: for every node the child categories are
    fetched with pf.get_category_children and stored under the node's
    'children' key, then queued so their own children are fetched in turn.
    A node whose lookup returns no children is terminal and ends that branch.

    Args:
        category_tree: root category dicts; each needs an 'id' key.
            The dicts are modified in place ('children' is overwritten).

    Returns:
        The same list object that was passed in, with nested 'children' lists.

    Note:
        One request is made per category, each preceded by a SLEEP_SEC pause.
    '''
    # deque pops from the left in O(1); list.pop(0) shifts every remaining element.
    queue = deque(category_tree)
    while queue:
        node = queue.popleft()
        time.sleep(SLEEP_SEC)
        print('.', end='')
        response = pf.get_category_children(category_id=node['id'], api_key=API_KEY)
        children = response['categories']
        node['children'] = children
        # Extending with an empty list is a no-op, so terminal nodes need no special case.
        queue.extend(children)

    return category_tree


def build_category_tree_table(category_tree: typing.List[dict]) -> pd.DataFrame:
    '''
    Flatten a full category tree into a pandas dataframe / table.

    The tree is walked breadth-first and every node becomes one row holding
    its own id and name plus the id of its parent category.

    Args:
        category_tree: root category dicts with 'id' and 'name' keys and an
            optional 'children' list of nested dicts of the same shape.
            The input is not modified.

    Returns:
        DataFrame with columns 'id', 'name', 'parent_id' in breadth-first
        order. Root nodes keep an existing 'parent_id' or get 0. Empty input
        yields an empty frame that still has the three columns.
    '''
    columns = ['id', 'name', 'parent_id']
    flattened_nodes = []

    # Carry the parent id next to each queued node instead of writing it into
    # the caller's dicts (the roots are typically a module-level constant).
    queue = deque((node, node.get('parent_id', 0)) for node in category_tree)

    while queue:
        node, parent_id = queue.popleft()
        flattened_nodes.append({'id': node['id'], 'name': node['name'], 'parent_id': parent_id})
        # A missing or None 'children' entry is treated as "no children".
        queue.extend((child, node['id']) for child in node.get('children') or ())

    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(flattened_nodes, columns=columns)
    

def extract_terminal_categories(category_tree: typing.List[dict]) -> typing.List[dict]:
    '''
    Collect the terminal nodes of a full category tree, i.e. the nodes
    without child categories (intended for series retrieval).

    Args:
        category_tree: root category dicts, each with an optional 'children'
            list of nested dicts of the same shape.

    Returns:
        The terminal node dicts (the same objects as in the tree, not copies),
        in breadth-first order. A node whose 'children' entry is missing,
        None or empty counts as terminal.
    '''
    terminal_nodes = []
    # deque pops from the left in O(1); list.pop(0) shifts every remaining element.
    queue = deque(category_tree)
    while queue:
        node = queue.popleft()
        children = node.get('children')
        if children:
            queue.extend(children)
        else:
            terminal_nodes.append(node)
    return terminal_nodes


def build_series_meta(category_ids: typing.List[int]) -> pd.DataFrame:
    '''
    Given a list of category ids, collect the metadata of their series.

    For each category, pf.get_category_series is called and every returned
    series-info object is turned into one row via vars(). A 'category_id'
    column links each row to the category it was listed under. Only series
    metadata is returned here; observations are fetched by build_series_data.

    Args:
        category_ids: ids of the categories to query.

    Returns:
        One DataFrame with the rows of all categories. The index restarts at 0
        for every category because the per-category frames are concatenated
        as-is. An empty DataFrame is returned when no category yields a series.

    Note:
        One request is made per category, each followed by a SLEEP_SEC pause.
    '''
    series_frames = []
    for category_id in category_ids:
        print('.', end='')
        category_series = pf.get_category_series(category_id=category_id, api_key=API_KEY)
        if len(category_series) > 0:
            series_df = pd.DataFrame.from_records([vars(series_info) for series_info in category_series.values()])
            series_df['category_id'] = category_id
            series_frames.append(series_df)
        time.sleep(SLEEP_SEC)

    # pd.concat raises ValueError on an empty list, so handle "nothing found" explicitly.
    if not series_frames:
        return pd.DataFrame()

    return pd.concat(series_frames)


def get_single_series_data(series_id: str) -> typing.Optional[pd.DataFrame]:
    '''
    Full time series data, all revisions, for a single series.

    Fetches the data with pf.get_series_all_releases and adds a 'series'
    column holding the series id to allow for querying / partitioning.

    Args:
        series_id: id of the series to download.

    Returns:
        The fetched data with the extra 'series' column, or None when the
        download (or tagging) fails. Failures are reported on stdout.
    '''
    try:
        data = pf.get_series_all_releases(series_id, api_key=API_KEY)
        data['series'] = series_id
        return data
    except Exception as exc:
        # Best effort: one bad series must not abort a long batch download.
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still stop the run.
        print(f'!! failed to get: {series_id} !! ({type(exc).__name__}: {exc})')
        return None
    

def build_series_data(series_ids: typing.List[str]) -> pd.DataFrame:
    '''
    Given a list of series ids, retrieve the full history of each and return
    everything as a single dataframe.

    Each id is downloaded with get_single_series_data; series that fail to
    download are skipped.

    Args:
        series_ids: ids of the series to download (any iterable of ids works).

    Returns:
        The concatenation of all successfully downloaded frames, or an empty
        DataFrame when the input is empty or every download failed.

    Note:
        Every attempt, successful or not, is followed by a SLEEP_SEC pause.
    '''

    series_dfs = []
    for series_id in series_ids:
        print(f'{series_id}, ', end='')
        series_data = get_single_series_data(series_id)
        if series_data is not None:
            series_dfs.append(series_data)
        time.sleep(SLEEP_SEC)

    # pd.concat raises ValueError on an empty list, so handle "nothing downloaded" explicitly.
    if not series_dfs:
        return pd.DataFrame()

    return pd.concat(series_dfs)


In [None]:
#  Starting with root categories, retrieve full category tree structure
#  ** Note: this will take a while **
#  (one API request per category, with a SLEEP_SEC pause before each)
#  build_category_tree fills ROOT_CATEGORIES in place and returns that same list,
#  so category_tree and ROOT_CATEGORIES refer to one object afterwards.
category_tree = build_category_tree(ROOT_CATEGORIES)

In [None]:
#  Convert tree to dataframe format
#  One row per category with columns id, name and parent_id;
#  root categories get parent_id 0.
category_tree_df = build_category_tree_table(category_tree)

In [None]:
# Persist category tree
# Create the target directory if needed, and hand pandas the path rather than
# an open() handle so the file is reliably closed after writing.
os.makedirs(DATA_ROOT_DIR, exist_ok=True)
category_tree_df.to_pickle(f'{DATA_ROOT_DIR}/fred-categories.pkl')
# category_tree_df = pd.read_pickle(f'{DATA_ROOT_DIR}/fred-categories.pkl')

In [None]:
# Distinct ids of every category in the tree table (roots, intermediate and terminal)
all_category_ids = list(category_tree_df['id'].unique())

In [None]:
# Get all series categorized
# Given the ids of every category in the tree (not only terminal ones), get all series metadata
# Each row carries the category_id it was listed under.
# ** Note: this will take a while **
series_meta_df = build_series_meta(all_category_ids)

In [None]:
# Persist series metadata
# Create the target directory if needed, and hand pandas the path rather than
# an open() handle so the file is reliably closed after writing.
os.makedirs(DATA_ROOT_DIR, exist_ok=True)
series_meta_df.to_pickle(f'{DATA_ROOT_DIR}/fred-series-meta.pkl')

In [None]:
# series_meta_df = pd.read_pickle(open(f'{DATA_ROOT_DIR}/fred-series-meta.pkl', 'rb'))

In [None]:
# Keep only the series whose popularity score is at least 50
# Note: now that you have the full series definitions + categories, there is a lot that you can do here
series_popularity_gte_50_df = series_meta_df.loc[series_meta_df['popularity'] >= 50]

In [None]:
# (rows, columns) of the filtered metadata; the row count is the number of
# series rows that will be passed to the download step.
series_popularity_gte_50_df.shape

In [None]:
# Retrieve full data series download candidates
# ** Note: this will take a while **
# series_meta_df holds one row per (series, category) pair, so a series listed
# under several categories appears more than once; drop the repeats so each
# series is downloaded, and stored, exactly once.
series_data_popularity_gte_50_df = build_series_data(series_popularity_gte_50_df['id'].drop_duplicates())

In [None]:
# Persist series data
# Create the target directory if needed, and hand pandas the path rather than
# an open() handle so the file is reliably closed after writing.
os.makedirs(DATA_ROOT_DIR, exist_ok=True)
series_data_popularity_gte_50_df.to_pickle(f'{DATA_ROOT_DIR}/fred-series-data-pop-gte-50.pkl')

In [None]:
# Reload the persisted series data from disk.
# Passing the path (instead of an open() handle) lets pandas close the file itself.
# Only unpickle files you wrote yourself: loading a pickle can execute arbitrary code.
series_data_popularity_gte_50_df = pd.read_pickle(f'{DATA_ROOT_DIR}/fred-series-data-pop-gte-50.pkl')

In [None]:
# Metadata rows for a hand-picked set of series ids
series_meta_df.loc[series_meta_df['id'].isin(['PCEPI', 'EFFR', 'WTREGEN', 'DTWEXBGS', 'SP500', 'WALCL'])]

In [None]:
# Metadata rows whose frequency is exactly 'Daily'
series_meta_df.loc[series_meta_df['frequency'] == 'Daily']

In [None]:
# Fetch release dates, requested in descending order with a limit of 1000.
# The cells below read the 'release_dates' list from this response, using the
# 'date', 'release_id' and 'release_name' keys of each entry.
rels_dts = pf.get_releases_dates(api_key=API_KEY, sort_order='desc', limit=1000)

In [None]:
# Release dates as (date, release_id, release_name) tuples, latest date first
sorted(
    ((entry['date'], entry['release_id'], entry['release_name']) for entry in rels_dts['release_dates']),
    key=lambda row: row[0],
    reverse=True,
)

In [None]:
# Number of release-date entries returned (the request above asked for at most 1000)
len(rels_dts['release_dates'])

In [None]:
# Show the raw release-date entries as returned by the API
rels_dts['release_dates']

In [None]:
# Release-date entries that belong to release_id 189
list(filter(lambda entry: entry['release_id'] == 189, rels_dts['release_dates']))

In [None]:
# Release-date entries whose release name contains 'Standard'
list(filter(lambda entry: 'Standard' in entry['release_name'], rels_dts['release_dates']))

In [None]:
# Look up the release with id 189 (the same id filtered for above).
# NOTE(review): the variable name suggests this is the S&P 500 release - confirm against the output.
rel_sp500 = pf.get_release(release_id=189, api_key=API_KEY)

In [None]:
# Display the release returned for release_id 189
rel_sp500