In [None]:
import os
import pickle
import time
import typing

import pandas as pd
import pyfredapi as pf

In [None]:
# Let pandas print up to 100 rows before it truncates a frame
pd.options.display.max_rows = 100

In [None]:
# FRED API key, read from the FRED_API_KEY environment variable so the secret
# never lives in the notebook. The value is None when the variable is unset.
# NOTE(review): the key that used to be hard-coded here has been exposed in the
# notebook's history and should be revoked / rotated.
API_KEY = os.environ.get('FRED_API_KEY')

In [None]:
# Init w/ base categories (the roots the crawl starts from).
# Every root needs its own id: two roots sharing an id would make the crawl
# fetch one subtree twice and skip the other entirely.
# NOTE(review): 32991 for 'Money, Banking, & Finance' is taken from the FRED
# category listing - confirm against the live API.
category_tree = [
    {'name': 'Money, Banking, & Finance', 'id': 32991, 'children': []},
    {'name': 'Population, Employment, & Labor Markets', 'id': 10, 'children': []},
    {'name': 'National Accounts', 'id': 32992, 'children': []},
    {'name': 'Production & Business Activity', 'id': 1, 'children': []},
    {'name': 'Prices', 'id': 32455, 'children': []},
    {'name': 'International Data', 'id': 32263, 'children': []},
    {'name': 'U.S. Regional Data', 'id': 3008, 'children': []},
    {'name': 'Academic Data', 'id': 33060, 'children': []},
]

In [None]:
def build_category_tree(category_tree):
    '''
    Fills in the 'children' of every node, breadth-first, from the FRED API

    Each visited node has its 'children' key set (in place) to the 'categories'
    entry returned by pf.get_category_children; those children are then visited
    in turn. Sleeps one second before every request. Returns the same list
    object that was passed in.
    '''
    pending = list(category_tree)
    while pending:
        node = pending.pop(0)
        time.sleep(1)
        response = pf.get_category_children(category_id=node['id'], api_key=API_KEY)
        node['children'] = response['categories']
        if len(node['children']) > 0:
            pending.extend(node['children'])

    return category_tree

In [None]:
# Crawl the whole tree from the API (slow: build_category_tree sleeps 1s before every request)
category_tree = build_category_tree(category_tree)

In [None]:
# Load a pickled category tree; this replaces whatever `category_tree` held before.
# The `with` block closes the file handle once the load is done.
# NOTE: unpickling can execute arbitrary code - only load files you produced yourself.
with open('/Users/jonathanl/fred-categories.pkl', 'rb') as fh:
    category_tree = pickle.load(fh)

In [None]:
# Display the tree (a list of root dicts, each with a nested 'children' list)
category_tree

In [None]:

def build_category_tree_table(category_tree) -> pd.DataFrame:
    '''
    Builds a pandas dataframe / table from category tree

    Walks the tree breadth-first and emits one row per node.

    Parameters
    ----------
    category_tree : list of dict
        Root nodes. Every node needs 'id' and 'name'; nested nodes are read
        from the optional 'children' list.

    Returns
    -------
    pd.DataFrame
        Columns 'id', 'name', 'parent_id', rows in breadth-first order. Root
        nodes get parent_id 0. An empty tree yields an empty frame that still
        has the three columns.

    The input is left untouched: parent ids travel alongside the nodes in the
    work list instead of being written into the node dicts.
    '''
    columns = ['id', 'name', 'parent_id']

    # Work list of (node, parent_id) pairs; roots hang off the pseudo-parent 0
    pending = [(node, 0) for node in category_tree]
    flattened_nodes = []

    # Advance a cursor rather than pop(0), which shifts the whole list each time
    cursor = 0
    while cursor < len(pending):
        curr_node, parent_id = pending[cursor]
        cursor += 1

        flattened_nodes.append(
            {'id': curr_node['id'], 'name': curr_node['name'], 'parent_id': parent_id}
        )

        # Leaves may carry an empty list or no 'children' key at all
        for child in curr_node.get('children') or []:
            pending.append((child, curr_node['id']))

    # Passing the columns explicitly keeps the schema stable for an empty tree
    return pd.DataFrame(flattened_nodes, columns=columns)
    

In [None]:
# Flatten the nested tree into one row per category (id, name, parent_id)
flattened_df = build_category_tree_table(category_tree)

In [None]:
# Preview the flattened category table
flattened_df

In [None]:
# (number of rows, number of columns) of the flattened table
flattened_df.shape

In [None]:
# A terminal (leaf) category is one whose id never appears as anybody's parent_id
terminal_categories = flattened_df.loc[
    ~flattened_df['id'].isin(flattened_df['parent_id'].unique())
]

In [None]:
# Write the flattened table to parquet, then read it straight back.
# A single name for the location keeps the write and the read on the same file.
CATEGORIES_PARQUET = '/Users/jonathanl/Workspace/quant-workspace/experimental-fred/data/categories/categories.pq'
flattened_df.to_parquet(CATEGORIES_PARQUET)
flattened_df = pd.read_parquet(CATEGORIES_PARQUET)

In [None]:
def subtree_categories(category_ids: typing.List[int], category_df: pd.DataFrame) -> typing.List[int]:
    '''
    Returns the ids of every descendant of the given categories

    Parameters
    ----------
    category_ids : list of int
        Ids of the categories whose subtrees should be collected. The list is
        not modified.
    category_df : pd.DataFrame
        Flattened category table with 'id' and 'parent_id' columns.

    Returns
    -------
    list of int
        Descendant ids in breadth-first order, each id listed once. The
        starting ids themselves are only included when they are descendants of
        another starting id. Empty when none of `category_ids` is in the table.
    '''
    # find categories, do they exist?
    categories = category_df[category_df.id.isin(category_ids)]
    if categories.shape[0] < 1:
        return []

    # Work on a copy so the caller's list is not drained by the traversal
    queue = list(category_ids)

    all_children_category_ids = []
    emitted = set()   # ids already placed in the result
    expanded = set()  # ids whose children were already looked up

    # walk thru table, getting children (all category ids)
    cursor = 0
    while cursor < len(queue):
        curr_category_id = queue[cursor]
        cursor += 1

        # Skip repeats: avoids duplicate output for overlapping subtrees and
        # keeps the loop finite should the table ever contain a cycle
        if curr_category_id in expanded:
            continue
        expanded.add(curr_category_id)

        children_category_ids = category_df.loc[
            category_df.parent_id == curr_category_id, 'id'
        ].tolist()

        for child_id in children_category_ids:
            if child_id not in emitted:
                emitted.add(child_id)
                all_children_category_ids.append(child_id)
        queue.extend(children_category_ids)

    return all_children_category_ids
    

In [None]:
# Descendants of U.S. Regional Data (3008), Academic Data (33060), International Data (32263)
# and category 32043 (not named anywhere in this notebook)
excluded_child_categories = subtree_categories([3008, 32043, 33060, 32263], flattened_df)

In [None]:
# How many distinct category ids were collected
len(set(excluded_child_categories))

In [None]:
# Same distinct count via pandas (a bare `unique` is not defined in this notebook)
pd.Series(excluded_child_categories).nunique(dropna=False)

In [None]:
# Persist the de-duplicated descendant ids; the `with` block closes the file.
# NOTE(review): this cell used to pickle `child_categories`, a name the notebook
# never defines. `excluded_child_categories` is assumed to be what was meant -
# confirm, since the target file is called regional-categories.pkl.
with open('/Users/jonathanl/Workspace/quant-workspace/experimental-fred/data/regional-categories.pkl', 'wb') as fh:
    pickle.dump(list(set(excluded_child_categories)), fh)

In [None]:
# Load the series metadata table from its parquet file
series_meta = pd.read_parquet('/Users/jonathanl/Workspace/quant-workspace/experimental-fred/data/series-meta/series-meta.pq')

In [None]:
# Preview the series metadata
series_meta

In [None]:
# Releases

In [None]:
# Fetch the series that belong to release 10
pf.get_release_series(release_id=10, api_key=API_KEY)

In [None]:
# Fetch releases, passing last_updated='2023-03-01'
# NOTE(review): confirm `last_updated` is a parameter pf.get_releases actually honours
pf.get_releases(last_updated='2023-03-01', api_key=API_KEY)