In [124]:
import os
import pickle
import time
import typing
from collections import deque

import pandas as pd
import pyfredapi as pf

In [125]:
# Show up to 100 rows when a DataFrame is displayed in the notebook
pd.set_option('display.max_rows', 100)

In [123]:
# Read the FRED API key from the environment instead of committing it to the notebook.
# Export FRED_API_KEY before starting the kernel; API_KEY is None when it is unset.
# NOTE(review): the key that was previously hardcoded here should be treated as leaked and rotated.
API_KEY = os.environ.get('FRED_API_KEY')

In [None]:
# Init w/ base (top-level) FRED categories; 'children' is filled in by build_category_tree.
# Every root needs a distinct id: FRED category 32991 is "Money, Banking, & Finance",
# while id 1 is "Production & Business Activity".
category_tree = [
    {'name': 'Money, Banking, & Finance', 'id': 32991, 'children': []},
    {'name': 'Population, Employment, & Labor Markets', 'id': 10, 'children': []},
    {'name': 'National Accounts', 'id': 32992, 'children': []},
    {'name': 'Production & Business Activity', 'id': 1, 'children': []},
    {'name': 'Prices', 'id': 32455, 'children': []},
    {'name': 'International Data', 'id': 32263, 'children': []},
    {'name': 'U.S. Regional Data', 'id': 3008, 'children': []},
    {'name': 'Academic Data', 'id': 33060, 'children': []},
]

In [None]:
def build_category_tree(category_tree):
    '''
    Fill in the 'children' of every category, breadth-first, via the FRED API.

    Each node is a dict with at least an 'id' key. For every node the child
    categories are requested with pf.get_category_children and stored under
    node['children']; the children are then visited in turn. The node dicts
    are modified in place and the same `category_tree` list is returned.

    A one second pause precedes every request.
    '''
    pending = list(category_tree)
    while pending:
        node = pending.pop(0)
        time.sleep(1)
        response = pf.get_category_children(category_id=node['id'], api_key=API_KEY)
        node['children'] = response['categories']
        # extending with an empty list is a no-op, so leaves need no special case
        pending.extend(node['children'])

    return category_tree

In [None]:
# Populate the whole tree from the FRED API (one request per category, each preceded by a 1s sleep)
category_tree = build_category_tree(category_tree)

In [3]:
# Load the previously built category tree from disk; `with` closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load pickle files you created yourself.
with open('/Users/jonathanl/fred-categories.pkl', 'rb') as pickle_file:
    category_tree = pickle.load(pickle_file)

In [4]:
# Inspect the loaded category tree
category_tree

[{'name': 'Money, Banking, & Finance',
  'id': 1,
  'children': [{'id': 32262,
    'name': 'Business Cycle Expansions & Contractions',
    'parent_id': 1,
    'children': []},
   {'id': 33936, 'name': 'Business Surveys', 'parent_id': 1, 'children': []},
   {'id': 32436, 'name': 'Construction', 'parent_id': 1, 'children': []},
   {'id': 33940, 'name': 'Emissions', 'parent_id': 1, 'children': []},
   {'id': 33955,
    'name': 'Expenditures',
    'parent_id': 1,
    'children': [{'id': 33957,
      'name': 'Consumer Expenditures',
      'parent_id': 33955,
      'children': []}]},
   {'id': 33490, 'name': 'Finance Companies', 'parent_id': 1, 'children': []},
   {'id': 32216, 'name': 'Health Insurance', 'parent_id': 1, 'children': []},
   {'id': 97,
    'name': 'Housing',
    'parent_id': 1,
    'children': [{'id': 32300,
      'name': 'Housing Units Authorized by Building Permits',
      'parent_id': 97,
      'children': []},
     {'id': 32301,
      'name': 'Housing Units Authorized, Bu

In [12]:

def build_category_tree_table(category_tree) -> pd.DataFrame:
    '''
    Builds a pandas dataframe / table from category tree

    The tree is walked breadth-first and one row is produced per node.

    Parameters
    ----------
    category_tree : list of dict
        Root nodes. Every node needs 'id' and 'name'; 'children' is a list of
        nodes of the same shape (a missing or empty 'children' means a leaf).

    Returns
    -------
    pd.DataFrame
        Columns 'id', 'name', 'parent_id' (always present, even for an empty
        tree). Root nodes get parent_id 0; every other node gets the id of the
        node whose 'children' list contains it.

    The input tree is not modified.
    '''
    columns = ['id', 'name', 'parent_id']
    flattened_nodes = []

    # Carry the parent id alongside each node instead of writing it into the
    # caller's dicts; roots use the sentinel parent id 0.
    queue = deque((node, 0) for node in category_tree)

    while queue:
        curr_node, parent_id = queue.popleft()
        flattened_nodes.append(
            {'id': curr_node['id'], 'name': curr_node['name'], 'parent_id': parent_id}
        )
        queue.extend((child, curr_node['id']) for child in curr_node.get('children') or [])

    # Passing `columns` keeps the schema stable when there are no rows.
    return pd.DataFrame(flattened_nodes, columns=columns)
    

In [13]:
# Flatten the nested tree into one row per category with columns id, name, parent_id
flattened_df = build_category_tree_table(category_tree)

In [14]:
# Display the flattened category table
flattened_df

Unnamed: 0,id,name,parent_id
0,1,"Money, Banking, & Finance",0
1,10,"Population, Employment, & Labor Markets",0
2,32992,National Accounts,0
3,1,Production & Business Activity,0
4,32455,Prices,0
...,...,...,...
5021,33641,"Haverhill-Newburyport-Amesbury Town, MA-NH",33638
5022,33644,"Lawrence-Methuen Town-Salem, MA-NH",33638
5023,33646,"Lowell-Billerica-Chelmsford, MA-NH",33638
5024,33259,Buildings,33258


In [27]:
# (number of rows, number of columns) of the flattened table
flattened_df.shape

(5026, 3)

In [30]:
# Terminal (leaf) categories: rows whose id never appears as another row's parent_id
terminal_categories = flattened_df.loc[
    ~flattened_df['id'].isin(flattened_df['parent_id'].unique())
]

In [37]:
# Persist the flattened category table as parquet, then read it back from the same file.
# NOTE(review): absolute, user-specific path -- consider a configurable data directory.
flattened_df.to_parquet('/Users/jonathanl/Workspace/quant-workspace/experimental-fred/data/categories/categories.pq')
flattened_df = pd.read_parquet('/Users/jonathanl/Workspace/quant-workspace/experimental-fred/data/categories/categories.pq')

In [71]:
def subtree_categories(category_ids: typing.List[int], category_df: pd.DataFrame) -> typing.List[int]:
    '''
    Return the ids of all descendants of the given categories.

    Parameters
    ----------
    category_ids : list of int
        Root category ids whose subtrees are wanted. The list is not modified.
    category_df : pd.DataFrame
        Flattened category table with 'id' and 'parent_id' columns.

    Returns
    -------
    list of int
        Descendant ids in breadth-first order, each listed once. The roots
        themselves are only included when they are descendants of another
        root. Empty when none of `category_ids` occurs in the 'id' column.
    '''
    # Work on a copy: consuming the caller's list would leave it empty afterwards.
    roots = list(category_ids)

    # find categories, do they exist?
    if not category_df['id'].isin(roots).any():
        return []

    # Index the table once (parent id -> child ids, in table order) rather than
    # filtering the whole frame for every visited node.
    children_by_parent = {}
    for child_id, parent_id in zip(category_df['id'].tolist(), category_df['parent_id'].tolist()):
        children_by_parent.setdefault(parent_id, []).append(child_id)

    all_children_category_ids = []
    emitted = set()   # ids already placed in the result
    expanded = set()  # ids whose children were already collected (also guards against cycles)
    queue = deque(roots)
    while queue:
        curr_category_id = queue.popleft()
        if curr_category_id in expanded:
            continue
        expanded.add(curr_category_id)

        for child_id in children_by_parent.get(curr_category_id, ()):
            if child_id not in emitted:
                emitted.add(child_id)
                all_children_category_ids.append(child_id)
                queue.append(child_id)

    return all_children_category_ids
    

In [78]:
# Collect every descendant category of the listed root ids.
# Per the base category list earlier in this notebook: 3008 = U.S. Regional Data,
# 33060 = Academic Data, 32263 = International Data.
# NOTE(review): 32043 is not one of the base categories defined in this notebook -- confirm what it refers to.
excluded_child_categories = subtree_categories([3008, 32043, 33060, 32263], flattened_df)

In [86]:
# Number of distinct excluded category ids
len(set(excluded_child_categories))

4675

In [79]:
# NOTE(review): `unique` is not defined or imported in any cell visible in this notebook, so this
# raises NameError on a fresh kernel unless it is defined elsewhere -- confirm or replace it.
len(unique(excluded_child_categories))

4692

In [87]:
# Save the de-duplicated category ids; `with` guarantees the file is flushed and closed.
# NOTE(review): `child_categories` is not assigned in any visible cell (the list computed above is
# `excluded_child_categories`) -- confirm which variable is meant before re-running.
with open('/Users/jonathanl/Workspace/quant-workspace/experimental-fred/data/regional-categories.pkl', 'wb') as out_file:
    pickle.dump(list(set(child_categories)), out_file)

In [91]:
# Load the series metadata table from parquet
# NOTE(review): absolute, user-specific path -- consider a configurable data directory.
series_meta = pd.read_parquet('/Users/jonathanl/Workspace/quant-workspace/experimental-fred/data/series-meta/series-meta.pq')

In [121]:
# Display the series metadata table
series_meta

Unnamed: 0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes,group_popularity,category_id
0,4BIGEUROREC,2023-03-14,2023-03-14,OECD based Recession Indicators for Four Big E...,1960-02-01,2022-08-01,Monthly,M,+1 or 0,+1 or 0,Not Seasonally Adjusted,NSA,2022-12-09 14:52:13-06,1.0,This time series is an interpretation of Organ...,1.0,32262
1,4BIGEURORECD,2023-03-14,2023-03-14,OECD based Recession Indicators for Four Big E...,1960-02-01,2022-08-31,"Daily, 7-Day",D,+1 or 0,+1 or 0,Not Seasonally Adjusted,NSA,2022-12-09 14:47:03-06,1.0,This time series is an interpretation of Organ...,1.0,32262
2,4BIGEURORECDM,2023-03-14,2023-03-14,OECD based Recession Indicators for Four Big E...,1960-02-01,2022-08-31,"Daily, 7-Day",D,+1 or 0,+1 or 0,Not Seasonally Adjusted,NSA,2022-12-09 14:47:03-06,7.0,This time series is an interpretation of Organ...,8.0,32262
3,4BIGEURORECDP,2023-03-14,2023-03-14,OECD based Recession Indicators for Four Big E...,1960-02-01,2022-08-31,"Daily, 7-Day",D,+1 or 0,+1 or 0,Not Seasonally Adjusted,NSA,2022-12-09 14:47:04-06,1.0,This time series is an interpretation of Organ...,1.0,32262
4,4BIGEURORECM,2023-03-14,2023-03-14,OECD based Recession Indicators for Four Big E...,1960-02-01,2022-08-01,Monthly,M,+1 or 0,+1 or 0,Not Seasonally Adjusted,NSA,2022-12-09 14:52:14-06,2.0,This time series is an interpretation of Organ...,8.0,32262
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,SMU25748044300000001,2023-03-14,2023-03-14,All Employees: Transportation and Utilities: T...,1990-01-01,2023-01-01,Monthly,M,Thousands of Persons,Thous. of Persons,Not Seasonally Adjusted,NSA,2023-03-14 02:20:56-05,1.0,,1.0,33646
4,SMU25748044300000001A,2023-03-14,2023-03-14,All Employees: Transportation and Utilities: T...,1990-01-01,2022-01-01,Annual,A,Thousands of Persons,Thous. of Persons,Not Seasonally Adjusted,NSA,2023-03-14 03:40:50-05,1.0,,1.0,33646
5,SMU25748044300000001SA,2023-03-14,2023-03-14,All Employees: Transportation and Utilities: T...,1990-01-01,2023-01-01,Monthly,M,Thousands of Persons,Thous. of Persons,Seasonally Adjusted,SA,2023-03-14 03:09:08-05,1.0,The Federal Reserve Bank of St. Louis seasonal...,1.0,33646
0,WSCNBD01JPA189N,2023-03-14,2023-03-14,Total Buildings by Stage of Construction (DISC...,1955-01-01,2012-01-01,Annual,A,National Currency,National Currency,Not Seasonally Adjusted,NSA,2013-07-23 17:58:46-05,1.0,OECD descriptor ID: WSCNBD01\nOECD unit ID: ML...,2.0,33260


In [None]:
# Releases

In [129]:
# Fetch the series belonging to release 10.
# NOTE(review): the response below reports count 4609 with limit 1000 and offset 0, so this looks
# like only the first page -- confirm whether further calls with an offset are needed.
pf.get_release_series(release_id=10, api_key=API_KEY)

{'realtime_start': '2023-03-16',
 'realtime_end': '2023-03-16',
 'order_by': 'series_id',
 'sort_order': 'asc',
 'count': 4609,
 'offset': 0,
 'limit': 1000,
 'seriess': [{'id': 'CPIAPPNS',
   'realtime_start': '2023-03-16',
   'realtime_end': '2023-03-16',
   'title': 'Consumer Price Index for All Urban Consumers: Apparel in U.S. City Average',
   'observation_start': '1914-12-01',
   'observation_end': '2023-02-01',
   'frequency': 'Monthly',
   'frequency_short': 'M',
   'units': 'Index 1982-1984=100',
   'units_short': 'Index 1982-1984=100',
   'seasonal_adjustment': 'Not Seasonally Adjusted',
   'seasonal_adjustment_short': 'NSA',
   'last_updated': '2023-03-14 07:39:04-05',
   'popularity': 21,
   'group_popularity': 64},
  {'id': 'CPIAPPSL',
   'realtime_start': '2023-03-16',
   'realtime_end': '2023-03-16',
   'title': 'Consumer Price Index for All Urban Consumers: Apparel in U.S. City Average',
   'observation_start': '1947-01-01',
   'observation_end': '2023-02-01',
   'frequ

In [127]:
# Query FRED releases, passing last_updated='2023-03-01' (the response below reports 303 releases)
pf.get_releases(last_updated='2023-03-01', api_key=API_KEY)

{'realtime_start': '2023-03-01',
 'realtime_end': '9999-12-31',
 'order_by': 'release_id',
 'sort_order': 'asc',
 'count': 303,
 'offset': 0,
 'limit': 1000,
 'releases': [{'id': 9,
   'realtime_start': '2023-03-01',
   'realtime_end': '9999-12-31',
   'name': 'Advance Monthly Sales for Retail and Food Services',
   'press_release': True,
   'link': 'http://www.census.gov/retail/',
   'notes': 'The U.S. Census Bureau conducts the Advance Monthly Retail Trade and Food Services Survey to provide an early estimate of monthly sales by kind of business for retail and food service firms located in the United States. Each month, questionnaires are mailed to a probability sample of approximately 4,700 employer firms selected from the larger Monthly Retail Trade Survey. Advance sales estimates are computed using a link relative estimator. For each detailed industry, we compute a ratio of current-to previous month weighted sales using data from units for which we have obtained usable responses f