# fred

> Retrieve and process data from the St. Louis FRED

This module is a wrapper around the `fred_api` module, which itself is a fork of [fredapi](https://github.com/mortada/fredapi).

To use the functions in the `fred` module, you'll need an API key for FRED, the data service of the Federal Reserve Bank of St. Louis.

Get one [here](https://fred.stlouisfed.org/docs/api/api_key.html) and store it in an environment variable named `FRED_API_KEY`.

Alternatively, you can supply the API key directly as the `api_key` parameter in each function in the `fred` module.

In [None]:
#| default_exp fred.fred

In [None]:
#| exports
from typing import List, Dict
import os, time

import pandas as pd

import pandasmore as pdm
from finsets.fred.fred_api import Fred
from finsets import RESOURCES

In [None]:
#| exports
# Human-readable name of the data provider
PROVIDER = 'Federal Reserve Economic Data (FRED)'
# Home page of the data provider
URL = 'https://fred.stlouisfed.org/'
# CSV read by this module to map FRED series ids (column 'Variable Name')
# to the short names used as output column names (column 'Description')
LABELS_FILE = RESOURCES/'fred_variable_descriptions.csv'

In [None]:
#| export 
def get_series_info(series: str=None, # FRED series name
                    api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
                    ) -> pd.Series:
    """Return the metadata that FRED reports for the given `series`"""

    client = Fred(api_key=api_key)
    info = client.get_series_info(series)
    return info

In [None]:
get_series_info('TB3MS')

id                                                                       TB3MS
realtime_start                                                      2023-11-01
realtime_end                                                        2023-11-01
title                        3-Month Treasury Bill Secondary Market Rate, D...
observation_start                                                   1934-01-01
observation_end                                                     2023-10-01
frequency                                                              Monthly
frequency_short                                                              M
units                                                                  Percent
units_short                                                                  %
seasonal_adjustment                                    Not Seasonally Adjusted
seasonal_adjustment_short                                                  NSA
last_updated                                        

In [None]:
#| export
def default_raw_vars() -> List[str]:
    """List of FRED series that will be used in `download()` if none are specified"""

    # A new list is built on every call, so callers may modify the result freely
    return [
        'TB3MS', 'DTB3', 'GS10', 'DGS10', 'GS1', 'DGS1',
        'AAA', 'BAA', 'DAAA', 'DBAA',
        'FEDFUNDS', 'DFF',
        'CPIAUCSL', 'CPIAUCNS',
        'INDPRO', 'IPB50001SQ', 'UNRATE',
        'GDP', 'GDPC1', 'GNP', 'GNPC96', 'GDPPOT',
        'USREC', 'RECPROUSM156N', 'CFNAI',
        'UMCSENT', 'MICH',
        'USEPUINDXM', 'USEPUNEWSINDXM', 'USEPUINDXD',
        'VIXCLS', 'VXOCLS',
    ]


In [None]:
print(default_raw_vars())

['TB3MS', 'DTB3', 'GS10', 'DGS10', 'GS1', 'DGS1', 'AAA', 'BAA', 'DAAA', 'DBAA', 'FEDFUNDS', 'DFF', 'CPIAUCSL', 'CPIAUCNS', 'INDPRO', 'IPB50001SQ', 'UNRATE', 'GDP', 'GDPC1', 'GNP', 'GNPC96', 'GDPPOT', 'USREC', 'RECPROUSM156N', 'CFNAI', 'UMCSENT', 'MICH', 'USEPUINDXM', 'USEPUNEWSINDXM', 'USEPUINDXD', 'VIXCLS', 'VXOCLS']


In [None]:
#| export
def raw_metadata(vars: List[str]=None, #list of variables requested by user; if None, will use `default_raw_vars()`
                 api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
                 ) -> pd.DataFrame:
    """Collect the FRED metadata of every series in `vars` (one row per series) and attach the names found in `LABELS_FILE`

    Raises `ValueError` if `vars` is empty.
    """

    if vars is None: vars = default_raw_vars()
    if len(vars) == 0: raise ValueError("`vars` must contain at least one FRED series name")

    rows = []
    for i, v in enumerate(vars):
        if i > 0: time.sleep(0.1)  # short pause between consecutive requests
        # `api_key` is forwarded on every request, not only on the first one
        rows.append(get_series_info(v, api_key).to_frame().T)
    # Concatenate once instead of growing the frame inside the loop
    df = pd.concat(rows)

    df['min_year'] = pd.to_datetime(df['observation_start']).dt.year
    df['max_year'] = pd.to_datetime(df['observation_end']).dt.year
    # Drop the long-form columns; their `*_short` counterparts are kept and renamed below
    df = df.drop(['frequency','units', 'seasonal_adjustment'], axis=1)

    labels = pd.read_csv(LABELS_FILE)
    labels = labels.rename({'Variable Name': 'id', 'Description':'name'}, axis=1)

    # Left merge: series without an entry in LABELS_FILE are kept, with a missing `name`
    meta = df.merge(labels, how='left', on='id')
    meta['output_of'] = 'fred.download'
    meta = pdm.order_columns(meta, these_first=['name','id','title','min_year','max_year','output_of'])

    meta = meta.rename({'id':'fred_id', 'title':'label', 'frequency_short':'frequency','units_short':'unity', 'seasonal_adjustment_short':'seasonal_adj'}, axis=1)

    return meta
    

In [None]:
#| eval: false
metadata = raw_metadata()

In [None]:
#| eval: false
metadata.head(2)

Unnamed: 0,name,fred_id,label,min_year,max_year,output_of,realtime_start,realtime_end,observation_start,observation_end,frequency,unity,seasonal_adj,last_updated,popularity,notes
0,yield_3mt,TB3MS,"3-Month Treasury Bill Secondary Market Rate, D...",1934,2023,fred.download,2023-11-01,2023-11-01,1934-01-01,2023-10-01,M,%,NSA,2023-11-01 15:21:08-05,77,"Averages of Business Days, Discount Basis"
1,yield_3mt,DTB3,"3-Month Treasury Bill Secondary Market Rate, D...",1954,2023,fred.download,2023-11-01,2023-11-01,1954-01-04,2023-10-31,D,%,NSA,2023-11-01 15:20:09-05,75,Discount Basis


In [None]:
#| export
def parse_varlist(vars: List[str]=None, #list of variables requested by user; if None, will use `default_raw_vars()`
                  api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
                  ) -> Dict[str, list]:
    "Splits `vars` by frequency and returns dict with one list of series names per frequency"

    if vars is None: vars = default_raw_vars()

    # One metadata request per series; only the short frequency code is kept
    records = [{'series': series, 'freq': get_series_info(series, api_key)['frequency_short']}
               for series in vars]
    # Build the frame once instead of concatenating onto an empty frame in a loop
    df = pd.DataFrame(records, columns=['series','freq'])

    vars_by_freq = {}
    # Keys follow the order of `value_counts()` on the frequency column
    for f in df['freq'].value_counts().index:
        vars_by_freq[f] = list(df['series'].loc[df.freq==f])

    return vars_by_freq

In [None]:
#| eval: false
varlist = parse_varlist()

In [None]:
#| eval: false
print(varlist['Q'])

['IPB50001SQ', 'GDP', 'GDPC1', 'GNP', 'GNPC96', 'GDPPOT']


In [None]:
#| export 
def download(vars: List[str]=None, # FRED series names (a single name is also accepted); if None, will use `default_raw_vars()`
               api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
               ) -> Dict[str, pd.DataFrame]: 
    """Retrieves `vars` from FRED, splits them by frequency and returns dict with one pd.DataFrame per frequency"""

    # A bare string would otherwise be iterated character by character downstream
    if isinstance(vars, str): vars = [vars]

    api = Fred(api_key=api_key)

    # Forward `api_key` so the metadata lookups use the same key as the data requests
    varlist = parse_varlist(vars, api_key)

    out = {}
    for freq, series_names in varlist.items():
        # One column per series, named after the series; rows where every series is missing are dropped
        frames = [api.get_series(series).to_frame(name=series) for series in series_names]
        out[freq] = pd.concat(frames, axis=1).dropna(how='all')

    return out

In [None]:
#| eval: false
rdat = download()

In [None]:
#| eval: false
rdat.keys()

dict_keys(['M', 'D', 'Q'])

In [None]:
#| eval: false
rdat['Q']

Unnamed: 0,IPB50001SQ,GDP,GDPC1,GNP,GNPC96,GDPPOT
1919-01-01,4.6783,,,,,
1919-04-01,4.7141,,,,,
1919-07-01,5.2339,,,,,
1919-10-01,5.1085,,,,,
1920-01-01,5.5835,,,,,
...,...,...,...,...,...,...
2024-10-01,,,,,,21052.30
2025-01-01,,,,,,21143.69
2025-04-01,,,,,,21235.79
2025-07-01,,,,,,21329.03


In [None]:
#| eval: false
rdat['M']

Unnamed: 0,TB3MS,GS10,GS1,AAA,BAA,FEDFUNDS,CPIAUCSL,CPIAUCNS,INDPRO,UNRATE,USREC,RECPROUSM156N,CFNAI,UMCSENT,MICH,USEPUINDXM,USEPUNEWSINDXM
1854-12-01,,,,,,,,,,,1.0,,,,,,
1855-01-01,,,,,,,,,,,0.0,,,,,,
1855-02-01,,,,,,,,,,,0.0,,,,,,
1855-03-01,,,,,,,,,,,0.0,,,,,,
1855-04-01,,,,,,,,,,,0.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-01,5.16,3.75,5.24,4.65,5.75,5.08,303.841,305.109,102.2924,3.6,0.0,0.28,-0.40,64.2,3.3,145.02995,179.30804
2023-07-01,5.25,3.90,5.37,4.66,5.74,5.12,304.348,305.691,103.2895,3.5,0.0,0.06,0.20,71.5,3.4,117.76544,132.71063
2023-08-01,5.30,4.17,5.37,4.95,6.02,5.33,306.269,307.026,103.3170,3.8,0.0,0.02,-0.22,69.4,3.5,88.46748,76.84670
2023-09-01,5.32,4.38,5.44,5.13,6.16,5.33,307.481,307.789,103.6115,3.8,0.0,0.14,0.02,67.9,3.2,110.09229,113.80537


In [None]:
#| eval: false
rdat['D']

Unnamed: 0,DTB3,DGS10,DGS1,DAAA,DBAA,DFF,USEPUINDXD,VIXCLS,VXOCLS
1954-01-04,1.33,,,,,,,,
1954-01-05,1.28,,,,,,,,
1954-01-06,1.28,,,,,,,,
1954-01-07,1.31,,,,,,,,
1954-01-08,1.31,,,,,,,,
...,...,...,...,...,...,...,...,...,...
2023-10-27,5.32,4.84,5.39,5.71,6.72,5.33,91.83,21.27,
2023-10-28,,,,,,5.33,59.81,,
2023-10-29,,,,,,5.33,69.80,,
2023-10-30,5.33,4.88,5.41,5.73,6.74,5.33,176.55,19.75,


In [None]:
#| export 
def clean(data: Dict[str, pd.DataFrame]=None, # Dict with one DataFrame per frequency, as returned by `download`. If None, downloads `vars` using `download` function
          vars: List[str]=None,         # Only used when `data` is None. If None, downloads `default_raw_vars`
          labels: Dict[str, str]=None,  # Maps FRED series names to the column names wanted in the output. If None, uses the mapping stored in `LABELS_FILE`
          api_key: str=None             # FRED api key. If None, will use os.getenv("FRED_API_KEY")
          ) -> Dict[str, pd.DataFrame]:
    """Renames the columns of each DataFrame in `data` using `labels` and sets up its time-series index; returns dict with one pd.DataFrame per frequency"""

    if data is None:
        if vars is None: vars = default_raw_vars()
        data = download(vars, api_key=api_key)
    if labels is None:
        labels = pd.read_csv(LABELS_FILE).set_index('Variable Name')['Description'].to_dict()

    # No FRED client is created here: supplied `data` is processed without touching the API
    out = {}
    for freq, df in data.items():
        # Move the date index into a column called 'date', which is what `setup_tseries` is given
        df = df.rename(columns=labels).reset_index().rename({'index':'date'}, axis=1)
        # NOTE(review): `pdm.setup_tseries` is assumed to build the frequency-specific index from 'date' - confirm against pandasmore
        df = pdm.setup_tseries(df, freq=freq).drop('date', axis=1)
        out[freq] = df

    return out

In [None]:
#| eval: false
cdat = clean(rdat)

In [None]:
#| eval: false
cdat.keys()

dict_keys(['M', 'D', 'Q'])

In [None]:
#| eval: false
cdat['Q']

Unnamed: 0_level_0,dtdate,indprod_q,nom_gdp,real_gdp,nom_gnp,real_gnp,pot_rgdp
Qdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1919Q1,1919-01-01,4.6783,,,,,
1919Q2,1919-04-01,4.7141,,,,,
1919Q3,1919-07-01,5.2339,,,,,
1919Q4,1919-10-01,5.1085,,,,,
1920Q1,1920-01-01,5.5835,,,,,
...,...,...,...,...,...,...,...
2024Q4,2024-10-01,,,,,,21052.30
2025Q1,2025-01-01,,,,,,21143.69
2025Q2,2025-04-01,,,,,,21235.79
2025Q3,2025-07-01,,,,,,21329.03


In [None]:
#| eval: false
cdat['M']

Unnamed: 0_level_0,dtdate,yield_3mt,yield_10yt,yield_1yt,yield_aaa,yield_baa,yield_fedf,cpi,cpi_nsa,indprod,unemp_rate,rec_dum,rec_prob,cfnai,sent_mich,exp_inflation,pu_bbd,punews_bbd
Mdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1854-12,1854-12-01,,,,,,,,,,,1.0,,,,,,
1855-01,1855-01-01,,,,,,,,,,,0.0,,,,,,
1855-02,1855-02-01,,,,,,,,,,,0.0,,,,,,
1855-03,1855-03-01,,,,,,,,,,,0.0,,,,,,
1855-04,1855-04-01,,,,,,,,,,,0.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06,2023-06-01,5.16,3.75,5.24,4.65,5.75,5.08,303.841,305.109,102.2924,3.6,0.0,0.28,-0.40,64.2,3.3,145.02995,179.30804
2023-07,2023-07-01,5.25,3.90,5.37,4.66,5.74,5.12,304.348,305.691,103.2895,3.5,0.0,0.06,0.20,71.5,3.4,117.76544,132.71063
2023-08,2023-08-01,5.30,4.17,5.37,4.95,6.02,5.33,306.269,307.026,103.3170,3.8,0.0,0.02,-0.22,69.4,3.5,88.46748,76.84670
2023-09,2023-09-01,5.32,4.38,5.44,5.13,6.16,5.33,307.481,307.789,103.6115,3.8,0.0,0.14,0.02,67.9,3.2,110.09229,113.80537


In [None]:
#| eval: false
cdat['D']

Unnamed: 0_level_0,dtdate,yield_3mt,yield_10yt,yield_1yt,yield_aaa,yield_baa,yield_fedf,pu_bbd_d,vix,vxo
Ddate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1954-01-04,1954-01-04,1.33,,,,,,,,
1954-01-05,1954-01-05,1.28,,,,,,,,
1954-01-06,1954-01-06,1.28,,,,,,,,
1954-01-07,1954-01-07,1.31,,,,,,,,
1954-01-08,1954-01-08,1.31,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2023-10-27,2023-10-27,5.32,4.84,5.39,5.71,6.72,5.33,91.83,21.27,
2023-10-28,2023-10-28,,,,,,5.33,59.81,,
2023-10-29,2023-10-29,,,,,,5.33,69.80,,
2023-10-30,2023-10-30,5.33,4.88,5.41,5.73,6.74,5.33,176.55,19.75,


In [None]:
#| export 
def search(search_text: str=None, # What to search for
              order_by: str='popularity', # How to order search results; try `search_rank` if you don't find what you were looking for
              nr_results: int=10, # How many results to output
              api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
              ) -> pd.DataFrame:
    """Query FRED for `search_text`, ordered by `order_by`, and keep only the first `nr_results` matches"""

    client = Fred(api_key=api_key)
    # Columns moved to the front of the result table
    leading_cols = ['title', 'popularity','frequency_short', 'observation_start', 'observation_end']

    matches = client.search(search_text, order_by=order_by)
    matches = matches.pipe(pdm.order_columns, leading_cols)
    top_matches = matches.iloc[:nr_results]
    return top_matches.copy()
              

In [None]:
search("three month treasury bill", order_by='popularity',nr_results=3)

Unnamed: 0_level_0,title,popularity,frequency_short,observation_start,observation_end,id,realtime_start,realtime_end,frequency,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
TB3MS,"3-Month Treasury Bill Secondary Market Rate, D...",77,M,1934-01-01,2023-10-01,TB3MS,2023-11-01,2023-11-01,Monthly,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 15:21:08-05:00,"Averages of Business Days, Discount Basis"
DTB3,"3-Month Treasury Bill Secondary Market Rate, D...",75,D,1954-01-04,2023-10-31,DTB3,2023-11-01,2023-11-01,Daily,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 15:20:09-05:00,Discount Basis
DGS3MO,Market Yield on U.S. Treasury Securities at 3-...,73,D,1981-09-01,2023-10-31,DGS3MO,2023-11-01,2023-11-01,Daily,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 15:20:09-05:00,For further information regarding treasury con...


In [None]:
time.sleep(1)
search("three month treasury bill", order_by='search_rank',nr_results=3)

Unnamed: 0_level_0,title,popularity,frequency_short,observation_start,observation_end,id,realtime_start,realtime_end,frequency,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
TB3MS,"3-Month Treasury Bill Secondary Market Rate, D...",77,M,1934-01-01,2023-10-01,TB3MS,2023-11-01,2023-11-01,Monthly,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 15:21:08-05:00,"Averages of Business Days, Discount Basis"
DTB3,"3-Month Treasury Bill Secondary Market Rate, D...",75,D,1954-01-04,2023-10-31,DTB3,2023-11-01,2023-11-01,Daily,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 15:20:09-05:00,Discount Basis
WTB3MS,"3-Month Treasury Bill Secondary Market Rate, D...",45,W,1954-01-08,2023-10-27,WTB3MS,2023-11-01,2023-11-01,"Weekly, Ending Friday",Percent,%,Not Seasonally Adjusted,NSA,2023-10-30 15:27:01-05:00,"Averages of Business Days, Discount Basis"


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()