## TO-DO:

- Create an object / file (JSON file?) to store the info & schema for each series
- Write API call functions for each data source (extraction)
- Standardize column headers (e.g. date columns as 'date', value columns as \<series name\>)
- Standardize date types
- Append to landing tables

## Design:

1) Manually choose series IDs.
2) Specify category/ID (FRED) or database/ID (NASDAQ)
3) For FRED, get info via API
4) For NASDAQ, manually input info

In [9]:
import nasdaqdatalink
import pyfredapi as pf

import re
import json
import numpy as np
import pandas as pd

# FRED Data: create json file

In [2]:
import os

# FRED API key used by the pyfredapi calls in this notebook.
# An exported FRED_API_KEY environment variable takes precedence, so the
# credential can be supplied or rotated without editing the notebook; the
# literal is kept only as a backward-compatible fallback.
# NOTE(review): a key committed to source should be treated as exposed —
# consider rotating it and dropping the fallback.
FRED_API_KEY = os.environ.get("FRED_API_KEY", "106419264587f03fcdba13deec1ebdfe")

In [3]:
# FRED series selected for download, grouped by category.
#   key   -> category
#   value -> list of series IDs in that category
select_data_fred = {
    "Industry": [
        "VAPGDPAER", "VAPGDPAERAF", "VAPGDPAF", "VAPGDPAFH", "VAPGDPAI",
        "VAPGDPAWMS", "VAPGDPC", "VAPGDPES", "VAPGDPESHS", "VAPGDPFI",
        "VAPGDPFIRL", "VAPGDPGPI", "VAPGDPHCSA", "VAPGDPI", "VAPGDPM",
        "VAPGDPMA", "VAPGDPMCE", "VAPGDPMD", "VAPGDPMN", "VAPGDPOSEG",
        "VAPGDPPBS", "VAPGDPPI", "VAPGDPPST", "VAPGDPR", "VAPGDPRL",
        "VAPGDPSPI", "VAPGDPT", "VAPGDPU", "VAPGDPW",
    ],
    "GDP/GNP": [
        "OB000334Q", "STLENI", "Y694RC1Q027SBEA", "WEI", "BBKMLEIX",
    ],
}

In [5]:
# Maps the weekday named in a weekly frequency description
# (e.g. "Weekly, Ending Friday") to its three-letter abbreviation.
dct_map_to_abbrev = {"Monday": "MON", "Tuesday": "TUE", "Wednesday": "WED",
                     "Thursday": "THU", "Friday": "FRI", "Saturday": "SAT", "Sunday": "SUN"}

# Compiled once, outside the loops; captures the weekday that ends the week.
_weekly_ending_re = re.compile(r"^Weekly, Ending (.*)$")

# Dictionary mapping series IDs to their info (metadata) dictionaries.
dct_data_info = {}

# Fill the dictionary: one API lookup per selected FRED series.
for category, series_ids in select_data_fred.items():
    for series_id in series_ids:

        # Get info using the API; the result iterates as (field, value) pairs.
        dct = dict(pf.get_series_info(series_id, api_key=FRED_API_KEY))

        # Standardize frequency tags: "M" -> "MS", "Q" -> "QS" and
        # "W" -> "W-<DAY>" (presumably pandas offset aliases — TODO confirm).
        # Any other tag is stored unchanged.
        freq_short = dct["frequency_short"]
        if freq_short == "M":
            dct["frequency_short"] = "MS"
        elif freq_short == "Q":
            dct["frequency_short"] = "QS"
        elif freq_short == "W":
            weekly_match = _weekly_ending_re.match(dct["frequency"])
            ending_day = weekly_match.group(1) if weekly_match else None
            # Fail with a message naming the series instead of an opaque
            # AttributeError / KeyError when the description is not of the
            # form "Weekly, Ending <weekday>".
            if ending_day not in dct_map_to_abbrev:
                raise ValueError(
                    f"{series_id}: cannot derive the week-ending day from "
                    f"frequency {dct['frequency']!r}"
                )
            dct["frequency_short"] = "W-" + dct_map_to_abbrev[ending_day]

        # Record which selection category the series was listed under.
        dct["category"] = category
        dct_data_info[series_id] = dct

# NASDAQ Data: create json file

In [None]:
# nasdaqdatalink.ApiConfig.api_key = "qA7ZJNRJgdefX28HHMYh"

In [None]:
# NASDAQ datasets selected for download, grouped by database.
#   key   -> database
#   value -> list of dataset IDs in that database
select_data_ndq = {
    "ML": ["EMHYY", "AAAEY", "AATRI", "BEY", "AEY", "BBBEY"],
}

In [6]:
# Info for the NASDAQ series, written out by hand as
# (dataset ID, category, title) rows. Every row gets frequency_short "D".
_ndq_series_rows = (
    ("EMHYY", "ML", "Emerging Markets High Yield Corporate Bond Index Yield"),
    ("AAAEY", "ML", "US AAA rated Bond Index (yield)"),
    ("AATRI", "ML", "US AA Rated Total Return Index"),
    ("BEY", "ML", "US B rated Corporate Bond Index (yield)"),
    ("AEY", "ML", "US Corporate Bond A rated Index (yield)"),
    ("BBBEY", "ML", "US BBB Bond Index (yield)"),
    ("REALLONGTERM", "USTREASURY", "Treasury Real Long-Term Rates"),
    ("REALYIELD", "USTREASURY", "Treasury Real Yield Curve Rates"),
    ("BILLRATES", "USTREASURY", "Treasury Bill Rates"),
    ("YIELD", "USTREASURY", "Treasury Yield Curve Rates"),
)

# Dictionary mapping each dataset ID to its info dictionary
# (keys: "category", "title", "frequency_short").
dct_data_info_ndq = {
    dataset_id: {"category": database, "title": title, "frequency_short": "D"}
    for dataset_id, database, title in _ndq_series_rows
}

# Export JSON

In [7]:
# Fold the NASDAQ entries into the combined info dictionary in place;
# an ID already present in dct_data_info is overwritten by the NASDAQ entry.
dct_data_info.update(dct_data_info_ndq.items())

In [8]:
# Serialize the series info dictionary to JSON text with 4-space indentation
json_data_info = json.dumps(dct_data_info, indent=4)

# Write the JSON text to ../Database/data_info.json. The encoding is given
# explicitly so the file does not depend on the platform's default encoding.
with open("../Database/data_info.json", "w", encoding="utf-8") as outfile:
    outfile.write(json_data_info)

# Extract FRED Data:

In [12]:
# Calendar year at the moment this cell runs
current_year = pd.Timestamp.now().year

In [13]:
# Extra query parameters sent with every request below
params = {"observation_start": "2000-01-01"}

# Fetch all releases of each selected FRED series and save one CSV per series
for category, series_ids in select_data_fred.items():
    for series_id in series_ids:
        releases = pf.get_series_all_releases(series_id, api_key=FRED_API_KEY, **params)

        # Swap every "9999-12-31" entry for December 31 of the current year
        # (presumably FRED's open-ended date marker — TODO confirm)
        releases = releases.replace("9999-12-31", f"{current_year}-12-31")

        # Name the value column after the series itself
        releases = releases.rename(columns={"value": series_id})

        releases.to_csv(f"../Database/{series_id}.csv", index=False)