In [2]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

In [3]:
# Load the secret FRED API key from the .env file into the process environment
load_dotenv()

fred_api_key = os.getenv('fred_api_key')

# Fail fast with a clear message: a missing key would otherwise only surface
# later as a TypeError when the request URL/parameters are built.
# The key itself is never printed so it cannot end up in saved cell output.
if not fred_api_key:
    raise RuntimeError(
        "Environment variable 'fred_api_key' is not set; "
        "add it to the .env file before running this notebook."
    )

In [4]:
# Features pulled from api.stlouisfed.org (FRED).
# Each entry pairs the column name used in this notebook with the FRED
# series id it is downloaded from; keeping them side by side keeps the two
# derived lists below aligned position by position.
_FRED_SERIES = (
    ('Fed_Fund_Effective', 'FEDFUNDS'),
    ('Index_Global_Real_Econ_Activity', 'IGREA'),
    ('US_CPI_Energy', 'USACPIENGMINMEI'),
    ('EU19_PPI', 'EA19PIEAMI01GPM'),
    ('Real_Dollar_Index', 'RTWEXBGS'),
    ('USD_EUR_Exchange', 'DEXUSEU'),
    ('Global_Econ_Policy_Uncertainty_Index', 'GEPUCURRENT'),
)

# Column names, in download order
FEATURE_NAMES = [column_name for column_name, _series_id in _FRED_SERIES]

# Matching FRED series ids, in the same order
FEATURE_KEYS = [series_id for _column_name, series_id in _FRED_SERIES]

In [7]:
# Pull every configured series from the FRED API (api.stlouisfed.org) and
# collect one date-indexed DataFrame per series in `read_data`.
FRED_OBSERVATIONS_URL = 'https://api.stlouisfed.org/fred/series/observations'


def fetch_fred_series(series_id, column_name, api_key, timeout=30):
    """Download one FRED series and return it as a date-indexed DataFrame.

    Parameters
    ----------
    series_id : str
        FRED series id sent as the `series_id` query parameter.
    column_name : str
        Name given to the value column of the returned frame.
    api_key : str
        FRED API key sent as the `api_key` query parameter.
    timeout : float, optional
        Seconds to wait for the server before `requests` gives up.

    Returns
    -------
    pandas.DataFrame
        A single column `column_name` indexed by a datetime index named
        'Date'. Rows whose value is the '.' placeholder are dropped. Values
        are kept exactly as returned by the API (no numeric conversion is
        done here).

    Raises
    ------
    RuntimeError
        If the server answers with a status code other than 200.
    """
    response = requests.get(
        FRED_OBSERVATIONS_URL,
        # Let requests build and encode the query string
        params={
            'series_id': series_id,
            'frequency': 'm',
            'api_key': api_key,
            'file_type': 'json',
        },
        timeout=timeout,
    )

    # Stop here on failure instead of crashing later with an unrelated KeyError.
    # `raise_for_status()` is deliberately not used: its message embeds the
    # full request URL, which would put the API key into the cell output.
    if response.status_code != 200:
        raise RuntimeError(
            f'FRED request for {series_id} failed: '
            f'HTTP {response.status_code} {response.reason}'
        )

    observations = pd.DataFrame.from_dict(response.json()['observations'])

    return (
        # Keep only real observations and select the two needed columns by
        # name, so the result does not depend on the column order of the JSON
        observations.loc[observations['value'] != '.', ['date', 'value']]
        .rename(columns={'date': 'Date', 'value': column_name})
        .assign(Date=lambda frame: pd.to_datetime(
            frame['Date'], format='%Y-%m-%d', errors='raise'))
        .set_index('Date')
    )


# The two lists are consumed in parallel, so they must line up one-to-one
if len(FEATURE_KEYS) != len(FEATURE_NAMES):
    raise ValueError('FEATURE_KEYS and FEATURE_NAMES must have the same length')

# Final list that stores one DataFrame per downloaded series
read_data = []

for series_id, column_name in zip(FEATURE_KEYS, FEATURE_NAMES):
    series_df = fetch_fred_series(series_id, column_name, fred_api_key)
    print('Success: ', series_id)

    display(series_df.head(2))

    read_data.append(series_df)

Success:  FEDFUNDS


Unnamed: 0_level_0,Fed_Fund_Effective
Date,Unnamed: 1_level_1
1954-07-01,0.8
1954-08-01,1.22


Success:  IGREA


Unnamed: 0_level_0,Index_Global_Real_Econ_Activity
Date,Unnamed: 1_level_1
1968-01-01,-11.461982
1968-02-01,-8.1446645


Success:  USACPIENGMINMEI


Unnamed: 0_level_0,US_CPI_Energy
Date,Unnamed: 1_level_1
1960-01-01,11.0401932033811
1960-02-01,10.9909066265802


Success:  EA19PIEAMI01GPM


Unnamed: 0_level_0,EU19_PPI
Date,Unnamed: 1_level_1
2000-02-01,1.11317254174397
2000-03-01,1.10091743119266


Success:  RTWEXBGS


Unnamed: 0_level_0,Real_Dollar_Index
Date,Unnamed: 1_level_1
2006-01-01,100.0
2006-02-01,100.2651


Success:  DEXUSEU


Unnamed: 0_level_0,USD_EUR_Exchange
Date,Unnamed: 1_level_1
1999-01-01,1.1591
1999-02-01,1.1203


Success:  GEPUCURRENT


Unnamed: 0_level_0,Global_Econ_Policy_Uncertainty_Index
Date,Unnamed: 1_level_1
1997-01-01,76.90647560253075
1997-02-01,79.42519131595101


In [8]:
# Report the length of every downloaded series and remember the position
# (max_idx) and row count (max_val) of the longest one; on ties the earliest
# series wins because only a strictly larger size replaces the current best.
max_idx = 0
max_val = len(read_data[max_idx])

for idx, series_df in enumerate(read_data):
    n_rows = len(series_df)
    print(f'{idx}: {series_df.columns[0]} (1*{n_rows})')

    if n_rows > max_val:
        max_val, max_idx = n_rows, idx

0: Fed_Fund_Effective (1*819)
1: Index_Global_Real_Econ_Activity (1*655)
2: US_CPI_Energy (1*751)
3: EU19_PPI (1*270)
4: Real_Dollar_Index (1*201)
5: USD_EUR_Exchange (1*285)
6: Global_Econ_Policy_Uncertainty_Index (1*308)


In [9]:
# Start the combined frame from the longest series.
# Note: this is the same object as read_data[max_idx], not a copy, so the
# index assignment below also applies to that list entry.
longest_series = read_data[max_idx]
longest_series.index = longest_series.index.astype('datetime64[ns]')
df = longest_series

In [10]:
# Combine all series into one frame keyed on 'Date'.
# The result is rebuilt from read_data[max_idx] on every run, so re-executing
# this cell yields the same frame instead of merging the series a second time
# onto an already-merged `df` (which would produce duplicated _x/_y columns).
merged = read_data[max_idx]
merged.index = merged.index.astype('datetime64[ns]')

for idx, series_df in enumerate(read_data):
    if idx == max_idx:
        # The longest series is the starting point; do not merge it with itself
        continue

    series_df.index = series_df.index.astype('datetime64[ns]')
    # merge_ordered defaults to an outer join, so dates present in only one
    # series are kept and the missing values are left as NaN.
    # After the first merge 'Date' is a regular column of the result.
    merged = pd.merge_ordered(merged, series_df, on='Date')

df = merged

In [11]:
# Preview the first three merged rows, transposed so that each feature is
# shown on its own line
preview = df.head(3).T
display(preview)

Unnamed: 0,0,1,2
Date,1954-07-01 00:00:00,1954-08-01 00:00:00,1954-09-01 00:00:00
Fed_Fund_Effective,0.80,1.22,1.07
Index_Global_Real_Econ_Activity,,,
US_CPI_Energy,,,
EU19_PPI,,,
Real_Dollar_Index,,,
USD_EUR_Exchange,,,
Global_Econ_Policy_Uncertainty_Index,,,


In [12]:
# Project root: the parent of the current working directory
ROOT_DIR = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

# Target folder <ROOT_DIR>/data/interim, built with os.path.join so the
# separators are correct on every OS (hard-coded backslashes only work on
# Windows). The trailing '' keeps the trailing separator `path` always had.
path = os.path.join(ROOT_DIR, 'data', 'interim', '')

# Create the folder on first use instead of failing inside to_csv
os.makedirs(path, exist_ok=True)

# Write the merged frame; the frame's index is written as the first CSV column
file_save = os.path.join(path, 'fred_data_raw.csv')
df.to_csv(file_save)

print('Save:\n', file_save)

Save:
 D:\gitProjects\springboard_capstone_1\Springboard_Capstone_01\data\interim\fred_data_raw.csv
