# Read data from FRED API
The FRED® API is a web service that allows developers to write programs and build applications that retrieve economic data from the FRED® and ALFRED® websites hosted by the Economic Research Division of the Federal Reserve Bank of St. Louis. Requests can be customized according to data source, release, category, series, and other preferences.
https://fred.stlouisfed.org/docs/api/fred/

## Import Libraries

In [1]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

## Load API key from ".env"

In [2]:
# Read the variables defined in the local ".env" file into the process environment
load_dotenv()

# The key is looked up under the environment variable name 'fred_api_key'
fred_api_key = os.getenv('fred_api_key')

# Fail early with a clear message instead of a confusing error in the request
# cell. The key itself is never printed: notebook outputs are saved with the
# file and would leak the secret.
if not fred_api_key:
    raise RuntimeError(
        "Environment variable 'fred_api_key' is not set; "
        "define it in the .env file before running this notebook."
    )

## Specify FRED features to import

In [3]:
# FRED series IDs (api.stlouisfed.org) to download, in request order.
# One ID per line with a trailing comma keeps diffs small when series are
# added or removed.
FEATURE_KEYS = [
    'FEDFUNDS',
    'IGREA',
    'USACPIENGMINMEI',
    'EA19PIEAMI01GPM',
    'RTWEXBGS',
    'DEXUSEU',
    'GEPUCURRENT',
]

## Request data from the FRED API & store the response

In [4]:
# Final list holding one single-column DataFrame (indexed by Date) per series
read_data = []

# Endpoint returning the observations of one FRED series
FRED_OBSERVATIONS_URL = 'https://api.stlouisfed.org/fred/series/observations'

# Pull each series from the FRED API at monthly frequency
for feature in FEATURE_KEYS:
    # Passing the query as `params` lets requests URL-encode every value
    # instead of relying on manual string concatenation.
    query = {
        'series_id': feature,
        'frequency': 'm',
        'api_key': fred_api_key,
        'file_type': 'json',
    }

    # The timeout keeps the cell from hanging forever on a stalled connection
    r = requests.get(FRED_OBSERVATIONS_URL, params=query, timeout=30)

    # Check the status BEFORE parsing the payload: an error response has no
    # 'observations' key, so continuing would only fail later with an unrelated
    # KeyError. r.raise_for_status() is deliberately not used because its
    # message contains the full URL, including the secret api_key.
    if r.status_code != 200:
        raise RuntimeError(
            f'Error! FRED request for {feature} failed with HTTP status {r.status_code}'
        )
    print('Success: ', feature)

    # Cast the json pull to a pandas DataFrame
    json_data = r.json()
    observations = pd.DataFrame(json_data['observations'])

    df = (
        # Keep only the date/value pair (selected by name, not by position)
        # and drop observations whose value is the '.' placeholder.
        observations.loc[observations['value'] != '.', ['date', 'value']]
        .assign(
            date=lambda d: pd.to_datetime(d['date'], format='%Y-%m-%d', errors='raise'),
            # The API delivers values as strings; store them as numbers
            value=lambda d: pd.to_numeric(d['value']),
        )
        .rename(columns={'date': 'Date', 'value': feature})
        .set_index('Date')
    )

    display(df.head(2))

    # Collect the per-series DataFrame; the frames are merged in a later cell
    read_data.append(df)

Success:  FEDFUNDS


Unnamed: 0_level_0,FEDFUNDS
Date,Unnamed: 1_level_1
1954-07-01,0.8
1954-08-01,1.22


Success:  IGREA


Unnamed: 0_level_0,IGREA
Date,Unnamed: 1_level_1
1968-01-01,-12.175628
1968-02-01,-8.8550592


Success:  USACPIENGMINMEI


Unnamed: 0_level_0,USACPIENGMINMEI
Date,Unnamed: 1_level_1
1960-01-01,11.0401932033811
1960-02-01,10.9909066265802


Success:  EA19PIEAMI01GPM


Unnamed: 0_level_0,EA19PIEAMI01GPM
Date,Unnamed: 1_level_1
2000-02-01,1.11317254174397
2000-03-01,1.10091743119266


Success:  RTWEXBGS


Unnamed: 0_level_0,RTWEXBGS
Date,Unnamed: 1_level_1
2006-01-01,100.0
2006-02-01,100.2661


Success:  DEXUSEU


Unnamed: 0_level_0,DEXUSEU
Date,Unnamed: 1_level_1
1999-01-01,1.1591
1999-02-01,1.1203


Success:  GEPUCURRENT


Unnamed: 0_level_0,GEPUCURRENT
Date,Unnamed: 1_level_1
1997-01-01,76.90715771166178
1997-02-01,79.42550090048519


## Cast all data to single DataFrame

### Find the feature with most records to append all data

In [5]:
# Locate the series with the most observations; on a tie the earliest one wins
max_idx = 0
max_val = len(read_data[max_idx])

for position, frame in enumerate(read_data):
    n_rows = len(frame)

    # Report "<position>: <series name> (1*<number of rows>)" for every series
    print(f'{position}: {frame.columns[0]} (1*{n_rows})')

    if n_rows > max_val:
        max_val, max_idx = n_rows, position

0: FEDFUNDS (1*825)
1: IGREA (1*662)
2: USACPIENGMINMEI (1*757)
3: EA19PIEAMI01GPM (1*275)
4: RTWEXBGS (1*207)
5: DEXUSEU (1*291)
6: GEPUCURRENT (1*312)


### Initialize a DataFrame with the largest feature
Using the feature with the largest number of records as the first field ensures that all rows can be appended to the DataFrame.

In [6]:
# Seed the combined DataFrame with the series selected by max_idx
longest_series = read_data[max_idx]

# Make sure the index is stored as nanosecond-resolution datetimes
longest_series.index = longest_series.index.astype('datetime64[ns]')

df = longest_series

### Merge all DataFrames on Date (ordered merge)

In [7]:
# Always restart from the longest series so the cell is idempotent: merging
# onto a `df` that already holds every column (e.g. when the cell is re-run)
# would produce duplicated, suffixed columns such as IGREA_x / IGREA_y.
df = read_data[max_idx]
df.index = df.index.astype('datetime64[ns]')

for i, df_temp in enumerate(read_data):
    # The seed series is already in `df`
    if i == max_idx:
        continue

    df_temp.index = df_temp.index.astype('datetime64[ns]')

    # merge_ordered performs an outer join by default, so dates that exist in
    # only one of the two frames are kept and the missing side becomes NaN.
    df = pd.merge_ordered(df, df_temp, on='Date')

## Check the final dataframe

In [8]:
# First five rows, transposed so that every series reads as one row
first_rows = df.head(5)
display(first_rows.T)

Unnamed: 0,0,1,2,3,4
Date,1954-07-01 00:00:00,1954-08-01 00:00:00,1954-09-01 00:00:00,1954-10-01 00:00:00,1954-11-01 00:00:00
FEDFUNDS,0.80,1.22,1.07,0.85,0.83
IGREA,,,,,
USACPIENGMINMEI,,,,,
EA19PIEAMI01GPM,,,,,
RTWEXBGS,,,,,
DEXUSEU,,,,,
GEPUCURRENT,,,,,


In [9]:
# Last five rows, transposed so that every series reads as one row
last_rows = df.tail(5)
display(last_rows.T)

Unnamed: 0,820,821,822,823,824
Date,2022-11-01 00:00:00,2022-12-01 00:00:00,2023-01-01 00:00:00,2023-02-01 00:00:00,2023-03-01 00:00:00
FEDFUNDS,3.78,4.10,4.33,4.57,4.65
IGREA,-27.431779,-16.194631,-63.449653,-95.968215,
USACPIENGMINMEI,144.386505335272,135.507035658838,139.64365804973,,
EA19PIEAMI01GPM,-5.97115062059711,-4.38815554762755,,,
RTWEXBGS,118.7496,116.3114,114.2377,114.9672,115.5583
DEXUSEU,1.0192,1.0591,1.0777,1.0702,1.0711
GEPUCURRENT,330.2130286739404,250.34368889755442,,,


## Store the data

In [10]:
# Root directory of the project: the parent of the current working directory
ROOT_DIR = os.path.normpath(os.path.join(os.getcwd(), os.pardir))

# Target folder 'data/interim' below the root. os.path.join inserts the
# separator of the running platform, so the path is no longer Windows-only;
# the empty last component keeps the trailing separator that `path` had before.
path = os.path.join(ROOT_DIR, 'data', 'interim', '')

# Full name of the output file
file_save = os.path.join(path, 'fred_data.csv')
df.to_csv(file_save)

print('Save:\n', file_save)

Save:
 D:\gitProjects\WTI_Crude_Oil_Price_Prediction_Using_ML\data\interim\fred_data.csv
