# Read data from FRED API
The FRED® API is a web service that allows developers to write programs and build applications that retrieve economic data from the FRED® and ALFRED® websites hosted by the Economic Research Division of the Federal Reserve Bank of St. Louis. Requests can be customized according to data source, release, category, series, and other preferences.
https://fred.stlouisfed.org/docs/api/fred/

## Import Libraries

In [1]:
#Import the required modules
import os
from dotenv import load_dotenv

import requests
import json

import pandas as pd
import numpy as np

import pickle

## Load API key from ".env"

In [2]:
#Load secret API key from .env file
load_dotenv()

fred_api_key = os.getenv('fred_api_key')
#print('fred_api_key: ', fred_api_key)

## Specify FRED features to import

In [3]:
#List of featres from api.stlouisfed.org

FEATURE_KEYS = [
    'FEDFUNDS',
    'IGREA',
    'USACPIENGMINMEI',
    'EA19PIEAMI01GPM',
    'RTWEXBGS',
    'DEXUSEU',
    'GEPUCURRENT']

In [4]:
FEATURE_NAMES = ["fedfund_effective",
				"global_real_econ_activity",
				"us_cpi_energy",
				"eu19_ppi",
				"real_dollar_index",
				"usd_eur_exchange",
				"global_econ_policy_uncert"]

## Request to FRED API & store respose

In [5]:
# Initialize list; this is the final list that will store all the data from the json pull. 
read_data = []
i = 0

# Pull in data via EIA API
for feature in FEATURE_KEYS:
    url = 'https://api.stlouisfed.org/fred/series/observations?series_id='+ feature +\
    '&frequency=m'+'&api_key='+fred_api_key+'&file_type=json'
               
    r = requests.get(url)
    json_data = r.json()
    
    if r.status_code == 200:
        print('Success: ', feature)
    else:
        print('Error!', r)
    
    # Cast the json pull to pandas dataframe
    json_data = r.json()
    df = pd.DataFrame.from_dict(json_data['observations'])
    df = df.drop(columns = ['realtime_start','realtime_end'])
    df = df.loc[df['value'] != '.']
    df.columns = ['Date', FEATURE_NAMES[i]]
    i += 1

    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors="raise")
    df.set_index('Date', drop=True, inplace=True)
    
    display(df.head(2))
    
    # Concat the pandas dataframe into a list
    read_data.append(df)

Success:  FEDFUNDS


Unnamed: 0_level_0,fedfund_effective
Date,Unnamed: 1_level_1
1954-07-01,0.8
1954-08-01,1.22


Success:  IGREA


Unnamed: 0_level_0,global_real_econ_activity
Date,Unnamed: 1_level_1
1968-01-01,-12.775366
1968-02-01,-9.4520883


Success:  USACPIENGMINMEI


Unnamed: 0_level_0,us_cpi_energy
Date,Unnamed: 1_level_1
1960-01-01,11.0401932033811
1960-02-01,10.9909066265802


Success:  EA19PIEAMI01GPM


Unnamed: 0_level_0,eu19_ppi
Date,Unnamed: 1_level_1
2000-02-01,1.11317254174397
2000-03-01,1.10091743119266


Success:  RTWEXBGS


Unnamed: 0_level_0,real_dollar_index
Date,Unnamed: 1_level_1
2006-01-01,100.0
2006-02-01,100.2631


Success:  DEXUSEU


Unnamed: 0_level_0,usd_eur_exchange
Date,Unnamed: 1_level_1
1999-01-01,1.1591
1999-02-01,1.1203


Success:  GEPUCURRENT


Unnamed: 0_level_0,global_econ_policy_uncert
Date,Unnamed: 1_level_1
1997-01-01,77.06179599547085
1997-02-01,79.94127058972009


## Cast all data to single DataFrame

### Find the feature with most records to append all data

In [6]:
#find the longest dataseries
max_val = len(read_data[0])
max_idx = 0

for i in range(0, len(read_data)):
    name = read_data[i].columns[0]
    size = len(read_data[i])
    phrase = str(i) + ': '+ name + ' (1*' + str(size) + ')'
    
    if size>max_val:
        max_val = size
        max_idx = i
    
    print(phrase)

0: fedfund_effective (1*833)
1: global_real_econ_activity (1*670)
2: us_cpi_energy (1*766)
3: eu19_ppi (1*275)
4: real_dollar_index (1*215)
5: usd_eur_exchange (1*299)
6: global_econ_policy_uncert (1*323)


### Initiate a dataframe with biggest size feature
Using the feature with biggest number of records as the first field ensures that all rows can be appended to the the dataframe

In [7]:
#initiate the df with the the longest timeseries
df = read_data[max_idx]
df.index = df.index.astype('datetime64[ns]')

### Merge_order all data frames ON date 

In [8]:
for i in range(0, len(read_data)):
    if i != max_idx:
        df_temp = read_data[i]
        df_temp.index = df_temp.index.astype('datetime64[ns]')
        df = pd.merge_ordered(df, df_temp, on = 'Date')

## Check the final dataframe

In [9]:
display(df.head(5).T)

Unnamed: 0,0,1,2,3,4
Date,1954-07-01 00:00:00,1954-08-01 00:00:00,1954-09-01 00:00:00,1954-10-01 00:00:00,1954-11-01 00:00:00
fedfund_effective,0.80,1.22,1.07,0.85,0.83
global_real_econ_activity,,,,,
us_cpi_energy,,,,,
eu19_ppi,,,,,
real_dollar_index,,,,,
usd_eur_exchange,,,,,
global_econ_policy_uncert,,,,,


In [10]:
display(df.tail(5).T)

Unnamed: 0,828,829,830,831,832
Date,2023-07-01 00:00:00,2023-08-01 00:00:00,2023-09-01 00:00:00,2023-10-01 00:00:00,2023-11-01 00:00:00
fedfund_effective,5.12,5.33,5.33,5.33,5.33
global_real_econ_activity,-49.054046,-39.461639,-20.519327,8.9441529,
us_cpi_energy,140.381970970206,145.064195766283,145.890238793465,141.33123043939,
eu19_ppi,,,,,
real_dollar_index,112.7081,114.4273,116.1886,117.7497,115.8498
usd_eur_exchange,1.1067,1.0910,1.0672,1.0565,1.0819
global_econ_policy_uncert,228.34093841876722,213.46043355857896,246.49124909282833,226.05515930683853,246.6281071096776


## Store the data

In [11]:
# Change directory one step back and save as the root directory
ROOT_DIR = os.path.normpath(os.getcwd() + os.sep + os.pardir)

# Change direcotry to the 'data\raw' folder
path = ROOT_DIR + '\\data\\interim\\'

# Set a file name
file_save = path + 'fred_data.csv'
df.to_csv(file_save)

print('Save:\n', file_save)

Save:
 D:\gitProjects\WTI_Crude_Oil_Price_Prediction_Using_ML\data\interim\fred_data.csv
