In [97]:
# Dependencies
import requests
from dotenv import load_dotenv
import os
import pandas as pd
import json
from datetime import datetime


## Load the FRED_API_KEY and BEA_API_KEY from the env file
load_dotenv()
FRED_API_KEY = os.getenv('FRED_API_KEY')
BEA_API_KEY = os.getenv('BEA_API_KEY')


APIs: Federal Reserve Economic Data (FRED) and Bureau of Economic Analysis (BEA)

In [98]:
# Set the base URL to Federal Reserve Economic Datasets (FRED):
fred_base_url = "https://api.stlouisfed.org/fred/series/observations?"
# The series id and real time start and end for when the data was recorded on electronic files.
series_id_date="series_id=FEDFUNDS&realtime_start=1996-12-03&realtime_end=2024-12-02&"

# Build URL for FRED "&file_type=json" and verify no spaces in url
fred_query_url=fred_base_url+series_id_date+"api_key="+FRED_API_KEY +'&file_type=json'
fred_query_url=fred_query_url.replace(" ","")

In [99]:
# URL for the Bureau of Economic Analysis (BEA), see the API on setting up the url for other tables

# For "Personal Consumption Expenditures by Major Type of Product, Monthly",
# one would need Table 2.8.5 from the National Income and Product Accounts (NIPA)
# Tables on the BEA website, which has the TableName=T20805 and monthly frequency (M)  

bea_query_url = "https://apps.bea.gov/api/data/?&UserID="+BEA_API_KEY+"&method=GetData&DataSetName=NIPA&TableName=T20805&Frequency=M&Year=ALL&ResultFormat=json"
# Verify no spaces in url
bea_query_url=bea_query_url.replace(" ","")

In [100]:
#requesting FRED url
fred_response_data = requests.get(fred_query_url)
fred_data = fred_response_data.json()
print(json.dumps(fred_data, indent=4))

#real time start and end 1996-12-03 and 2024-12-02

{
    "realtime_start": "1996-12-03",
    "realtime_end": "2024-12-02",
    "observation_start": "1600-01-01",
    "observation_end": "9999-12-31",
    "units": "lin",
    "output_type": 1,
    "file_type": "json",
    "order_by": "observation_date",
    "sort_order": "asc",
    "count": 899,
    "offset": 0,
    "limit": 100000,
    "observations": [
        {
            "realtime_start": "1996-12-03",
            "realtime_end": "2024-12-02",
            "date": "1954-07-01",
            "value": "0.80"
        },
        {
            "realtime_start": "1996-12-03",
            "realtime_end": "2024-12-02",
            "date": "1954-08-01",
            "value": "1.22"
        },
        {
            "realtime_start": "1996-12-03",
            "realtime_end": "2020-07-20",
            "date": "1954-09-01",
            "value": "1.06"
        },
        {
            "realtime_start": "2020-07-21",
            "realtime_end": "2020-08-04",
            "date": "1954-09-01",
         

In [101]:
bea_response_data = requests.get(bea_query_url)
bea_data = bea_response_data.json()
# Print out commented out because the dump prints out API key 
# print(json.dumps(bea_data, indent=4))

In [102]:
#In order to read and plot observations of data,
#one must get the date and value nested inside of the json dictionary
fred_df = pd.json_normalize(fred_data,'observations')

fred_df.tail(10)

Unnamed: 0,realtime_start,realtime_end,date,value
889,2024-03-01,2024-12-02,2024-02-01,5.33
890,2024-04-01,2024-12-02,2024-03-01,5.33
891,2024-05-01,2024-12-02,2024-04-01,5.33
892,2024-06-03,2024-12-02,2024-05-01,5.33
893,2024-07-01,2024-12-02,2024-06-01,5.33
894,2024-08-01,2024-12-02,2024-07-01,5.33
895,2024-09-03,2024-12-02,2024-08-01,5.33
896,2024-10-01,2024-12-02,2024-09-01,5.13
897,2024-11-01,2024-12-02,2024-10-01,4.83
898,2024-12-02,2024-12-02,2024-11-01,4.64


In [103]:
# Copy dataframe to further modify dataframe 
fred_api_df=fred_df.copy()
fred_api_df

Unnamed: 0,realtime_start,realtime_end,date,value
0,1996-12-03,2024-12-02,1954-07-01,0.80
1,1996-12-03,2024-12-02,1954-08-01,1.22
2,1996-12-03,2020-07-20,1954-09-01,1.06
3,2020-07-21,2020-08-04,1954-09-01,1.07
4,2020-08-05,2020-10-12,1954-09-01,1.06
...,...,...,...,...
894,2024-08-01,2024-12-02,2024-07-01,5.33
895,2024-09-03,2024-12-02,2024-08-01,5.33
896,2024-10-01,2024-12-02,2024-09-01,5.13
897,2024-11-01,2024-12-02,2024-10-01,4.83


In [104]:
# Drop unneeded columns

fred_api_df.drop(['realtime_start', 'realtime_end'], axis=1,inplace=True)

In [105]:
# Rename FRED 

fred_api_df = fred_api_df.rename(columns={'date': 'Date', 'value': 'Federal Reserve Rate'})

In [106]:
# checking head and tails

fred_api_df

Unnamed: 0,Date,Federal Reserve Rate
0,1954-07-01,0.80
1,1954-08-01,1.22
2,1954-09-01,1.06
3,1954-09-01,1.07
4,1954-09-01,1.06
...,...,...
894,2024-07-01,5.33
895,2024-08-01,5.33
896,2024-09-01,5.13
897,2024-10-01,4.83


In [107]:
# Match the dates of the BEA dataset frame and drop duplicates

clean_fred_api_df = fred_api_df.loc[(fred_api_df['Date'] >= '1959-01-01') & (fred_api_df['Date'] <= '2024-10-01')]

clean_fred_api_df = clean_fred_api_df.drop_duplicates(subset='Date', keep='first')

In [108]:
# Change column "Date" into datetime

clean_fred_api_df['Date'] = pd.to_datetime(clean_fred_api_df['Date'])

In [109]:
#In order to read and plot observations of data,
#one must get the date and value nested inside of the json dictionary to the third level
bea_df = pd.json_normalize(bea_data["BEAAPI"]["Results"]["Data"])

bea_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23818 entries, 0 to 23817
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   TableName        23818 non-null  object
 1   SeriesCode       23818 non-null  object
 2   LineNumber       23818 non-null  object
 3   LineDescription  23818 non-null  object
 4   TimePeriod       23818 non-null  object
 5   METRIC_NAME      23818 non-null  object
 6   CL_UNIT          23818 non-null  object
 7   UNIT_MULT        23818 non-null  object
 8   DataValue        23818 non-null  object
 9   NoteRef          23818 non-null  object
dtypes: object(10)
memory usage: 1.8+ MB


In [110]:
# Check BEA datframe
bea_df

Unnamed: 0,TableName,SeriesCode,LineNumber,LineDescription,TimePeriod,METRIC_NAME,CL_UNIT,UNIT_MULT,DataValue,NoteRef
0,T20805,DPCERC,1,Personal consumption expenditures (PCE),1959M01,Current Dollars,Level,6,306091,T20805
1,T20805,DPCERC,1,Personal consumption expenditures (PCE),1959M02,Current Dollars,Level,6,309554,T20805
2,T20805,DPCERC,1,Personal consumption expenditures (PCE),1959M03,Current Dollars,Level,6,312702,T20805
3,T20805,DPCERC,1,Personal consumption expenditures (PCE),1959M04,Current Dollars,Level,6,312193,T20805
4,T20805,DPCERC,1,Personal consumption expenditures (PCE),1959M05,Current Dollars,Level,6,316130,T20805
...,...,...,...,...,...,...,...,...,...,...
23813,T20805,DPCXRC,31,Market-based PCE excluding food and energy,2024M06,Current Dollars,Level,6,14808577,"T20805,T20805.6"
23814,T20805,DPCXRC,31,Market-based PCE excluding food and energy,2024M07,Current Dollars,Level,6,14909961,"T20805,T20805.6"
23815,T20805,DPCXRC,31,Market-based PCE excluding food and energy,2024M08,Current Dollars,Level,6,14950375,"T20805,T20805.6"
23816,T20805,DPCXRC,31,Market-based PCE excluding food and energy,2024M09,Current Dollars,Level,6,15063032,"T20805,T20805.6"


In [111]:
# Converting "TimePeriod" to datetime via splitting by "M" 
# then using pd.to_datetime through lambda function 
bea_test=bea_df['TimePeriod'].str.split('M')

bea_dates= bea_test.apply(lambda x: pd.to_datetime(f"{x[0]}-{x[1]}"))
bea_dates

0       1959-01-01
1       1959-02-01
2       1959-03-01
3       1959-04-01
4       1959-05-01
           ...    
23813   2024-06-01
23814   2024-07-01
23815   2024-08-01
23816   2024-09-01
23817   2024-10-01
Name: TimePeriod, Length: 23818, dtype: datetime64[ns]

In [112]:
# Putting converted datetime into copy of dataframe 
bea_time_updated=bea_df.copy()
bea_time_updated["Time"]=bea_dates


In [113]:
# Filtering by .loc to have only Personal Consumption Expenditures (PCEs)
pce_df = bea_time_updated.loc[bea_time_updated['LineDescription'] == 'Personal consumption expenditures (PCE)']

In [114]:
# Convert DataValue from string to int
pce_df["DataValue"] = pce_df["DataValue"].str.replace(',', '')
pce_df["DataValue"] = pce_df["DataValue"].astype(str).astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pce_df["DataValue"] = pce_df["DataValue"].str.replace(',', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pce_df["DataValue"] = pce_df["DataValue"].astype(str).astype(int)


In [115]:
pce_df["DataValue"]

0        306091
1        309554
2        312702
3        312193
4        316130
         ...   
785    19747490
786    19859264
787    19898041
788    20027196
789    20099540
Name: DataValue, Length: 790, dtype: int32

In [116]:
# Filtering dataframe columns to Date and PCEs then renaming the columns 
clean_pce_df = pce_df[["Time","DataValue"]]
clean_pce_df= clean_pce_df.rename(columns={'Time': 'Date', 'DataValue': 'PCE'})


In [79]:
#FROM Data_Cleaning_New_2.jpynb
#Merge the PCE and Federal Funds Rate data
PCE_FedFunds = pd.merge(clean_pce_df, clean_fred_api_df, on='Date', how='inner')

In [118]:
# Converting Federal Reserve Rate into Float to compute with .diff() function
PCE_FedFunds["Federal Reserve Rate"] 
PCE_FedFunds["Federal Reserve Rate"]  = PCE_FedFunds["Federal Reserve Rate"].astype(float)

In [119]:
#FROM Data_Cleaning_New_2.jpynb
#Add a column for the calulated movement of the Federal Reserve Rate from the previous month
PCE_FedFunds['FedRate Movement'] = PCE_FedFunds['Federal Reserve Rate'].diff()

#Zero the first empty value
PCE_FedFunds['FedRate Movement'].fillna(0, inplace=True)

#Check the new column
PCE_FedFunds.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  PCE_FedFunds['FedRate Movement'].fillna(0, inplace=True)


Unnamed: 0,Date,PCE,Federal Reserve Rate,FedRate Movement
0,1959-01-01,306091,2.48,0.0
1,1959-02-01,309554,2.43,-0.05
2,1959-03-01,312702,2.8,0.37
3,1959-04-01,312193,2.96,0.16
4,1959-05-01,316130,2.9,-0.06


In [120]:
# Export data to CSV without the index
PCE_FedFunds.to_csv("API_PCE_FedFunds.csv",index=False)