In [1]:
# Data manipulation
import numpy as np
import pandas as pd

# Data visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Fred library
# install fredapi library if it is not already installed, "pip install fredapi"
from fredapi import Fred

fred = Fred(api_key='you_API_key_here')

In [2]:
# Now we will import our desired data series by calling "get_series()"
# Identify the series id: 
# Go to the series webpage, and we will find the series id in parentheses 
# that are directly beside the title of the graph of the series

# designate a start and end date for the series
start = '1970-01-31'
end = '2023-01-31'

# Variable 1: 
# 30-Year Fixed Rate Mortgage Average in the United States (MORTGAGE30US)
# https://fred.stlouisfed.org/series/MORTGAGE30US

# utilize pd.DataFrame() to convert our series into a pandas data frame
header_name = ['mortgage_int']
mortgage30raw = pd.DataFrame(fred.get_series(
    'MORTGAGE30US',
    observation_start=start,
    observation_end=end)
    , columns=header_name)

# note that the raw data points are every weekday, four times a month
mortgage30raw.head()

Unnamed: 0,mortgage_int
1971-04-02,7.33
1971-04-09,7.31
1971-04-16,7.31
1971-04-23,7.31
1971-04-30,7.29


In [3]:
# To get monthly data of the series, we will apply the "resample()" function
# with the month argument and then take the mean of that series.
# This will give us a monthly average of all the series of the data we pulled.
mortgage30MO = pd.DataFrame(fred.get_series(
    'MORTGAGE30US',
    observation_start=start,
    observation_end=end)).resample("M").mean()

mortgage30MO.head()

Unnamed: 0,0
1971-04-30,7.31
1971-05-31,7.425
1971-06-30,7.53
1971-07-31,7.604
1971-08-31,7.6975


In [4]:
# Similarly, we can get yearly data
mortgage30YR = pd.DataFrame(fred.get_series(
    'MORTGAGE30US',
    observation_start=start,
    observation_end=end)).resample("Y").mean()

mortgage30YR.head()

Unnamed: 0,0
1971-12-31,7.54175
1972-12-31,7.383269
1973-12-31,8.044808
1974-12-31,9.187115
1975-12-31,9.047115


In [5]:
# Variable 2:
# Rental Vacancy Rate in the United States (RRVRUSQ156N)
# https://fred.stlouisfed.org/series/RRVRUSQ156N

header_name = ['rental_vacancy']
rental_vac_rt = pd.DataFrame(fred.get_series(
    'RRVRUSQ156N',
    observation_start = start,
    observation_end = end)
    , columns=header_name)

rental_vac_rt.head()

Unnamed: 0,rental_vacancy
1970-01-01,5.4
1970-04-01,5.4
1970-07-01,5.3
1970-10-01,5.2
1971-01-01,5.3


In [6]:
# Variable 3:
# Consumer Price Index for All Urban Consumers: All Items in U.S. City Average (CPIAUCSL)
# https://fred.stlouisfed.org/series/CPIAUCSL

header_name = ['cpi']
cpi = pd.DataFrame(fred.get_series(
    'CPIAUCSL',
    observation_start=start,
    observation_end=end
), columns=header_name)
cpi.head()

Unnamed: 0,cpi
1970-01-01,37.9
1970-02-01,38.1
1970-03-01,38.3
1970-04-01,38.5
1970-05-01,38.6


In [7]:
# Now we use pd.concat() to combine the two time series DataFrames along the columns axis 
# to create a single DataFrame containing all three series

# Concatenate the three DataFrames along the columns axis (axis=1)
fed_df = pd.concat([mortgage30raw,rental_vac_rt, cpi], axis=1)

fed_df.head(10)

Unnamed: 0,mortgage_int,rental_vacancy,cpi
1970-01-01,,5.4,37.9
1970-02-01,,,38.1
1970-03-01,,,38.3
1970-04-01,,5.4,38.5
1970-05-01,,,38.6
1970-06-01,,,38.8
1970-07-01,,5.3,38.9
1970-08-01,,,39.0
1970-09-01,,,39.2
1970-10-01,,5.2,39.4


In [8]:
# Note that the Mortgage data is starting from 1971, so it has NaN values in 1970
# Also, rental vacancy rate data is quarterly, and cpi data is montly
# To convert the combined data to be weekly, we will adopt the following approach:

# fill missing values forward and remove NaN rows
fed_df = fed_df.ffill().dropna()
fed_df

# Here's what we are doing:
# The "ffill()" method stands for "forward fill." It is used to fill missing values in the DataFrame 
# by propagating the last valid observation forward. In other words, if there are any 
# NaN (Not-a-Number) values in the DataFrame, ffill() will replace them with the most recent non-NaN 
# value that occurred before them in the same column.

# After applying ffill(), there might still be rows with NaN values if the initial part of the 
# DataFrame had missing values before the first valid observation (which is the case in Mortgage data). 
# By calling "dropna()"", these rows with NaN values are removed from the DataFrame.

Unnamed: 0,mortgage_int,rental_vacancy,cpi
1971-04-02,7.33,5.3,40.100
1971-04-09,7.31,5.3,40.100
1971-04-16,7.31,5.3,40.100
1971-04-23,7.31,5.3,40.100
1971-04-30,7.29,5.3,40.100
...,...,...,...
2023-01-01,6.42,6.4,300.536
2023-01-05,6.48,6.4,300.536
2023-01-12,6.33,6.4,300.536
2023-01-19,6.15,6.4,300.536
