<a href="https://colab.research.google.com/github/jacksonmcl/Federal_Funds_Rate_Model/blob/main/1b.%20New%20Data%20Extraction/new_data_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Registry of raw DataFrames keyed by a descriptive series label;
# the Data Extraction cells below add one entry per CSV.
dfs = {}

### GitHub Setup

In [None]:
# Make sure git is available in the runtime before fetching the repository
! apt-get install git

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.11).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [None]:
# Try to pull into an existing "Federal_Funds_Rate_Model" checkout; if that fails, clone the repository
!git -C "Federal_Funds_Rate_Model" pull || git clone https://github.com/jacksonmcl/Federal_Funds_Rate_Model.git "Federal_Funds_Rate_Model"

Already up to date.


# Helper Functions

In [None]:
def df_dates_monthly(df, Date = 'Date'):
    """Return a copy of ``df`` whose date column is formatted as 'YYYY-MM' strings.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    Date : str, default 'Date'
        Name of the column holding the dates (anything ``pd.to_datetime`` accepts).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``Date`` replaced by its '%Y-%m' string form.
    """
    # Parse first, then render year-month only
    month_labels = pd.to_datetime(df[Date]).dt.strftime('%Y-%m')

    result = df.copy()
    result[Date] = month_labels
    return result

In [None]:
def df_change_percent(df, Value = 'Value'):
    """Return a copy of ``df`` with row-over-row percent-change columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    Value : str, default 'Value'
        Name of the numeric column the changes are computed from.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with two extra columns, both rounded to 6 decimals:
        ``ChangePercentMonth`` (change versus the previous row) and
        ``ChangePercentYear`` (change versus the row 12 positions earlier).
        The lags are counted in rows, so they correspond to one month / one
        year only when the frame holds one row per month.
    """
    clean_df = df.copy()

    # pct_change's implicit forward-fill of missing values is deprecated
    # (pandas >= 2.1) and no longer happens in pandas 3. Fill explicitly and
    # switch the implicit fill off so gaps are handled the same on every version.
    filled = clean_df[Value].ffill()

    clean_df['ChangePercentMonth'] = filled.pct_change(fill_method=None).round(6)
    clean_df['ChangePercentYear'] = filled.pct_change(12, fill_method=None).round(6)
    return clean_df

In [None]:
def plot_comparison(base_df, comparison_df, base_label, comparison_label, Value='Value'):
    """Plot a base series as a shaded area with a comparison series drawn over it.

    Parameters
    ----------
    base_df, comparison_df : pandas.DataFrame
        Frames with a 'Date' column and a column named by ``Value``.
        Neither input is modified; the function works on copies.
    base_label, comparison_label : str
        Names used in the title, the y-axis label and the legend.
    Value : str, default 'Value'
        Column plotted on the y-axis for both frames.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    base_df = base_df.copy()
    comparison_df = comparison_df.copy()

    # Convert to Datetime so both series are drawn on a real time axis
    base_df['Date'] = pd.to_datetime(base_df['Date'])
    comparison_df['Date'] = pd.to_datetime(comparison_df['Date'])

    # Plot comparison metric
    plt.figure(figsize=(12, 6))
    # Label the shaded base series so the legend identifies both series,
    # not just the comparison line.
    plt.stackplot(base_df['Date'], base_df[Value], labels=[base_label], color='lightgray')
    sns.lineplot(x='Date', y=Value, data=comparison_df, label=comparison_label, color='steelblue')

    # Customize plot
    plt.title(f'{base_label} and {comparison_label} (Scaled)')
    plt.xlabel('Date')
    plt.ylabel(f'{comparison_label}(Scaled)')
    plt.grid(True)
    plt.legend()
    plt.show()


# Data Extraction

### Federal Funds Rate
The federal funds rate is the interest rate at which depository institutions (mainly banks) lend reserve balances to other depository institutions overnight on an uncollateralized basis. In simpler terms, it's the rate banks charge each other for short-term loans to meet their reserve requirements.

In [None]:
# Load the FEDFUNDS CSV from the cloned repository and preview the first rows
dfs['Federal_Funds_Rate'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/FEDFUNDS.csv')
dfs['Federal_Funds_Rate'].head()

Unnamed: 0,observation_date,FEDFUNDS
0,1954-07-01,0.8
1,1954-08-01,1.22
2,1954-09-01,1.07
3,1954-10-01,0.85
4,1954-11-01,0.83


In [None]:
# (rows, columns) of the raw Federal Funds Rate frame
dfs['Federal_Funds_Rate'].shape

(845, 2)

In [None]:
# Lagged Federal Funds Rate predictor: a copy of the raw series whose
# observation dates are parsed and then moved one month later.
dfs['Lagged_FFR'] = dfs['Federal_Funds_Rate'].assign(
    observation_date=lambda frame: (
        pd.to_datetime(frame['observation_date'], format='%Y-%m-%d')
        + pd.DateOffset(months=1)
    )
)

### Reserve Rate
The interest on reserve balances (IORB) rate is the interest rate that the Federal Reserve pays to eligible institutions for the balances they keep in master accounts at Federal Reserve Banks. The IORB rate is a key tool of monetary policy and is set by the Board of Governors.

In [None]:
# Load the IOER CSV from the cloned repository and preview the first rows
dfs['Reserve_Rate'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/IOER.csv')
dfs['Reserve_Rate'].head()

Unnamed: 0,observation_date,IOER
0,2008-10-09,0.75
1,2008-10-10,0.75
2,2008-10-11,0.75
3,2008-10-12,0.75
4,2008-10-13,0.75


In [None]:
# (rows, columns) of the raw Reserve Rate frame
dfs['Reserve_Rate'].shape

(4676, 2)

### Discount Rate
The discount rate is the interest rate the Federal Reserve charges commercial banks and other financial institutions for short-term loans. The discount rate is applied at the Fed's lending facility, which is called the discount window.

In [None]:
# Load the INTDSRUSM193N CSV from the cloned repository and preview the first rows
dfs['Discount_Rate'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/INTDSRUSM193N.csv')
dfs['Discount_Rate'].head()

Unnamed: 0,observation_date,INTDSRUSM193N
0,1950-01-01,1.5
1,1950-02-01,1.5
2,1950-03-01,1.5
3,1950-04-01,1.5
4,1950-05-01,1.5


In [None]:
# (rows, columns) of the raw Discount Rate frame
dfs['Discount_Rate'].shape

(860, 2)

In [None]:
# Lagged Discount Rate predictor: a copy of the raw series whose
# observation dates are parsed and then moved one month later.
dfs['Lagged_DR'] = dfs['Discount_Rate'].assign(
    observation_date=lambda frame: (
        pd.to_datetime(frame['observation_date'], format='%Y-%m-%d')
        + pd.DateOffset(months=1)
    )
)

### Bank Prime Loan Rate
The prime interest rate is the percentage that U.S. commercial banks charge their most creditworthy customers for loans. Like all loan rates, the prime interest rate is derived from the federal funds' overnight rate, set by the Federal Reserve at meetings held eight times a year. The prime interest rate is the benchmark banks and other lenders use when setting their interest rates for every category of loan from credit cards to car loans and mortgages.

In [None]:
# Load the DPRIME CSV from the cloned repository and preview the first rows
dfs['Prime_Loan_Rate'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/DPRIME.csv')
dfs['Prime_Loan_Rate'].head()

Unnamed: 0,observation_date,DPRIME
0,1955-08-04,3.25
1,1955-08-05,3.25
2,1955-08-08,3.25
3,1955-08-09,3.25
4,1955-08-10,3.25


In [None]:
# (rows, columns) of the raw Prime Loan Rate frame
dfs['Prime_Loan_Rate'].shape

(18102, 2)

### Consumer Price Index
The Consumer Price Index (CPI) is used as a measure of inflation for policymakers, financial markets, businesses, and consumers. It measures the monthly change in prices paid by U.S. consumers. The Bureau of Labor Statistics (BLS) calculates the CPI as a weighted average of prices for a basket of goods and services representative of aggregate U.S. consumer spending.

In [None]:
# Load the CPIAUCSL CSV from the cloned repository and preview the first rows
dfs['Consumer_Price_Index'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/CPIAUCSL.csv')
dfs['Consumer_Price_Index'].head()

Unnamed: 0,observation_date,CPIAUCSL
0,1947-01-01,21.48
1,1947-02-01,21.62
2,1947-03-01,22.0
3,1947-04-01,22.0
4,1947-05-01,21.95


In [None]:
# (rows, columns) of the raw Consumer Price Index frame
dfs['Consumer_Price_Index'].shape

(935, 2)

### Personal Consumption Expenditures
Personal consumption expenditures (PCE), also known as consumer spending, is a measure of the spending on goods and services by people of the United States. According to the Bureau of Economic Analysis (BEA), a U.S. government agency, PCE accounts for about two-thirds of domestic spending and is a significant driver of gross domestic product (GDP).

In [None]:
# Load the PCE CSV from the cloned repository and preview the first rows
dfs['Personal_Consumption_Expenditures'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/PCE.csv')
dfs['Personal_Consumption_Expenditures'].head()

Unnamed: 0,observation_date,PCE
0,1959-01-01,306.1
1,1959-02-01,309.6
2,1959-03-01,312.7
3,1959-04-01,312.2
4,1959-05-01,316.1


In [None]:
# (rows, columns) of the raw Personal Consumption Expenditures frame
dfs['Personal_Consumption_Expenditures'].shape

(791, 2)

### Producer Price Index
The Producer Price Index (PPI) measures the average change over time in the prices domestic producers receive for their output. It is a measure of inflation at the wholesale level that is compiled from thousands of indexes measuring producer prices by industry and product category. The index is published monthly by the U.S. Bureau of Labor Statistics (BLS).

In [None]:
# Load the PPIACO CSV from the cloned repository and preview the first rows
dfs['Producer_Price_Index'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/PPIACO.csv')
dfs['Producer_Price_Index'].head()

Unnamed: 0,observation_date,PPIACO
0,1913-01-01,12.1
1,1913-02-01,12.0
2,1913-03-01,12.0
3,1913-04-01,12.0
4,1913-05-01,11.9


In [None]:
# (rows, columns) of the raw Producer Price Index frame
dfs['Producer_Price_Index'].shape

(1343, 2)

### GDP-Based Recession Indicator Index
A GDP recession index is a mathematical model that estimates the probability of a recession based on the most recent GDP data. The GDP-Based Recession Indicator Index (JHGDPBRINDX) is one such index that is reported quarterly by FRED. When the index value rises above 67%, it's a historically reliable indicator that the economy is in a recession.

In [None]:
# Load the JHGDPBRINDX CSV from the cloned repository and preview the first rows
dfs['GDP_Recession_Index'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/JHGDPBRINDX.csv')
dfs['GDP_Recession_Index'].head()

Unnamed: 0,observation_date,JHGDPBRINDX
0,1967-10-01,3.8348
1,1968-01-01,1.7614
2,1968-04-01,1.2127
3,1968-07-01,2.3009
4,1968-10-01,6.3338


In [None]:
# (rows, columns) of the raw GDP Recession Index frame
dfs['GDP_Recession_Index'].shape

(227, 2)

### Unemployment Rate
The U.S. unemployment rate is released on the first Friday of every month for the preceding month. The current and past editions of the report are available from the Bureau of Labor Statistics (BLS). The unemployment rate is the percentage of the labor force without a job. It is a lagging indicator and generally rises or falls with changing economic conditions.

In [None]:
# Load the UNRATE CSV from the cloned repository and preview the first rows
dfs['Unemployment_Rate'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/UNRATE.csv')
dfs['Unemployment_Rate'].head()

Unnamed: 0,observation_date,UNRATE
0,1948-01-01,3.4
1,1948-02-01,3.8
2,1948-03-01,4.0
3,1948-04-01,3.9
4,1948-05-01,3.5


In [None]:
# (rows, columns) of the raw Unemployment Rate frame
dfs['Unemployment_Rate'].shape

(923, 2)

### Automobile Loan Rate
The weighted average interest rate a consumer in the United States pays at Commercial Banks on a 48 Month loan for a new automobile.


In [None]:
# Load the TERMCBAUTO48NS CSV from the cloned repository and preview the first rows
# (the preview below shows that this series contains missing values)
dfs['Auto_Loan_Rate'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/TERMCBAUTO48NS.csv')
dfs['Auto_Loan_Rate'].head()

Unnamed: 0,observation_date,TERMCBAUTO48NS
0,1972-02-01,10.2
1,1972-03-01,
2,1972-04-01,
3,1972-05-01,9.96
4,1972-06-01,


In [None]:
# (rows, columns) of the raw Auto Loan Rate frame
dfs['Auto_Loan_Rate'].shape

(631, 2)

### 30-Year Fixed Rate Mortgage Average
The term fixed-rate mortgage refers to a home loan that has a fixed interest rate for the entire term of the loan. This means that the mortgage carries a constant interest rate from beginning to end. Fixed-rate mortgages are popular products for consumers who want to know how much they have to pay every month. Fixed-rate mortgages may be open or closed with specific terms of 15 or 30 years or they may run for a length of time agreed upon by the lender and borrower.

In [None]:
# Load the MORTGAGE30US CSV from the cloned repository and preview the first rows
dfs['Mortgage_Rate'] = pd.read_csv('Federal_Funds_Rate_Model/1. Data Extraction/MORTGAGE30US.csv')
dfs['Mortgage_Rate'].head()

Unnamed: 0,observation_date,MORTGAGE30US
0,1971-04-02,7.33
1,1971-04-09,7.31
2,1971-04-16,7.31
3,1971-04-23,7.31
4,1971-04-30,7.29


In [None]:
# (rows, columns) of the raw Mortgage Rate frame
dfs['Mortgage_Rate'].shape

(2806, 2)

# Data Cleaning

In [None]:
# Standardize Data: give every frame the same two column names and keep only
# rows dated on or after the first Federal Funds Rate observation.
clean_dfs = {}
target_start = dfs['Federal_Funds_Rate']['observation_date'].min()

for label, df in dfs.items():
    renamed = df.copy()
    renamed.columns = ['Date', 'Value']
    clean_dfs[label] = renamed.loc[renamed['Date'] >= target_start]

In [None]:
# Add Percent Changes, and Clean Date Column
# Each frame gets 'YYYY-MM' date strings, the two percent-change columns,
# and then has fully duplicated rows removed.
# NOTE(review): the percent changes are row-based (1-row and 12-row lags) and
# are computed before duplicates are dropped, so for series with more than one
# observation per month they are not month-over-month / year-over-year values
# - confirm whether those series should be aggregated to monthly first.
for label, df in clean_dfs.items():
    df = df_dates_monthly(df)
    df = df_change_percent(df)
    # Reassigning an existing key while iterating is safe: the key set is unchanged
    clean_dfs[label] = df.drop_duplicates()


# Export Data

In [None]:
# Write each cleaned frame to 'clean_<label>.csv' in the working directory,
# once per frame and without the index column.
for label, df in clean_dfs.items():
    df.to_csv(f'clean_{label}.csv', index=False)