In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime as dt

%matplotlib inline

In [2]:
# Pandas display options for the whole notebook, set in a single call:
# show every column, cap rendered rows at 12, and print floats with
# 3 digits after the decimal point.
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', 12,
    'display.precision', 3,
)

# Federal Rates Dataset

Data source (Kaggle): <https://www.kaggle.com/federalreserve/interest-rates/downloads/interest-rates.zip/1>

In [3]:
# Load the raw federal rates table from the CSV file next to this notebook.
federal_rates = pd.read_csv(filepath_or_buffer='index.csv')

# Exploring the data

In [4]:
# Preview the raw frame as loaded from index.csv. The bare expression on the
# last line is rendered as the cell output; the display.max_rows option set
# earlier in the notebook limits how many rows are shown.
federal_rates

Unnamed: 0,Year,Month,Day,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate
0,1954,7,1,,,,0.80,4.6,5.8,
1,1954,8,1,,,,1.22,,6.0,
2,1954,9,1,,,,1.06,,6.1,
3,1954,10,1,,,,0.85,8.0,5.7,
4,1954,11,1,,,,0.83,,5.3,
5,1954,12,1,,,,1.28,,5.0,
...,...,...,...,...,...,...,...,...,...,...
898,2016,12,1,,0.50,0.25,0.54,,4.7,2.2
899,2016,12,14,,0.75,0.50,,,,
900,2017,1,1,,0.75,0.50,0.65,,4.8,2.3


In [5]:
# Check each column's dtype and non-null count to spot columns with
# missing values.
federal_rates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 904 entries, 0 to 903
Data columns (total 10 columns):
Year                            904 non-null int64
Month                           904 non-null int64
Day                             904 non-null int64
Federal Funds Target Rate       462 non-null float64
Federal Funds Upper Target      103 non-null float64
Federal Funds Lower Target      103 non-null float64
Effective Federal Funds Rate    752 non-null float64
Real GDP (Percent Change)       250 non-null float64
Unemployment Rate               752 non-null float64
Inflation Rate                  710 non-null float64
dtypes: float64(7), int64(3)
memory usage: 70.7 KB


In [6]:
# Join the three date parts of every row into one 'Y-M-D' string and store it
# back in the 'Year' column (the parts are not zero-padded, e.g. '1954-7-1').
federal_rates['Year'] = (
    federal_rates[['Year', 'Month', 'Day']]
    .astype(str)
    .apply('-'.join, axis=1)
)


In [7]:
# Keep only the combined date string, the effective funds rate, unemployment
# and inflation. 'Month' and 'Day' are already folded into the date string
# built above; the target-rate and GDP columns are not used by the cells
# shown in this notebook.
# Reassign the result rather than using inplace=True, so the step reads as
# "new frame from old frame" and no existing object is mutated behind a
# reader's back.
federal_rates = federal_rates.drop(columns=[
    'Month', 'Day',
    'Federal Funds Target Rate', 'Federal Funds Upper Target',
    'Federal Funds Lower Target', 'Real GDP (Percent Change)',
])

In [8]:
# Give the remaining columns short upper-case labels. The mapping is by name,
# so a change in column order cannot silently attach the wrong label to a
# series (which a positional `columns = [...]` assignment would do).
federal_rates = federal_rates.rename(columns={
    'Year': 'DATE',
    'Effective Federal Funds Rate': 'INTEREST',
    'Unemployment Rate': 'UNEMPLOYMENT',
    'Inflation Rate': 'INFLATION',
})

# rename() ignores keys that are not present, so verify the outcome explicitly
# and fail loudly if the frame does not have exactly the expected columns.
assert list(federal_rates.columns) == ['DATE', 'INTEREST', 'UNEMPLOYMENT', 'INFLATION'], \
    'unexpected columns: {}'.format(list(federal_rates.columns))

federal_rates

Unnamed: 0,DATE,INTEREST,UNEMPLOYMENT,INFLATION
0,1954-7-1,0.80,5.8,
1,1954-8-1,1.22,6.0,
2,1954-9-1,1.06,6.1,
3,1954-10-1,0.85,5.7,
4,1954-11-1,0.83,5.3,
5,1954-12-1,1.28,5.0,
...,...,...,...,...
898,2016-12-1,0.54,4.7,2.2
899,2016-12-14,,,
900,2017-1-1,0.65,4.8,2.3


In [9]:
# NOTE: every line in this cell is commented out, so the cell does nothing.
#
# Disabled: filter by date to match mortgage rate data.
# NOTE(review): 'day' and 'year' are not columns of federal_rates at this
# point (the cells above only ever use 'Year'/'Month'/'Day', later 'DATE'),
# so this line would need updating before being re-enabled.
# federal_rates = federal_rates[(federal_rates['day'] == 1) & (federal_rates['year'] > 1970)].reset_index()

# Disabled: drop last row with null inflation rate.
# NOTE(review): `inflation_rates` is not defined anywhere in the cells shown
# here; confirm where it was meant to come from before re-enabling.
# inflation_rates.drop(axis=0,index=len(inflation_rates)-1,inplace=True)
# inflation_rates['Inflation Rate'].dropna(axis=0, how='any', inplace=True)

In [10]:
# Re-check dtypes and non-null counts after dropping and renaming columns.
# DATE still holds the concatenated date strings at this point, not datetimes.
federal_rates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 904 entries, 0 to 903
Data columns (total 4 columns):
DATE            904 non-null object
INTEREST        752 non-null float64
UNEMPLOYMENT    752 non-null float64
INFLATION       710 non-null float64
dtypes: float64(3), object(1)
memory usage: 28.3+ KB


In [11]:
# Parse the 'Y-M-D' strings in DATE into real datetime values, then show the
# resulting frame.
federal_rates = federal_rates.assign(DATE=pd.to_datetime(federal_rates['DATE']))
federal_rates

Unnamed: 0,DATE,INTEREST,UNEMPLOYMENT,INFLATION
0,1954-07-01,0.80,5.8,
1,1954-08-01,1.22,6.0,
2,1954-09-01,1.06,6.1,
3,1954-10-01,0.85,5.7,
4,1954-11-01,0.83,5.3,
5,1954-12-01,1.28,5.0,
...,...,...,...,...
898,2016-12-01,0.54,4.7,2.2
899,2016-12-14,,,
900,2017-01-01,0.65,4.8,2.3


In [12]:
# Write the cleaned table to disk; index=False leaves the row index out of
# the file so only the four data columns are saved.
federal_rates.to_csv(path_or_buf='federal.csv', index=False)