In [198]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

from sklearn.model_selection import  cross_validate, learning_curve, GridSearchCV, TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score

In [None]:
df = pd.read_csv('index.csv')
df.insert(0, "Date", pd.to_datetime(df[["Year", "Month", "Day"]]))      # create datetime column
df.set_index(["Date"], inplace=True)                                    # set index to date, easy to handle 
df.drop(columns=["Year", "Month", "Day"], inplace=True)                 # deop redundant columns

df.drop(columns=["Federal Funds Target Rate", "Federal Funds Upper Target", "Federal Funds Lower Target"], inplace=True)        # these columns will not be used

targ_df = pd.DataFrame(df["Real GDP (Percent Change)"])     # create target and drop the NaNs to get to quarterly values
targ_df.head(5)

Unnamed: 0_level_0,Real GDP (Percent Change)
Date,Unnamed: 1_level_1
1954-07-01,4.6
1954-08-01,
1954-09-01,
1954-10-01,8.0
1954-11-01,


In [200]:
# determine date when inflation rate begins to ve recorded 
for i,j in enumerate(df["Inflation Rate"]):
    if not np.isnan(j):
        print(df.iloc[i])
        break         

Effective Federal Funds Rate     2.72
Real GDP (Percent Change)      -10.00
Unemployment Rate                5.80
Inflation Rate                   3.20
Name: 1958-01-01 00:00:00, dtype: float64


In [201]:
df.drop(df.loc["1954-07-01":"1958-01-01"].index, inplace=True)      # drop the rows with no inflation rate
df.drop(df.loc["2016-11-01":].index, inplace=True) # drop as there is no target for these dates

targ_df.drop(targ_df.loc["1954-07-01":"1958-01-01"].index, inplace=True)      # drop the rows with no inflation rate
targ_df.drop(targ_df.loc["2016-11-01":].index, inplace=True) # drop as there is no target for these dates

In [202]:
# remove mid-month entries as these were wgen target rates were chanmged, theses only contain NaNs for our feqture vaeriables 
idx_mm = [i for i,j in enumerate(df.index.day) if j != 1]
df.drop(df.iloc[idx_mm].index, inplace=True) 
# same for targ df
idx_mm = [i for i,j in enumerate(targ_df.index.day) if j != 1]
targ_df.drop(targ_df.iloc[idx_mm].index, inplace=True) 

In [214]:
tss = TimeSeriesSplit(n_splits = 2)         # use time series train test split so we do not train on future data, 2 splits to increse cv sample size

for train_index, test_index in tss.split(df, targ_df):
    X_train, X_test = df.iloc[train_index, :].copy(), df.iloc[test_index,:].copy()
    y_train, y_test = targ_df.iloc[train_index].copy(), targ_df.iloc[test_index].copy()

In [204]:
X_train["Real GDP (Percent Change)"] = df["Real GDP (Percent Change)"].interpolate()       # interpolate the target so we can create quarterly lags
X_test["Real GDP (Percent Change)"] = df["Real GDP (Percent Change)"].interpolate()       # interpolate the target so we can create quarterly lags
X_train.head(3)

Unnamed: 0_level_0,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1958-02-01,1.67,,6.4,3.2
1958-03-01,1.2,,6.7,2.8
1958-04-01,1.26,2.6,7.4,2.4


In [205]:
# remove nans from lags 
y_train = y_train.loc["1958-02-01":"1997-03-25"].dropna()
y_test = y_test.loc["1997-03-25":"1997-03-25"].dropna()

In [206]:
lags = [1,2,3,4,5]        # introduce lag features 
cols = ["Effective Federal Funds Rate", "Inflation Rate", "Unemployment Rate", "Real GDP (Percent Change)"]  # include target to create its own lags 

# loop and dynamically name lags
for col in cols:
    for L in lags:
        X_train[f"{col} lag{L}"] = X_train[col].shift(L)
X_train.tail(5)

Unnamed: 0_level_0,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,Effective Federal Funds Rate lag1,Effective Federal Funds Rate lag2,Effective Federal Funds Rate lag3,Effective Federal Funds Rate lag4,Effective Federal Funds Rate lag5,Inflation Rate lag1,...,Unemployment Rate lag1,Unemployment Rate lag2,Unemployment Rate lag3,Unemployment Rate lag4,Unemployment Rate lag5,Real GDP (Percent Change) lag1,Real GDP (Percent Change) lag2,Real GDP (Percent Change) lag3,Real GDP (Percent Change) lag4,Real GDP (Percent Change) lag5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996-11-01,5.31,3.9,5.4,2.6,5.24,5.3,5.22,5.4,5.27,2.6,...,5.2,5.2,5.1,5.5,5.3,4.3,4.1,3.9,3.7,4.866667
1996-12-01,5.29,3.5,5.4,2.6,5.31,5.24,5.3,5.22,5.4,2.6,...,5.4,5.2,5.2,5.1,5.5,3.9,4.3,4.1,3.9,3.7
1997-01-01,5.25,3.1,5.3,2.5,5.29,5.31,5.24,5.3,5.22,2.6,...,5.4,5.4,5.2,5.2,5.1,3.5,3.9,4.3,4.1,3.9
1997-02-01,5.19,4.133333,5.2,2.5,5.25,5.29,5.31,5.24,5.3,2.5,...,5.3,5.4,5.4,5.2,5.2,3.1,3.5,3.9,4.3,4.1
1997-03-01,5.39,5.166667,5.2,2.5,5.19,5.25,5.29,5.31,5.24,2.5,...,5.2,5.3,5.4,5.4,5.2,4.133333,3.1,3.5,3.9,4.3


In [207]:
lags = [1,2,3,4,5]        # introduce lag features 
cols = ["Effective Federal Funds Rate", "Inflation Rate", "Unemployment Rate", "Real GDP (Percent Change)"]  # include target to create its own lags 

# loop and dynamically name lags
for col in cols:
    for L in lags:
        X_test[f"{col} lag{L}"] = X_test[col].shift(L)
X_test.head(5)

Unnamed: 0_level_0,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,Effective Federal Funds Rate lag1,Effective Federal Funds Rate lag2,Effective Federal Funds Rate lag3,Effective Federal Funds Rate lag4,Effective Federal Funds Rate lag5,Inflation Rate lag1,...,Unemployment Rate lag1,Unemployment Rate lag2,Unemployment Rate lag3,Unemployment Rate lag4,Unemployment Rate lag5,Real GDP (Percent Change) lag1,Real GDP (Percent Change) lag2,Real GDP (Percent Change) lag3,Real GDP (Percent Change) lag4,Real GDP (Percent Change) lag5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1997-04-01,5.51,6.2,5.1,2.7,,,,,,,...,,,,,,,,,,
1997-05-01,5.5,5.866667,4.9,2.5,5.51,,,,,2.7,...,5.1,,,,,6.2,,,,
1997-06-01,5.56,5.533333,5.0,2.4,5.5,5.51,,,,2.5,...,4.9,5.1,,,,5.866667,6.2,,,
1997-07-01,5.52,5.2,4.9,2.4,5.56,5.5,5.51,,,2.4,...,5.0,4.9,5.1,,,5.533333,5.866667,6.2,,
1997-08-01,5.54,4.5,4.8,2.3,5.52,5.56,5.5,5.51,,2.4,...,4.9,5.0,4.9,5.1,,5.2,5.533333,5.866667,6.2,


In [208]:
X_train.drop(X_train.iloc[0:6].index, inplace=True)
X_test.drop(X_test.iloc[0:6].index, inplace=True)

In [209]:
X_train.drop(columns="Real GDP (Percent Change)", inplace=True)
X_train.head(5)

Unnamed: 0_level_0,Effective Federal Funds Rate,Unemployment Rate,Inflation Rate,Effective Federal Funds Rate lag1,Effective Federal Funds Rate lag2,Effective Federal Funds Rate lag3,Effective Federal Funds Rate lag4,Effective Federal Funds Rate lag5,Inflation Rate lag1,Inflation Rate lag2,...,Unemployment Rate lag1,Unemployment Rate lag2,Unemployment Rate lag3,Unemployment Rate lag4,Unemployment Rate lag5,Real GDP (Percent Change) lag1,Real GDP (Percent Change) lag2,Real GDP (Percent Change) lag3,Real GDP (Percent Change) lag4,Real GDP (Percent Change) lag5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1958-08-01,1.53,7.4,2.1,0.68,0.93,0.63,1.26,1.2,2.4,2.1,...,7.5,7.3,7.4,7.4,6.7,9.6,7.266667,4.933333,2.6,
1958-09-01,1.76,7.1,1.7,1.53,0.68,0.93,0.63,1.26,2.1,2.4,...,7.4,7.5,7.3,7.4,7.4,9.633333,9.6,7.266667,4.933333,2.6
1958-10-01,1.8,6.7,1.7,1.76,1.53,0.68,0.93,0.63,1.7,2.1,...,7.1,7.4,7.5,7.3,7.4,9.666667,9.633333,9.6,7.266667,4.933333
1958-11-01,2.27,6.2,1.7,1.8,1.76,1.53,0.68,0.93,1.7,1.7,...,6.7,7.1,7.4,7.5,7.3,9.7,9.666667,9.633333,9.6,7.266667
1958-12-01,2.42,6.2,1.7,2.27,1.8,1.76,1.53,0.68,1.7,1.7,...,6.2,6.7,7.1,7.4,7.5,9.033333,9.7,9.666667,9.633333,9.6


In [210]:
X_test.drop(columns="Real GDP (Percent Change)", inplace=True)
X_test.head(5)

Unnamed: 0_level_0,Effective Federal Funds Rate,Unemployment Rate,Inflation Rate,Effective Federal Funds Rate lag1,Effective Federal Funds Rate lag2,Effective Federal Funds Rate lag3,Effective Federal Funds Rate lag4,Effective Federal Funds Rate lag5,Inflation Rate lag1,Inflation Rate lag2,...,Unemployment Rate lag1,Unemployment Rate lag2,Unemployment Rate lag3,Unemployment Rate lag4,Unemployment Rate lag5,Real GDP (Percent Change) lag1,Real GDP (Percent Change) lag2,Real GDP (Percent Change) lag3,Real GDP (Percent Change) lag4,Real GDP (Percent Change) lag5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1997-10-01,5.5,4.7,2.3,5.54,5.54,5.52,5.56,5.5,2.2,2.3,...,4.9,4.8,4.9,5.0,4.9,3.8,4.5,5.2,5.533333,5.866667
1997-11-01,5.52,4.6,2.2,5.5,5.54,5.54,5.52,5.56,2.3,2.2,...,4.7,4.9,4.8,4.9,5.0,3.1,3.8,4.5,5.2,5.533333
1997-12-01,5.5,4.7,2.2,5.52,5.5,5.54,5.54,5.52,2.2,2.3,...,4.6,4.7,4.9,4.8,4.9,3.4,3.1,3.8,4.5,5.2
1998-01-01,5.56,4.6,2.2,5.5,5.52,5.5,5.54,5.54,2.2,2.2,...,4.7,4.6,4.7,4.9,4.8,3.7,3.4,3.1,3.8,4.5
1998-02-01,5.51,4.6,2.3,5.56,5.5,5.52,5.5,5.54,2.2,2.2,...,4.6,4.7,4.6,4.7,4.9,4.0,3.7,3.4,3.1,3.8


In [211]:
print(X_train[["Unemployment Rate", "Effective Federal Funds Rate", "Inflation Rate"]].isna().sum() )       # check if all are removed
print(y_train["Real GDP (Percent Change)"].isna().sum())        # check if all are removed
print(X_test[["Unemployment Rate", "Effective Federal Funds Rate", "Inflation Rate"]].isna().sum())        # check if all are removed
print(y_test["Real GDP (Percent Change)"].isna().sum())        # check if all are removed

Unemployment Rate               0
Effective Federal Funds Rate    0
Inflation Rate                  0
dtype: int64
0
Unemployment Rate               0
Effective Federal Funds Rate    0
Inflation Rate                  0
dtype: int64
0


Now need to reduce to quartely evaluation: 

In [212]:
y_train = y_train.loc["1958-08-01":"1997-03-01"]        # limits of X_train
y_train = y_train.dropna()                              # get rid of monthly values
X_train = X_train.loc[y_train.index]                    # use index of targ to change X
sum(y_train.index != X_train.index)                     # check

np.int64(0)

In [213]:
# same for test dataset
y_test = y_test.loc["1997-10-01":"2016-10-01"]
y_test = y_test.dropna()
X_test = X_test.loc[y_test.index]
sum(y_train.index != X_train.index)

np.int64(0)