In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import TimeSeriesSplit

In [None]:
# Load the raw table and index it by a proper datetime built from Year/Month/Day.
df = pd.read_csv('index.csv')
df = df.set_index(
    pd.to_datetime(df[["Year", "Month", "Day"]]).rename("Date")
).drop(
    columns=[
        "Year", "Month", "Day",                 # redundant once the index is a date
        "Federal Funds Target Rate",            # target-rate columns are not used
        "Federal Funds Upper Target",
        "Federal Funds Lower Target",
    ]
)

# Single-column target frame; its NaN rows are removed later, when the data
# is reduced to rows that actually carry a GDP value.
targ_df = df["Real GDP (Percent Change)"].to_frame()

# Report the first date on which an inflation rate is recorded.
first_inflation = df["Inflation Rate"].first_valid_index()
if first_inflation is not None:
    print(f"Inflation rates begin: {first_inflation}")

# Apply the same row filters to the features and to the target.
for frame in (df, targ_df):
    # Leading period without inflation data.
    # NOTE(review): .loc label slices include both endpoints, so the 1958-01-01
    # row is removed as well - confirm that is intended.
    frame.drop(frame.loc["1954-07-01":"1958-01-01"].index, inplace=True)
    # No target is available from this date on.
    frame.drop(frame.loc["2016-11-01":].index, inplace=True)
    # Mid-month entries were recorded when target rates changed and hold only
    # NaNs for the feature variables, so keep first-of-month rows only.
    frame.drop(frame.index[frame.index.day != 1], inplace=True)


# Chronological split so the model is never trained on future data.
# Only the LAST of the two folds is kept (earlier folds are discarded).
tss = TimeSeriesSplit(n_splits=2)
train_index, test_index = list(tss.split(df, targ_df))[-1]

# Features: every column except the target itself.
X_train_sm = df.iloc[train_index].copy().drop(columns="Real GDP (Percent Change)")
X_test_sm = df.iloc[test_index].copy().drop(columns="Real GDP (Percent Change)")

# Targets for the same rows.
y_train_sm = targ_df.iloc[train_index].copy()
y_test_sm = targ_df.iloc[test_index].copy()



### --------------- Rolling avg features ------------------------
def rolling_avg(X, col, y, window=3):
    """Add a trailing-mean column ``f"{col}_mean"`` to ``X`` in place.

    For every label in ``y.index`` the value is the mean of the ``window``
    rows of ``X[col]`` that come immediately BEFORE that label's position
    (the row itself is excluded). A NaN among those rows yields NaN.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame holding ``col``; it is modified in place.
    col : str
        Name of the column to average.
    y : pandas.DataFrame or pandas.Series
        Only ``y.index`` is used. Its labels are looked up in ``X`` and the
        results are assigned positionally, so it must have as many entries
        as ``X`` has rows.
    window : int, default 3
        Number of preceding rows to average.

    Raises
    ------
    KeyError
        If a label of ``y.index`` is not present in ``X``'s index.
    """
    series = X[col]
    means = []
    for label in y.index:
        pos = series.index.get_loc(label)
        if pos < window:
            # Not enough history. Without this guard ``pos - k`` turns negative
            # and ``iloc`` silently wraps around to the END of the series,
            # building the "trailing" mean out of future rows.
            means.append(np.nan)
        else:
            means.append(np.mean([series.iloc[pos - k] for k in range(1, window + 1)]))
    X[f"{col}_mean"] = means

# Trailing mean of each macro indicator, added to both the train and test frames.
for indicator in ("Inflation Rate", "Unemployment Rate", "Effective Federal Funds Rate"):
    rolling_avg(X_train_sm, indicator, y_train_sm)
    rolling_avg(X_test_sm, indicator, y_test_sm)



### --------------- Rolling std features ------------------------
def rolling_std(X, col, y, window=3):
    """Add a trailing-std column ``f"{col}_std"`` to ``X`` in place.

    For every label in ``y.index`` the value is the population standard
    deviation (``np.std``, i.e. ``ddof=0``) of the ``window`` rows of
    ``X[col]`` that come immediately BEFORE that label's position (the row
    itself is excluded). A NaN among those rows yields NaN.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame holding ``col``; it is modified in place.
    col : str
        Name of the column to summarise.
    y : pandas.DataFrame or pandas.Series
        Only ``y.index`` is used. Its labels are looked up in ``X`` and the
        results are assigned positionally, so it must have as many entries
        as ``X`` has rows.
    window : int, default 3
        Number of preceding rows to include.

    Raises
    ------
    KeyError
        If a label of ``y.index`` is not present in ``X``'s index.
    """
    series = X[col]
    stds = []
    for label in y.index:
        pos = series.index.get_loc(label)
        if pos < window:
            # Not enough history. Without this guard ``pos - k`` turns negative
            # and ``iloc`` silently wraps around to the END of the series,
            # so the statistic would be computed from future rows.
            stds.append(np.nan)
        else:
            stds.append(np.std([series.iloc[pos - k] for k in range(1, window + 1)]))
    X[f"{col}_std"] = stds

# Trailing standard deviation of each macro indicator, for both frames.
for indicator in ("Inflation Rate", "Unemployment Rate", "Effective Federal Funds Rate"):
    rolling_std(X_train_sm, indicator, y_train_sm)
    rolling_std(X_test_sm, indicator, y_test_sm)

# Keep only the engineered ("mean" / "std") columns in the training frame.
# NOTE(review): X_test_sm is not trimmed here, so it still carries the raw
# columns and ends up with a different column set than X_train_sm - confirm
# whether the test frame should be reduced the same way.
raw_cols = [name for name in X_train_sm.columns if not ("mean" in name or "std" in name)]
X_train_sm.drop(columns=raw_cols, inplace=True)




# Lagged copies of the trailing-mean columns, shifted by whole rows.
lags = [1, 2, 3, 4]
cols = ["Effective Federal Funds Rate_mean", "Inflation Rate_mean", "Unemployment Rate_mean"]

for frame in (X_train_sm, X_test_sm):
    for col in cols:
        base = frame[col]
        for L in lags:
            # column names are generated dynamically, e.g. "<col>_lag2"
            frame[f"{col}_lag{L}"] = base.shift(L)
    # Discard the first five rows of each frame, where the shifted columns
    # have no complete history.
    frame.drop(frame.index[:5], inplace=True)


# Reduce to quarterly evaluation: keep only the rows that carry a target value.

# Train: clip the target to the date range covered by X_train_sm, drop the
# rows without a GDP value, then align the features to what is left.
y_train_sm = y_train_sm.loc["1958-08-01":"1997-03-01"].dropna()
X_train_sm = X_train_sm.loc[y_train_sm.index]
print(f"Check train indices: {sum(y_train_sm.index != X_train_sm.index)}")   # expect 0 mismatches

# Test: same procedure.
y_test_sm = y_test_sm.loc["1997-10-01":"2016-10-01"].dropna()
X_test_sm = X_test_sm.loc[y_test_sm.index]
print(f"check test indices: {sum(y_test_sm.index != X_test_sm.index)}")      # expect 0 mismatches

# Quarterly lags of the target itself, used as extra features.
gdp_lags = [1, 2, 3]
for features, target in ((X_train_sm, y_train_sm), (X_test_sm, y_test_sm)):
    gdp = target["Real GDP (Percent Change)"]
    for L in gdp_lags:
        features[f"Real GDP (Percent Change)_lag{L}"] = gdp.shift(L)
    # The first three rows have incomplete GDP lags; remove them from the
    # features and from the target so the two stay aligned.
    features.drop(features.index[:3], inplace=True)
    target.drop(target.index[:3], inplace=True)

# Sanity checks: the trailing-mean columns and the targets should hold no NaNs.
mean_cols = ["Effective Federal Funds Rate_mean", "Inflation Rate_mean", "Unemployment Rate_mean"]
print(f'NaNs in X_train_sm: {X_train_sm[mean_cols].isna().sum()}')
print(f'NaNs in y_train_sm: {y_train_sm["Real GDP (Percent Change)"].isna().sum()}')
print(f'NaNs in X_test_sm: {X_test_sm[mean_cols].isna().sum()}')
print(f'NaNs in y_test_sm: {y_test_sm["Real GDP (Percent Change)"].isna().sum()}')


X_train_sm.head(6)


Inflation rates begin: 1958-01-01 00:00:00
Check train indices: 0
check test indices: 0
NaNs in X_train_sm: Effective Federal Funds Rate_mean    0
Inflation Rate_mean                  0
Unemployment Rate_mean               0
dtype: int64
NaNs in y_train_sm: 0
NaNs in X_test_sm: Effective Federal Funds Rate_mean    0
Inflation Rate_mean                  0
Unemployment Rate_mean               0
dtype: int64
NaNs in y_test_sm: 0


Unnamed: 0_level_0,Inflation Rate_mean,Unemployment Rate_mean,Effective Federal Funds Rate_mean,Inflation Rate_std,Unemployment Rate_std,Effective Federal Funds Rate_std,Effective Federal Funds Rate_mean_lag1,Effective Federal Funds Rate_mean_lag2,Effective Federal Funds Rate_mean_lag3,Effective Federal Funds Rate_mean_lag4,...,Inflation Rate_mean_lag2,Inflation Rate_mean_lag3,Inflation Rate_mean_lag4,Unemployment Rate_mean_lag1,Unemployment Rate_mean_lag2,Unemployment Rate_mean_lag3,Unemployment Rate_mean_lag4,Real GDP (Percent Change)_lag1,Real GDP (Percent Change)_lag2,Real GDP (Percent Change)_lag3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1959-07-01,1.9,5.1,3.083333,0.141421,0.08165,0.218225,2.886667,2.73,2.57,2.443333,...,1.7,1.7,1.7,5.3,5.566667,5.833333,6.033333,10.1,7.7,9.7
1959-10-01,2.133333,5.266667,3.576667,0.188562,0.169967,0.130213,3.453333,3.253333,3.083333,2.886667,...,2.0,1.9,1.8,5.1,5.066667,5.1,5.3,-0.8,10.1,7.7
1960-01-01,2.233333,5.6,3.99,0.329983,0.216025,0.008165,3.913333,3.746667,3.576667,3.453333,...,2.366667,2.133333,2.0,5.666667,5.466667,5.266667,5.1,1.6,-0.8,10.1
1960-04-01,2.1,5.133333,3.933333,0.141421,0.249444,0.0665,3.983333,3.993333,3.99,3.913333,...,2.0,2.233333,2.366667,5.1,5.433333,5.6,5.666667,9.2,1.6,-0.8
1960-07-01,1.8,5.233333,3.696667,0.141421,0.124722,0.267872,3.87,3.91,3.933333,3.983333,...,2.1,2.1,2.1,5.233333,5.133333,5.133333,5.1,-1.5,9.2,1.6
1960-10-01,1.2,5.533333,2.936667,0.141421,0.04714,0.259015,3.176667,3.466667,3.696667,3.87,...,1.566667,1.8,1.9,5.5,5.333333,5.233333,5.233333,1.0,-1.5,9.2
